In [1]:
import os

import requests

In [2]:
import pandas as pd

In [3]:
# Strain API key: read from the STRAIN_API_KEY environment variable when it is
# set, otherwise fall back to the key this notebook has always used.
key = os.environ.get('STRAIN_API_KEY', 'QsLigX4')

url = 'https://strainapi.evanbusse.com/{}/strains/search/all'.format(key)

# The timeout keeps the cell from hanging indefinitely if the server never
# answers. raise_for_status() stops the notebook here on an HTTP error instead
# of letting a later r.json() choke on an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()

In [4]:
# Show the response object; its repr includes the HTTP status code.
r

<Response [200]>

## Get the Strain API data and clean it

In [92]:
# Decode the response body. The variable is called `json`, which would shadow
# the stdlib module of the same name if that module were imported.
json = r.json()

print('Creating pandas dataframe from API response json...')
# The top-level keys of the payload become columns, so transpose to get one
# row per key, then turn the resulting index into a regular 'name' column.
df = (
    pd.DataFrame.from_dict(json)
    .T
    .reset_index()
    .rename(columns={'index': 'name'})
)
print('Dataframe Created, shape: {}'.format(df.shape))
df.head(3)

Creating pandas dataframe from API response json...
Dataframe Created, shape: (1970, 5)


Unnamed: 0,name,effects,flavors,id,race
0,Afpak,"{'positive': ['Relaxed', 'Hungry', 'Happy', 'S...","[Earthy, Chemical, Pine]",1,hybrid
1,African,"{'positive': ['Euphoric', 'Happy', 'Creative',...","[Spicy/Herbal, Pungent, Earthy]",2,sativa
2,Afternoon Delight,"{'positive': ['Relaxed', 'Hungry', 'Euphoric',...","[Pepper, Flowery, Pine]",3,hybrid


In [93]:
# Split the nested 'effects' dict of every strain into three flat list columns
# ('positive', 'negative', 'medical') on df.
print('Initializing Effect lists...')
positive = []
negative = []
medical = []
print('Lists initialized:')
s = 'Positive shape: {}\tNegative Shape: {}\tMedical Shape: {}'
print(s.format(len(positive), len(negative), len(medical)))

print('Generating individual effect lists...')
# Only the 'effects' column is needed, so iterate it directly instead of
# materialising a full row Series per strain with iterrows().
for effects in df['effects']:
    positive.append(effects['positive'])
    negative.append(effects['negative'])
    medical.append(effects['medical'])

print('Lists populated:')
print(s.format(len(positive), len(negative), len(medical)))

print('Setting columns in dataframe to effect lists...')
df['positive'] = positive
df['negative'] = negative
df['medical'] = medical

print('Columns created:')
df.head(3)

Initializing Effect lists...
Lists initialized:
Positive shape: 0	Negative Shape: 0	Medical Shape: 0
Generating individual effect lists...
Lists populated:
Positive shape: 1970	Negative Shape: 1970	Medical Shape: 1970
Setting columns in dataframe to effect lists...
Columns created:


Unnamed: 0,name,effects,flavors,id,race,positive,negative,medical
0,Afpak,"{'positive': ['Relaxed', 'Hungry', 'Happy', 'S...","[Earthy, Chemical, Pine]",1,hybrid,"[Relaxed, Hungry, Happy, Sleepy]",[Dizzy],"[Depression, Insomnia, Pain, Stress, Lack of A..."
1,African,"{'positive': ['Euphoric', 'Happy', 'Creative',...","[Spicy/Herbal, Pungent, Earthy]",2,sativa,"[Euphoric, Happy, Creative, Energetic, Talkative]",[Dry Mouth],"[Depression, Pain, Stress, Lack of Appetite, N..."
2,Afternoon Delight,"{'positive': ['Relaxed', 'Hungry', 'Euphoric',...","[Pepper, Flowery, Pine]",3,hybrid,"[Relaxed, Hungry, Euphoric, Uplifted, Tingly]","[Dizzy, Dry Mouth, Paranoid]","[Depression, Insomnia, Pain, Stress, Cramps, H..."


In [94]:
print('Removing redundant "effects" column')
# The nested dict is no longer needed once its three lists have their own
# columns; drop() returns a new frame, which is bound back to df.
df = df.drop(columns=['effects'])
print('Column removed:')
df.head(1)

Removing redundant "effects" column
Column removed:


Unnamed: 0,name,flavors,id,race,positive,negative,medical
0,Afpak,"[Earthy, Chemical, Pine]",1,hybrid,"[Relaxed, Hungry, Happy, Sleepy]",[Dizzy],"[Depression, Insomnia, Pain, Stress, Lack of A..."


In [95]:
# Keep the cleaned Strain API frame under its own name. The copy duplicates the
# frame's data, but pandas does not recursively copy Python objects stored in
# cells, so the list objects are still shared with df.
df_strain = df.copy(deep=True)

## Get Kaggle Dataset
#### The dataset was downloaded and extracted into a `cannabis-strains` folder next to this notebook

In [179]:
# Load the Kaggle strains CSV from the 'cannabis-strains' folder next to the
# notebook.
try:
    df_kag = pd.read_csv('./cannabis-strains/cannabis.csv')
except FileNotFoundError as e:
    # Only a genuinely missing file gets the download hint. Any other failure
    # (malformed CSV, permissions, ...) now propagates with its real traceback
    # instead of being reported as "unable to be found".
    # df_kag stays undefined in this case, so the cells below will fail until
    # the file is in place.
    print(e)
    print('The file was unable to be found, make sure to download it from here:')
    print('https://www.kaggle.com/kingburrito666/cannabis-strains/download')
    print('Then extract the folder called "cannabis-strains" to the folder containing this notebook')

In [180]:
# Preview the first three rows of the raw Kaggle data.
df_kag.head(n=3)

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...


In [98]:
# Rename columns to the names used in the Strain API dataframe so that both
# frames share the merge keys 'name' and 'race'.
df_kag = df_kag.rename(columns={'Strain': 'name', 'Type': 'race'})

# Kaggle names use '-' between words (e.g. '98-White-Widow'); convert them to
# the space-separated form used by the Strain API so the names can be matched.
# The vectorised .str methods leave a missing name as NaN instead of raising
# AttributeError the way a per-element lambda would.
df_kag['name'] = df_kag['name'].str.strip().str.replace('-', ' ', regex=False)

### Merge the two datasets based on their name

In [168]:
# Inner join: keep only the strains that appear in both sources with the same
# name and the same race.
merged = pd.merge(df_strain, df_kag, how='inner', on=['name', 'race'])

print(merged.shape)
merged.head(3)

### Clean and combine columns in the two datasets to form more complete columns
#### For example, we can combine the flavors from both datasets into a single, more complete list of flavor descriptors

In [169]:
def _with_new_items(existing, extra):
    """Return a new list: `existing` followed by the items of `extra` it lacks.

    existing -- list of strings already recorded for a strain.
    extra    -- comma-separated string of candidate items; a non-string value
                (e.g. NaN from an empty CSV cell) contributes nothing.

    Items are stripped of surrounding whitespace and blank items are skipped.
    The input list is never modified.
    """
    combined = list(existing)
    if isinstance(extra, str):
        for item in extra.split(','):
            item = item.strip()
            if item and item not in combined:
                combined.append(item)
    return combined


# Build fresh list columns instead of appending to the lists through iterrows()
# rows: pandas does not guarantee that writes through an iterrows() row reach
# the frame, and the list objects in `merged` are the same ones still held by
# df_strain, so appending in place silently changed that frame as well.

# The flavors in the kaggle dataset can have any new flavors added to the list
# from the strain api
merged['flavors'] = [
    _with_new_items(known, extra)
    for known, extra in zip(merged['flavors'], merged['Flavor'])
]

# The kaggle dataset only lists positive effects, so any effects that are new
# are appended to the list of positive effects
merged['positive'] = [
    _with_new_items(known, extra)
    for known, extra in zip(merged['positive'], merged['Effects'])
]

# The raw Kaggle columns are now folded into 'flavors' / 'positive'.
merged = merged.drop(columns=['Flavor', 'Effects'])

merged = merged.set_index('id')

## Final Dataset

In [181]:
# Report the (rows, columns) of the finished dataset, then preview it.
n_rows, n_cols = merged.shape
print('Final Merged Dataset Shape: {}'.format((n_rows, n_cols)))
merged.head(n=5)

Final Merged Dataset Shape: (1495, 8)


Unnamed: 0_level_0,name,flavors,race,positive,negative,medical,Rating,Description
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Afpak,"[Earthy, Chemical, Pine, Spicy/Herbal]",hybrid,"[Relaxed, Hungry, Happy, Sleepy, Creative, Foc...",[Dizzy],"[Depression, Insomnia, Pain, Stress, Lack of A...",4.2,"Afpak, named for its direct Afghani and Pakist..."
2,African,"[Spicy/Herbal, Pungent, Earthy, Pepper]",sativa,"[Euphoric, Happy, Creative, Energetic, Talkati...",[Dry Mouth],"[Depression, Pain, Stress, Lack of Appetite, N...",3.9,African refers to the indigenous varieties of ...
3,Afternoon Delight,"[Pepper, Flowery, Pine, Pungent, Citrus, Tropi...",hybrid,"[Relaxed, Hungry, Euphoric, Uplifted, Tingly, ...","[Dizzy, Dry Mouth, Paranoid]","[Depression, Insomnia, Pain, Stress, Cramps, H...",4.8,"Afternoon Delight, created by Colorado Seed In..."
4,Afwreck,"[Pine, Earthy, Flowery, Pungent]",hybrid,"[Relaxed, Happy, Creative, Uplifted, Sleepy, E...","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Pain, Stress, Headache, Fatigue, Headaches, M...",4.2,Afwreck is a hybrid cross of Afghani and Train...
5,Agent Orange,"[Citrus, Orange, Sweet, Earthy]",hybrid,"[Relaxed, Euphoric, Happy, Energetic, Uplifted]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Pain, Stress, Nausea, Headache, H...",4.2,Don’t let the name scare you! The only herbici...
