In [1]:
import numpy as np
import pandas as pd
import seaborn as sn 

In [2]:
df = pd.read_csv('Cannabis_Strains_Features.csv')

## Initial Data Exploration

In [3]:
df.head()

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."


In [4]:
df.shape

(2351, 6)

## Flavor

In [5]:
df_flavor = df.Flavor.str.split('[,]').apply(pd.Series)
df_flavor = df_flavor.rename(columns={0: 'flavor_1', 1: 'flavor_2', 2: 'flavor_3', 3: 'flavor_4'})
df = pd.concat([df, df_flavor], axis=1)

#Replaced flavor names
df['flavor_1'] = df['flavor_1'].replace(['Earthy\n', 'Diesel\n', 'Bubblegum\n', 'Vanilla\n', 'Minty', 'Grapes'],
                                        ['Earthy', 'Diesel', 'Bubblegum', 'Vanilla', 'Mint', 'Grape'])
df['flavor_2'] = df['flavor_2'].replace(['Minty', 'Citrus\n', 'Earthy\n', 'Berry\n', ' Pungent', ' Sweet', 'Sweet\n', ' Berry', 'Bluberry', 
                                        'Fruit'],
                                        ['Mint', 'Citrus','Earthy', 'Berry', 'Pungent', 'Sweet', 'Sweet', 'Berry', 'Blueberry', 'Fruity'])
df['flavor_3'] = df['flavor_3'].replace(['Minty', 'Earthy\n', ' Spicy/Herbal', 'Sweet\n', ' Earthy', 'Citrus\n', 'Fruit'],
                                        ['Mint', 'Earthy', 'Spicy/Herbal', 'Sweet', 'Earthy', 'Citrus', 'Fruity'])
df['flavor_4'] = df['flavor_4'].replace(['Fruit'],
                                       ['Fruity'])

#Dropping Flavor columns because Ive already created dummy variables for flavors
df.drop('Flavor', axis=1, inplace=True)

#Creating dummy variables of the flavor category
df = pd.get_dummies(data=df, columns= ['flavor_1', 'flavor_2', 'flavor_3', 'flavor_3', 'flavor_4'])

In [6]:
df.head()

Unnamed: 0,Strain,Type,Rating,Effects,Description,flavor_1_Ammonia,flavor_1_Apple,flavor_1_Apricot,flavor_1_Berry,flavor_1_Blue,...,flavor_4_Flowery,flavor_4_Fruity,flavor_4_Grape,flavor_4_Lemon,flavor_4_Orange,flavor_4_Pine,flavor_4_Pineapple,flavor_4_Pungent,flavor_4_Spicy/Herbal,flavor_4_Sweet
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed",$100 OG is a 50/50 hybrid strain that packs a ...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic",The ‘98 Aloha White Widow is an especially pot...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative",1024 is a sativa-dominant hybrid bred in Spain...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted",13 Dawgs is a hybrid of G13 and Chemdawg genet...,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Also known as Kosher Tangie, 24k Gold is a 60%...",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
df.shape

(2351, 212)

In [8]:
df.flavor_1_Ammonia.drop_duplicates()

0      0
124    1
Name: flavor_1_Ammonia, dtype: uint8

In [9]:
df.flavor_2_Ammonia.drop_duplicates()

0     0
16    1
Name: flavor_2_Ammonia, dtype: uint8

In [10]:
Ammonia_1 = df['flavor_1_Ammonia']
Ammonia_2 = df['flavor_2_Ammonia']
Ammonia_3 = df['flavor_2_Ammonia']

In [11]:
Ammonia_1 = Ammonia_1.astype(int)
Ammonia_2 = Ammonia_2.astype(int)
Ammonia_3 = Ammonia_3.astype(int)

In [12]:
Ammonia_1.dtype

dtype('int64')

In [13]:
Ammonia_2.dtypes

dtype('int64')

In [16]:
df['flavor_Ammonia'] = Ammonia_1.combine(Ammonia_2, max)

df['flavor_Ammonia'].drop_duplicates()

0     0
16    1
Name: flavor_Ammonia, dtype: int64

In [15]:
df.flavor_Ammonia.drop_duplicates()

0     0
16    1
Name: flavor_Ammonia, dtype: int64

In [69]:
df['flavor_1_Ammonia'].drop_duplicates()

0      0
124    1
Name: flavor_1_Ammonia, dtype: uint8

In [47]:
print(df['flavor_2_Ammonia'])

0       0
1       0
2       0
3       0
4       0
       ..
2346    0
2347    0
2348    0
2349    0
2350    0
Name: flavor_2_Ammonia, Length: 2351, dtype: uint8


In [16]:
df.flavor_1_Ammonia.drop_duplicates()

0      0
124    1
Name: flavor_1_Ammonia, dtype: uint8

In [23]:
df.flavor_2_Ammonia.drop_duplicates()

0     0
16    1
Name: flavor_2_Ammonia, dtype: uint8

In [39]:
df.flavor_3_Ammonia.drop_duplicates()

Unnamed: 0,flavor_3_Ammonia,flavor_3_Ammonia.1
0,0,0
390,1,1


In [8]:
# 'flavor_1_Ammonia', 'flavor_2_Ammonia', 'flavor_3_Ammonia'
# 'flavor_1_Apple', 'flavor_2_Apple', 'flavor_3_Apple'
# 'flavor_1_Apricot', flavor_2_Apricot', 'flavor_3_Apricot', 'flavor_4_Apricot'
# 'flavor_1_Berry', flavor_2_Berry', 'flavor_3_Berry', 'flavor_4_Berry',
# 'flavor_1_Blue', 'flavor_2_Blue', 'flavor_3_Blue'
# 'flavor_1_Blueberry','flavor_2_Blueberry', 'flavor_3_Blueberry', 'flavor_4_Blueberry'
# 'flavor_1_Bubblegum', 
# 'flavor_1_Butter', 'flavor_2_Butter', 'flavor_3_Butter'
# 'flavor_1_Cheese', 'flavor_2_Cheese', 'flavor_3_Cheese', 'flavor_4_Cheese'
# 'flavor_1_Chemical', 'flavor_2_Chemical', 'flavor_3_Chemical', 'flavor_4_Chemical'
# 'flavor_2_Chestnut', 'flavor_3_Chestnut'
# 'flavor_1_Citrus', 'flavor_2_Citrus', 'flavor_3_Citrus'
# 'flavor_1_Coffee', 'flavor_2_Coffee', 'flavor_3_Coffee'
# 'flavor_1_Diesel', 'flavor_2_Diesel', 'flavor_3_Diesel'
# 'flavor_1_Earthy', 'flavor_2_Earthy', 'flavor_3_Earthy', 'flavor_4_Earthy'
# 'flavor_1_Flowery', 'flavor_2_Flowery', 'flavor_3_Flowery', 'flavor_4_Flowery'
# 'flavor_1_Fruity', 'flavor_2_Fruity', 'flavor_3_Fruity', 'flavor_4_Fruity'
# 'flavor_1_Grape', 'flavor_2_Grape', 'flavor_3_Grape'
# 'flavor_1_Grapefruit', 'flavor_2_Grapefruit', 'flavor_3_Grapefruit'
# 'flavor_1_Grape', 'flavor_2_Grape', 'flavor_4_Grape' 
# 'flavor_1_Honey', 'flavor_2_Honey', 'flavor_3_Honey'
# 'flavor_1_Lavender', 'flavor_2_Lavender', 'flavor_3_Lavender'
# 'flavor_1_Lemon', 'flavor_2_Lemon', 'flavor_3_Lemon', 'flavor_4_Lemon'
# 'flavor_1_Lime',  'flavor_2_Lime', 'flavor_3_Lime'
# 'flavor_1_Mango', 'flavor_2_Mango', 'flavor_3_Mango'
# 'flavor_1_Menthol', 'flavor_2_Menthol', 'flavor_3_Menthol'
# 'flavor_1_Mint', 'flavor_2_Mint', 'flavor_3_Mint'
# 'flavor_1_None',
# 'flavor_1_Nutty', 'flavor_2_Nutty', 'flavor_3_Nutty'
# 'flavor_1_Orange', flavor_2_Orange', 'flavor_3_Orange', 'flavor_4_Orange'
# 'flavor_1_Peach', 'flavor_3_Peach'
# 'flavor_1_Pear', 'flavor_2_Pear', 'flavor_3_Pear'
# 'flavor_1_Pepper', 'flavor_2_Pepper', 'flavor_3_Pepper'
# 'flavor_1_Pine', 'flavor_2_Pine', 'flavor_3_Pine', 'flavor_4_Pine'
# 'flavor_1_Pineapple', 'flavor_2_Pineapple',  'flavor_3_Pineapple', 'flavor_4_Pineapple'
# 'flavor_1_Plum',  'flavor_2_Plum'
# 'flavor_1_Pungent', 'flavor_2_Pungent', 'flavor_3_Pungent', 'flavor_4_Pungent'
# 'flavor_1_Rose',  'flavor_2_Rose', 'flavor_3_Rose'
# 'flavor_1_Sage', 'flavor_2_Sage', 'flavor_3_Sage'
# 'flavor_1_Skunk', 'flavor_2_Skunk', 'flavor_3_Skunk'
# 'flavor_1_Spicy/Herbal', 'flavor_2_Spicy/Herbal', 'flavor_3_Spicy/Herbal', 'flavor_4_Spicy/Herbal'
# 'flavor_1_Strawberry', 'flavor_2_Strawberry', 'flavor_3_Strawberry'
# 'flavor_1_Sweet', 'flavor_2_Sweet', 'flavor_3_Sweet', 'flavor_4_Sweet'
# 'flavor_1_Tar', 'flavor_3_Tar'
# 'flavor_1_Tea', 'flavor_2_Tea', 'flavor_3_Tea'
# 'flavor_1_Tobacco', 'flavor_2_Tobacco', 'flavor_3_Tobacco'
# 'flavor_1_Tree', 'flavor_2_Tree', 'flavor_3_Tree'
# 'flavor_3_Tree Fruit'
# 'flavor_1_Tropical', 'flavor_2_Tropical', 'flavor_3_Tropical' 
# 'flavor_1_Vanilla', 'flavor_2_Vanilla', flavor_3_Vanilla'
# 'flavor_1_Violet', 'flavor_2_Violet', 'flavor_3_Violet'
# 'flavor_1_Woody', 'flavor_2_Woody',  'flavor_3_Woody'


## Effects

In [14]:
#Splitting Effects into separate columns and renaming those columns. Combining both dataframes into one 
df_effects = df.Effects.str.split('[,]').apply(pd.Series)
df_effects = df_effects.rename(columns={0: 'effect_1', 1: 'effect_2', 2: 'effect_3', 3: 'effect_4', 4: 'effect_5',})
df = pd.concat([df, df_effects], axis=1)

#Changed names of effects
df['effect_1'] = df['effect_1'].replace(['\nRelaxed', 'Dry'], ['Relaxed', 'Drymouth'])
df['effect_2'] = df['effect_2'].replace(['Uplifted\n', ' Relaxed', 'Mouth'], ['Uplifted', 'Relaxed', 'Drymouth'])
df['effect_3'] = df['effect_3'].replace(['Sleepy\n', 'Happy\n', 'Hungry\n', 'Energentic\n'], ['Sleepy', 'Happy', 'Hungry', 'Energetic'])
df['effect_5'] = df['effect_5'].replace(['Euphoric\n'], ['Euphoric'])

#Dropping columns I no longer need
df.drop(columns= ['Effects', 'Flavor'])

Unnamed: 0,Strain,Type,Rating,Description,flavor_1,flavor_2,flavor_3,flavor_4,effect_1,effect_2,effect_3,effect_4,effect_5
0,100-Og,hybrid,4.0,$100 OG is a 50/50 hybrid strain that packs a ...,Earthy,Sweet,Citrus,,Creative,Energetic,Tingly,Euphoric,Relaxed
1,98-White-Widow,hybrid,4.7,The ‘98 Aloha White Widow is an especially pot...,Flowery,Violet,Diesel,,Relaxed,Aroused,Creative,Happy,Energetic
2,1024,sativa,4.4,1024 is a sativa-dominant hybrid bred in Spain...,Spicy/Herbal,Sage,Woody,,Uplifted,Happy,Relaxed,Energetic,Creative
3,13-Dawgs,hybrid,4.2,13 Dawgs is a hybrid of G13 and Chemdawg genet...,Apricot,Citrus,Grapefruit,,Tingly,Creative,Hungry,Relaxed,Uplifted
4,24K-Gold,hybrid,4.6,"Also known as Kosher Tangie, 24k Gold is a 60%...",Citrus,Earthy,Orange,,Happy,Relaxed,Euphoric,Uplifted,Talkative
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2346,Zeus-Og,hybrid,4.7,Zeus OG is a hybrid cross between Pineapple OG...,Earthy,Woody,Pine,,Happy,Uplifted,Relaxed,Euphoric,Energetic
2347,Zkittlez,indica,4.6,Zkittlez is an indica-dominant mix of Grape Ap...,Sweet,Berry,Grape,,Relaxed,Happy,Euphoric,Uplifted,Sleepy
2348,Zombie-Kush,indica,5.0,Zombie Kush by Ripper Seeds comes from two dif...,Earthy,Sweet,Spicy/Herbal,,Relaxed,Sleepy,Talkative,Euphoric,Happy
2349,Zombie-Og,indica,4.4,If you’re looking to transform into a flesh-ea...,Sweet,Earthy,Pungent,,Relaxed,Sleepy,Euphoric,Happy,Hungry


In [15]:
df_effect1 = pd.get_dummies(data=df, columns=['effect_1'])
df_effect2 = pd.get_dummies(data=df, columns=['effect_2'])
df_effect3 = pd.get_dummies(data=df, columns=['effect_3'])
df_effect4 = pd.get_dummies(data=df, columns=['effect_4'])
df_effect5 = pd.get_dummies(data=df, columns=['effect_5'])

In [16]:
#'effect_1_Aroused', 'effect_2_Aroused', 'effect_3_Aroused' 'effect_4_Aroused', 'effect_5_Aroused',
#'effect_1_Creative', 'effect_2_Creative', 'effect_3_Creative', 'effect_4_Creative', 'effect_5_Creative',
#'effect_1_Drymouth', 'effect_2_Drymouth',
#'effect_1_Energetic', 'effect_2_Energetic' 'effect_3_Energetic' 'effect_4_Energetic' 'effect_5_Energetic'
#'effect_1_Euphoric',  'effect_2_Euphoric' 'effect_3_Euphoric''effect_4_Euphoric', 'effect_5_Euphoric', 
#'effect_1_Focused', 'effect_2_Focused', 'effect_3_Focused', 'effect_4_Focused', 'effect_5_Focused',
#'effect_1_Giggly', 'effect_2_Giggly' 'effect_3_Giggly' 'effect_4_Giggly' 'effect_5_Giggly',
#'effect_1_Happy', 'effect_2_Happy', 'effect_3_Happy', 'effect_4_Happy', 'effect_5_Happy', 
#'effect_1_Hungry', 'effect_2_Hungry', 'effect_3_Hungry', 'effect_4_Hungry', 'effect_5_Hungry',
#'effect_1_None', 
#'effect_1_Relaxed', 'effect_2_Relaxed','effect_3_Relaxed'  'effect_4_Relaxed', 'effect_5_Relaxed',
#'effect_1_Sleepy', 'effect_2_Sleepy', 'effect_3_Sleepy' 'effect_4_Sleepy', 'effect_5_Sleepy',
#'effect_1_Talkative', 'effect_2_Talkative', 'effect_3_Talkative', 'effect_4_Talkative' 'effect_5_Talkative',
#'effect_1_Tingly', 'effect_2_Tingly', 'effect_3_Tingly',  'effect_4_Tingly', 'effect_5_Tingly',
#'effect_1_Uplifted', 'effect_2_Uplifted', 'effect_3_Uplifted', 'effect_4_Uplifted', 'effect_5_Uplifted'

   

In [17]:
df_effect1.columns

Index(['Strain', 'Type', 'Rating', 'Effects', 'Flavor', 'Description',
       'flavor_1', 'flavor_2', 'flavor_3', 'flavor_4', 'effect_2', 'effect_3',
       'effect_4', 'effect_5', 'effect_1_Aroused', 'effect_1_Creative',
       'effect_1_Drymouth', 'effect_1_Energetic', 'effect_1_Euphoric',
       'effect_1_Focused', 'effect_1_Giggly', 'effect_1_Happy',
       'effect_1_Hungry', 'effect_1_None', 'effect_1_Relaxed',
       'effect_1_Sleepy', 'effect_1_Talkative', 'effect_1_Tingly',
       'effect_1_Uplifted'],
      dtype='object')

In [18]:
df_effect2.columns

Index(['Strain', 'Type', 'Rating', 'Effects', 'Flavor', 'Description',
       'flavor_1', 'flavor_2', 'flavor_3', 'flavor_4', 'effect_1', 'effect_3',
       'effect_4', 'effect_5', 'effect_2_Aroused', 'effect_2_Creative',
       'effect_2_Drymouth', 'effect_2_Energetic', 'effect_2_Euphoric',
       'effect_2_Focused', 'effect_2_Giggly', 'effect_2_Happy',
       'effect_2_Hungry', 'effect_2_Relaxed', 'effect_2_Sleepy',
       'effect_2_Talkative', 'effect_2_Tingly', 'effect_2_Uplifted'],
      dtype='object')

In [19]:
df_effect3.columns

Index(['Strain', 'Type', 'Rating', 'Effects', 'Flavor', 'Description',
       'flavor_1', 'flavor_2', 'flavor_3', 'flavor_4', 'effect_1', 'effect_2',
       'effect_4', 'effect_5', 'effect_3_Aroused', 'effect_3_Creative',
       'effect_3_Energetic', 'effect_3_Euphoric', 'effect_3_Focused',
       'effect_3_Giggly', 'effect_3_Happy', 'effect_3_Hungry',
       'effect_3_Relaxed', 'effect_3_Sleepy', 'effect_3_Talkative',
       'effect_3_Tingly', 'effect_3_Uplifted'],
      dtype='object')

In [20]:
df_effect4.columns

Index(['Strain', 'Type', 'Rating', 'Effects', 'Flavor', 'Description',
       'flavor_1', 'flavor_2', 'flavor_3', 'flavor_4', 'effect_1', 'effect_2',
       'effect_3', 'effect_5', 'effect_4_Aroused', 'effect_4_Creative',
       'effect_4_Energetic', 'effect_4_Euphoric', 'effect_4_Focused',
       'effect_4_Giggly', 'effect_4_Happy', 'effect_4_Hungry',
       'effect_4_Relaxed', 'effect_4_Sleepy', 'effect_4_Talkative',
       'effect_4_Tingly', 'effect_4_Uplifted'],
      dtype='object')

In [21]:
df_effect5.columns

Index(['Strain', 'Type', 'Rating', 'Effects', 'Flavor', 'Description',
       'flavor_1', 'flavor_2', 'flavor_3', 'flavor_4', 'effect_1', 'effect_2',
       'effect_3', 'effect_4', 'effect_5_Aroused', 'effect_5_Creative',
       'effect_5_Energetic', 'effect_5_Euphoric', 'effect_5_Focused',
       'effect_5_Giggly', 'effect_5_Happy', 'effect_5_Hungry',
       'effect_5_Relaxed', 'effect_5_Sleepy', 'effect_5_Talkative',
       'effect_5_Tingly', 'effect_5_Uplifted'],
      dtype='object')

In [22]:
df.head()

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,flavor_1,flavor_2,flavor_3,flavor_4,effect_1,effect_2,effect_3,effect_4,effect_5
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...,Earthy,Sweet,Citrus,,Creative,Energetic,Tingly,Euphoric,Relaxed
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...,Flowery,Violet,Diesel,,Relaxed,Aroused,Creative,Happy,Energetic
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...,Spicy/Herbal,Sage,Woody,,Uplifted,Happy,Relaxed,Energetic,Creative
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...,Apricot,Citrus,Grapefruit,,Tingly,Creative,Hungry,Relaxed,Uplifted
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%...",Citrus,Earthy,Orange,,Happy,Relaxed,Euphoric,Uplifted,Talkative
