In [47]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
import numpy as np

In [213]:
# Load the strain table; the path is relative to the notebook's working directory.
df = pd.read_csv('cannabis.csv')

In [214]:
# Treat the literal string 'None' in Effects as missing so that dropna() can
# remove those rows. The whole cell value is compared: indexing a string with
# [0] yields only its first character, which can never equal 'None'.
df['Effects'] = df['Effects'].mask(df['Effects'] == 'None')

In [215]:
# Drop every row that has a missing value in any column.
df = df.dropna()

What's the goal?
- Target (y): Strain
- Features (X): Effects and Flavor
- Description -> sentiment score?

In [216]:
# Separate the feature table (everything except 'Strain') from the target
# column, then report both shapes as a sanity check.
x = df.drop(columns=['Strain']).copy()
y = df['Strain']
print(y.shape, x.shape, sep='\n')

(2277,)
(2277, 5)


In [217]:
# Number of missing values left in the target.
y.isna().sum()

0

In [218]:
# Number of missing values left in each feature column.
x.isna().sum()

Type           0
Rating         0
Effects        0
Flavor         0
Description    0
dtype: int64

In [219]:
# Count the distinct raw Effects strings (each string is a comma-separated combination).
x['Effects'].nunique()

1612

In [220]:
# Preview the feature table before encoding.
x

Unnamed: 0,Type,Rating,Effects,Flavor,Description
0,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...
2,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
3,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
4,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."
...,...,...,...,...,...
2346,hybrid,4.7,"Happy,Uplifted,Relaxed,Euphoric,Energetic","Earthy,Woody,Pine",Zeus OG is a hybrid cross between Pineapple OG...
2347,indica,4.6,"Relaxed,Happy,Euphoric,Uplifted,Sleepy","Sweet,Berry,Grape",Zkittlez is an indica-dominant mix of Grape Ap...
2348,indica,5.0,"Relaxed,Sleepy,Talkative,Euphoric,Happy","Earthy,Sweet,Spicy/Herbal",Zombie Kush by Ripper Seeds comes from two dif...
2349,indica,4.4,"Relaxed,Sleepy,Euphoric,Happy,Hungry","Sweet,Earthy,Pungent",If you’re looking to transform into a flesh-ea...


In [221]:
def explode_dummies(df: pd.DataFrame, column_name: str) -> pd.DataFrame:
    """Append one indicator column per distinct item found in a multi-valued column.

    Args:
        df: Frame holding the column to encode. It is not modified; when the
            column has to be split, the split is applied to a copy.
        column_name: Column whose cells are either comma-separated strings or
            already list-like collections of items.

    Returns:
        A new frame made of ``df`` (with ``column_name`` holding lists when it
        was split) followed by one column per distinct item, counting how many
        times that item occurs in each row.
    """
    values = df[column_name]
    # Look at the first row by position: label 0 may no longer exist once rows
    # have been dropped from the frame.
    first_value = values.iloc[0] if len(values) else None
    if isinstance(first_value, str):
        values = values.str.split(',')
        # Work on a copy so the caller's frame keeps its original strings.
        df = df.copy()
        df[column_name] = values
    # explode() repeats the row label for every item, so grouping on the index
    # folds the per-item dummies back into a single row per original record.
    indicators = pd.get_dummies(values.explode()).groupby(level=0).sum()
    return pd.concat([df, indicators], axis=1)

In [222]:
# Add one indicator column per effect; a copy is passed in so the current
# frame is not touched by the helper.
x = explode_dummies(df=x.copy(), column_name='Effects')

In [223]:
# Add one indicator column per flavor, then preview the widened table.
x = explode_dummies(df=x.copy(), column_name='Flavor')
x

Unnamed: 0,Type,Rating,Effects,Flavor,Description,Aroused,Creative,Dry,Energetic,Euphoric,...,Strawberry,Sweet,Tar,Tea,Tobacco,Tree,Tropical,Vanilla,Violet,Woody
0,hybrid,4.0,"[Creative, Energetic, Tingly, Euphoric, Relaxed]","[Earthy, Sweet, Citrus]",$100 OG is a 50/50 hybrid strain that packs a ...,0,1,0,1,1,...,0,1,0,0,0,0,0,0,0,0
1,hybrid,4.7,"[Relaxed, Aroused, Creative, Happy, Energetic]","[Flowery, Violet, Diesel]",The ‘98 Aloha White Widow is an especially pot...,1,1,0,1,0,...,0,0,0,0,0,0,0,0,1,0
2,sativa,4.4,"[Uplifted, Happy, Relaxed, Energetic, Creative]","[Spicy/Herbal, Sage, Woody]",1024 is a sativa-dominant hybrid bred in Spain...,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,1
3,hybrid,4.2,"[Tingly, Creative, Hungry, Relaxed, Uplifted]","[Apricot, Citrus, Grapefruit]",13 Dawgs is a hybrid of G13 and Chemdawg genet...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,hybrid,4.6,"[Happy, Relaxed, Euphoric, Uplifted, Talkative]","[Citrus, Earthy, Orange]","Also known as Kosher Tangie, 24k Gold is a 60%...",0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2346,hybrid,4.7,"[Happy, Uplifted, Relaxed, Euphoric, Energetic]","[Earthy, Woody, Pine]",Zeus OG is a hybrid cross between Pineapple OG...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,1
2347,indica,4.6,"[Relaxed, Happy, Euphoric, Uplifted, Sleepy]","[Sweet, Berry, Grape]",Zkittlez is an indica-dominant mix of Grape Ap...,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0
2348,indica,5.0,"[Relaxed, Sleepy, Talkative, Euphoric, Happy]","[Earthy, Sweet, Spicy/Herbal]",Zombie Kush by Ripper Seeds comes from two dif...,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0
2349,indica,4.4,"[Relaxed, Sleepy, Euphoric, Happy, Hungry]","[Sweet, Earthy, Pungent]",If you’re looking to transform into a flesh-ea...,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0


In [224]:
# Rebuild a single 'Dry Mouth' indicator from the 'Dry' column.
# NOTE(review): presumably the raw data stores this effect as "Dry,Mouth", so
# the comma split produced separate 'Dry' and 'Mouth' columns — confirm
# against the CSV.
x['Dry Mouth'] = x['Dry']

In [225]:
# Remove the separate 'Dry' and 'Mouth' indicator columns, then preview.
x = x.drop(['Dry', 'Mouth'], axis='columns')
x

Unnamed: 0,Type,Rating,Effects,Flavor,Description,Aroused,Creative,Energetic,Euphoric,Focused,...,Sweet,Tar,Tea,Tobacco,Tree,Tropical,Vanilla,Violet,Woody,Dry Mouth
0,hybrid,4.0,"[Creative, Energetic, Tingly, Euphoric, Relaxed]","[Earthy, Sweet, Citrus]",$100 OG is a 50/50 hybrid strain that packs a ...,0,1,1,1,0,...,1,0,0,0,0,0,0,0,0,0
1,hybrid,4.7,"[Relaxed, Aroused, Creative, Happy, Energetic]","[Flowery, Violet, Diesel]",The ‘98 Aloha White Widow is an especially pot...,1,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,sativa,4.4,"[Uplifted, Happy, Relaxed, Energetic, Creative]","[Spicy/Herbal, Sage, Woody]",1024 is a sativa-dominant hybrid bred in Spain...,0,1,1,0,0,...,0,0,0,0,0,0,0,0,1,0
3,hybrid,4.2,"[Tingly, Creative, Hungry, Relaxed, Uplifted]","[Apricot, Citrus, Grapefruit]",13 Dawgs is a hybrid of G13 and Chemdawg genet...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,hybrid,4.6,"[Happy, Relaxed, Euphoric, Uplifted, Talkative]","[Citrus, Earthy, Orange]","Also known as Kosher Tangie, 24k Gold is a 60%...",0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2346,hybrid,4.7,"[Happy, Uplifted, Relaxed, Euphoric, Energetic]","[Earthy, Woody, Pine]",Zeus OG is a hybrid cross between Pineapple OG...,0,0,1,1,0,...,0,0,0,0,0,0,0,0,1,0
2347,indica,4.6,"[Relaxed, Happy, Euphoric, Uplifted, Sleepy]","[Sweet, Berry, Grape]",Zkittlez is an indica-dominant mix of Grape Ap...,0,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
2348,indica,5.0,"[Relaxed, Sleepy, Talkative, Euphoric, Happy]","[Earthy, Sweet, Spicy/Herbal]",Zombie Kush by Ripper Seeds comes from two dif...,0,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
2349,indica,4.4,"[Relaxed, Sleepy, Euphoric, Happy, Hungry]","[Sweet, Earthy, Pungent]",If you’re looking to transform into a flesh-ea...,0,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0


In [226]:
# The list-valued source columns are now represented by the indicator
# columns, so remove them and preview the result.
x = x.drop(['Effects', 'Flavor'], axis='columns')
x

Unnamed: 0,Type,Rating,Description,Aroused,Creative,Energetic,Euphoric,Focused,Giggly,Happy,...,Sweet,Tar,Tea,Tobacco,Tree,Tropical,Vanilla,Violet,Woody,Dry Mouth
0,hybrid,4.0,$100 OG is a 50/50 hybrid strain that packs a ...,0,1,1,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,hybrid,4.7,The ‘98 Aloha White Widow is an especially pot...,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
2,sativa,4.4,1024 is a sativa-dominant hybrid bred in Spain...,0,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
3,hybrid,4.2,13 Dawgs is a hybrid of G13 and Chemdawg genet...,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,hybrid,4.6,"Also known as Kosher Tangie, 24k Gold is a 60%...",0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2346,hybrid,4.7,Zeus OG is a hybrid cross between Pineapple OG...,0,0,1,1,0,0,1,...,0,0,0,0,0,0,0,0,1,0
2347,indica,4.6,Zkittlez is an indica-dominant mix of Grape Ap...,0,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,0
2348,indica,5.0,Zombie Kush by Ripper Seeds comes from two dif...,0,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,0
2349,indica,4.4,If you’re looking to transform into a flesh-ea...,0,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,0


In [227]:
# Shared analyzer instance; sentiment_score() reads it as a global.
raw_score = SentimentIntensityAnalyzer()

def sentiment_score(sentence: str) -> float:
    """Convert the analyzer's 'compound' score for a text into a whole-number scale.

    Args:
        sentence: Text to score.

    Returns:
        ``(100 + compound * 101) // 2``. Because ``compound`` is a float, the
        floor division yields a whole-valued float (e.g. ``97.0``), not an int.
        NOTE(review): VADER documents ``compound`` as lying in [-1, 1]; under
        that assumption the result spans -1.0 to 100.0 — confirm that the -1
        at the bottom of the range is intended.
    """
    sentiment_dict = raw_score.polarity_scores(sentence)
    return (100 + sentiment_dict['compound'] * 101) // 2

In [228]:
# Score every description and store the result in a 'Sentiment' column at the
# end of the table. assign() overwrites the column when it already exists, so
# re-running this cell does not create a duplicate 'Sentiment' column.
x = x.assign(Sentiment=x['Description'].apply(sentiment_score))
x

Unnamed: 0,Type,Rating,Description,Aroused,Creative,Energetic,Euphoric,Focused,Giggly,Happy,...,Tar,Tea,Tobacco,Tree,Tropical,Vanilla,Violet,Woody,Dry Mouth,Sentiment
0,hybrid,4.0,$100 OG is a 50/50 hybrid strain that packs a ...,0,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,97.0
1,hybrid,4.7,The ‘98 Aloha White Widow is an especially pot...,1,1,1,0,0,0,1,...,0,0,0,0,0,0,1,0,0,96.0
2,sativa,4.4,1024 is a sativa-dominant hybrid bred in Spain...,0,1,1,0,0,0,1,...,0,0,0,0,0,0,0,1,0,84.0
3,hybrid,4.2,13 Dawgs is a hybrid of G13 and Chemdawg genet...,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,96.0
4,hybrid,4.6,"Also known as Kosher Tangie, 24k Gold is a 60%...",0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,98.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2346,hybrid,4.7,Zeus OG is a hybrid cross between Pineapple OG...,0,0,1,1,0,0,1,...,0,0,0,0,0,0,0,1,0,82.0
2347,indica,4.6,Zkittlez is an indica-dominant mix of Grape Ap...,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,98.0
2348,indica,5.0,Zombie Kush by Ripper Seeds comes from two dif...,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,98.0
2349,indica,4.4,If you’re looking to transform into a flesh-ea...,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,68.0


In [231]:
# Collect the distinct strain names from the target.
y_set = set(y)

In [232]:
# Show the distinct strain names collected from the target.
y_set

{'Purple-Candy',
 'Lake-Of-Fire',
 'Blue-Dragon',
 'Critical-Hog',
 'Double-D',
 'Berry-Noir',
 'Grimace-Og',
 'Mothers-Finest',
 'Cherry-Slyder',
 'Blue-Frost',
 'Sage',
 'Bogart',
 'Berry-Larry',
 'Aliens-On-Moonshine',
 'Doobiebird-Daydream',
 'Gods-Gift',
 'Cactus',
 'Sapphire-Star',
 'Cookie-Dough',
 'American-Dream',
 'Chocolate-Rain',
 'Pineapple-Chunk',
 'Chocoholic',
 'Green-Goddess',
 'Orange-Durban',
 'Night-Train',
 'Ghost-Train-Haze',
 'Sonoma-Coma',
 'Lemon-Jack',
 'Blueberry-Cheesecake',
 'Sinai',
 'Euphoria',
 'Purple-Sour-Diesel',
 '303-Og',
 'Blue-Trane',
 'Caramella',
 'Dream-Beaver',
 'Purple-Hashplant',
 'Jackalope',
 'Wsu',
 'Blue-Zkittlez',
 'Queso',
 'Mossimo-Og',
 'Tangelo',
 'Glue-Tech',
 'Cerebro-Haze',
 'Tangerine-Kush',
 'Avalon',
 'Church-Og',
 'Poochie-Love',
 'Pagoda',
 'Sunshine',
 'Deep-Sleep',
 'Blue-Hawaiian',
 'Kona-Gold',
 'Mr-Tusk',
 'Silver-Surfer',
 'Moose-And-Lobsta',
 'Strawberry-Milkshake',
 'Hoodwreck',
 'Odyssey',
 'A-Dub',
 'White-Nightmar

In [123]:
# Write the encoded feature table to the working directory.
x.to_csv('exploded_cannabis.csv')

In [None]:
# Write the target column (strain names) to the working directory.
y.to_csv('target.csv')