In [None]:
import ast
import bs4
import time
import json
import requests
import pandas as pd
import scipy as scipy
import numpy as np
from bs4 import BeautifulSoup
from datetime import datetime
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
## Load the strain table; 'Feelings' and 'Cannabinoids' hold dict literals stored as text
df = pd.read_csv('canna_df.csv')
for dict_col in ('Feelings', 'Cannabinoids'):
    df[dict_col] = df[dict_col].map(ast.literal_eval)  ## To get dicts instead of strings

In [3]:
## Expand the dict-valued columns ['Feelings','Cannabinoids'] into flat columns

df = df.reset_index(drop=True)

## Column prefix -> key inside each row's 'Feelings' dict (each key is indexed as a list)
feeling_groups = {'Feeling': 'Feelings', 'Negative': 'Negatives', 'Helps with': 'Helps with'}

for i in range(5):  ## NOTE(review): assumes every list has at least 5 entries - confirm against the data
    for prefix, key in feeling_groups.items():
        df[f'{prefix}_{i}'] = (
            df['Feelings']
            .map(lambda feelings: feelings[key][i])  ## Entry i of the list becomes its own column
            .str.replace(r'\b $','',regex=True)  ## Remove space at the end of values
        )

cannabinoid_cols = ['THC','CBD','CBG']

for cannabinoid in cannabinoid_cols:
    ## The cleaned Series is assigned back to df rather than calling
    ## replace(..., inplace=True) on df[col]: the in-place form works on an
    ## intermediate Series and does not update df under pandas Copy-on-Write.
    df[cannabinoid] = (
        df['Cannabinoids']
        .map(lambda cannabinoids: cannabinoids[cannabinoid])  ## Turn Cannabinoid dict to columns
        .str.replace("%",'',regex=True)  # Remove % from numbers
        .replace("—",np.nan,regex=True)  # Replace '—' (em dash) placeholders with NaN
    )

df = df.drop(columns=['Feelings','Cannabinoids'])  ## Remove old columns

cols = ['Rating','Rating Users'] + cannabinoid_cols

df[cols] = df[cols].apply(pd.to_numeric)  # Change columns type to numeric





In [10]:
## Strains with no THC value although at least one parent is known
thc_missing = df['THC'].isna()
has_parent = df['Left Parent'].notna() | df['Right parent'].notna()
nanim = df[thc_missing & has_parent]

## Hide the stray "Unnamed..." column(s) when displaying
## NOTE(review): presumably an index column written by an earlier to_csv - confirm
unnamed_cols = nanim.columns.str.match("Unnamed")
nanim.loc[:, ~unnamed_cols]

Unnamed: 0,Strain Name,Type,Rating,Rating Users,Left Parent,Right parent,Left Child,Right Child,Feeling_0,Negative_0,...,Helps with_2,Feeling_3,Negative_3,Helps with_3,Feeling_4,Negative_4,Helps with_4,THC,CBD,CBG
6,Seattle Summer,Hybrid,5.0,4.0,Gorilla Cookies,,,,Uplifted,,...,,,,,,,,,,
8,Stashsquatch,Hybrid,,,GSC,True OG,Lemon Stash CBD,Lazy Susan CBD,,,...,,,,,,,,,,
9,Lashkar Gah,Indica,4.7,15.0,Afghani,,,,Relaxed,Dry mouth,...,PTSD,Happy,,Insomnia,Hungry,,Lack of appetite,,,
13,Rise ‘n’ Shine,Hybrid,4.7,7.0,Green Crack,Sour Bubble,,,Talkative,Dry eyes,...,Cramps,Uplifted,,Eye pressure,Tingly,,Headaches,,,
37,Sugartown Express,Hybrid,,,Purple Trainwreck,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5121,Flo Limone,Sativa,4.7,13.0,Flo,,,,Uplifted,Dry eyes,...,Stress,Relaxed,,Depression,Talkative,,Fatigue,,,
5124,Americano,Indica,4.6,5.0,Northern Lights,Skunk 1,,,Relaxed,Dry eyes,...,Pain,Creative,,,Energetic,,,,,
5135,Concord Grape Blockhead,Hybrid,4.3,4.0,Sweet Tooth,,,,Creative,Dry mouth,...,,Hungry,,,Happy,,,,,
5138,American Dream,Hybrid,4.6,19.0,Hawaiian,Jamaican,,,Euphoric,Dry mouth,...,Depression,Tingly,,Lack of appetite,Focused,,Anxiety,,,


In [11]:
## Fill missing THC values

def my_func(df,parent):
    """Return the mean THC, rounded to 0 decimals, over every strain related to `parent`.

    A row counts as related when `parent` is its 'Left Parent', its 'Right parent',
    or its own 'Strain Name' (so the parent strain itself is included).
    The result is NaN when no related row carries a THC value.
    """
    is_left_child = df['Left Parent'] == parent
    is_right_child = df['Right parent'] == parent
    is_parent_itself = df['Strain Name'] == parent
    related_thc = df.loc[is_left_child | is_right_child | is_parent_itself, 'THC']

    return round(related_thc.mean(), 0)

def complete_cannabinoids_nan(df):
    """Estimate missing THC values from parent strains.

    Selects the rows of `df` whose 'THC' is NaN and that have at least one known
    parent. Each selected row gets the `my_func` mean of its 'Left Parent'; when
    that is NaN, the mean of its 'Right parent' is used instead. A row stays NaN
    when neither parent yields a value.

    Returns a copy of the selected rows with 'THC' filled in; `df` is not modified.
    """
    has_parent = df['Left Parent'].notna() | df['Right parent'].notna()
    nanim = df[df['THC'].isna() & has_parent].copy()  # Copy so the assignment below does not touch df

    parent_means = {}  # parent name -> my_func result, so each parent is scanned for only once

    def parent_mean(parent):
        if pd.isnull(parent):
            return float('nan')  # No parent recorded, nothing to average
        if parent not in parent_means:
            parent_means[parent] = my_func(df, parent)
        return parent_means[parent]

    filled = []
    for left, right in zip(nanim['Left Parent'], nanim['Right parent']):
        estimate = parent_mean(left)
        if pd.isnull(estimate):  # Fall back to the right parent only when the left one gave nothing
            estimate = parent_mean(right)
        filled.append(estimate)

    nanim['THC'] = pd.Series(filled, index=nanim.index, dtype=float)

    return nanim

# nanim.head()

# nanim = nanim.copy() # To apply changes on real DF

# nanim['THC'] = nanim.apply(lambda x: my_func(df,x['Left Parent']),axis=1)
# nanim['THC'] = nanim.apply(lambda x: my_func(df,x['Right parent']) if pd.isnull(x['THC']) else x['THC'],axis=1)





In [12]:
## Rows of df with missing THC and at least one known parent, with THC estimated from the parents
nanim_df = complete_cannabinoids_nan(df)

In [13]:
## Strains whose THC is still missing after trying both parents
nanim_df.loc[nanim_df['THC'].isna()]

Unnamed: 0.1,Unnamed: 0,Strain Name,Type,Rating,Rating Users,Left Parent,Right parent,Left Child,Right Child,Feeling_0,...,Helps with_2,Feeling_3,Negative_3,Helps with_3,Feeling_4,Negative_4,Helps with_4,THC,CBD,CBG
186,186,Frosted Freak,Hybrid,4.9,75.0,Brand X,,,,Happy,...,Depression,Uplifted,,Anxiety,Tingly,,Pain,,,
291,291,Journeymen,Hybrid,,,Odyssey,,,,,...,,,,,,,,,,
910,910,Krakatoa,Hybrid,4.0,2.0,Mau-Mau,,,,Euphoric,...,Pain,Sleepy,,Stress,Hungry,,Anxiety,,,
1356,1356,Hayley's Haze,Sativa,5.0,5.0,Outer Space,Alien Dutchess,,,,...,,,,,,,,,,
1703,1703,Hobbit,Hybrid,,,Ginger Ale,,,,,...,,,,,,,,,,
2018,2018,Siddhartha’s Dream,Hybrid,,,Buddha’s Tooth,,,,,...,,,,,,,,,,
2497,2497,Lazy Susan CBD,Hybrid,,,Stashsquatch,,,,,...,,,,,,,,,,
2990,2990,Pebble Pie,Hybrid,,,Pie 95,,,,,...,,,,,,,,,,
3031,3031,Blueberry Space Cake,Indica,4.7,76.0,Outer Space,Alien Dutchess,,,Relaxed,...,Migraines,Happy,,Headaches,Euphoric,,Insomnia,,,
3177,3177,Maui Citrus Punch,Sativa,4.6,8.0,Tangelo Kush,,,,Uplifted,...,Inflammation,Happy,,Muscle spasms,Talkative,,Pain,,,


In [14]:
## Merge the parent-based estimates into a copy of the full table.
## Only 'THC' is written: DataFrame.update would pass over every column and can
## upcast untouched integer columns to float along the way.
df2 = df.copy()
df2['THC'] = df2['THC'].fillna(nanim_df['THC'])  ## Series value is aligned on the index

In [16]:

## Strains that still have no THC value after the fill
df2.loc[df2['THC'].isna()]


# nanim['THC'].isna()

# df[df['Strain Name'] == 'Alien Dutchess']
# df.loc[(df['Left Parent'] == 'Alien Dutchess') | (df['Right parent'] == 'Alien Dutchess') | (df['Strain Name'] == 'Alien Dutchess')]
# nanim.head(10)

Unnamed: 0.1,Unnamed: 0,Strain Name,Type,Rating,Rating Users,Left Parent,Right parent,Left Child,Right Child,Feeling_0,...,Helps with_2,Feeling_3,Negative_3,Helps with_3,Feeling_4,Negative_4,Helps with_4,THC,CBD,CBG
2,2.0,Y Life,Hybrid,4.0,2.0,,,,,Uplifted,...,,,,,,,,,,
3,3.0,Purple Reign,Hybrid,4.2,17.0,,,,,Relaxed,...,Anxiety,Tingly,,,Focused,,,,,
5,5.0,Peach Cheesewreck,,,,,,,,,...,,,,,,,,,,
15,15.0,Weekend Warrior,,,,,,,,,...,,,,,,,,,,
16,16.0,Mixed Berry,,4.3,6.0,,,,,Tingly,...,,Relaxed,,,Happy,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5146,5146.0,KickFlip #6,,,,,,,,,...,,,,,,,,,,
5148,5148.0,Humpty XIII,,,,,,,,,...,,,,,,,,,,
5155,5155.0,Cherry Icee,,4.3,6.0,,,,,,...,,,,,,,,,,
5157,5157.0,G Purps,,3.6,11.0,,,,,,...,,,,,,,,,,


In [None]:
## Distribution of THC%
## NOTE(review): this plots df, not the filled df2 - confirm which one is intended
fig, ax = plt.subplots()
df['THC'].hist(bins=30,rwidth=0.8,grid=False,ax=ax)
ax.set_xlabel('THC%')
ax.set_ylabel('Amount')
ax.set_title('THC% Distribution')
plt.show()

In [None]:

## Top 5 feelings, counted over the Feeling_1..Feeling_3 columns together
## NOTE(review): Feeling_0 and Feeling_4 are not included - confirm this is intended
feeling_cols = ['Feeling_1','Feeling_2','Feeling_3']

## Pool the columns before counting: adding separate per-column top-5 Series aligns
## on the label, so a feeling absent from any one of them would come out as NaN
freq = (
    pd.concat([df[col] for col in feeling_cols], ignore_index=True)
    .value_counts()
    .head(5)
    .sort_index(ascending=True)
)


freq.plot(kind='pie')
plt.title('Top 5 Feelings')
plt.show()

In [None]:
## Top health conditions

## NOTE(review): 'Helps with_0' and 'Helps with_4' are not included - confirm this is intended
helps_cols = ['Helps with_1','Helps with_2','Helps with_3']

## Pool the columns before counting: adding separate per-column top-5 Series aligns
## on the label, so a condition absent from any one of them would come out as NaN
freq = (
    pd.concat([df[col] for col in helps_cols], ignore_index=True)
    .value_counts()
    .head(5)
    .sort_index(ascending=True)
)


freq.plot(kind='bar')
plt.title('Top 5 Health Conditions')
plt.ylabel('Amount')
plt.show()