In [1]:
import ast
import bs4
import time
import json
import requests
import pandas as pd
import scipy as scipy
import numpy as np
from bs4 import BeautifulSoup
from datetime import datetime
import matplotlib.pyplot as plt

%matplotlib inline

In [37]:
df = pd.read_csv('canna_df.csv')

## The dict-valued columns are stored as strings in the CSV; parse them back into dicts
for dict_col in ('Feelings', 'Cannabinoids'):
    df[dict_col] = df[dict_col].apply(ast.literal_eval)

In [38]:
## Open dictionaries in columns ['Feelings','Cannabinoids'] to columns

df = df.reset_index(drop=True)

## (output column prefix, key inside each row's 'Feelings' dict)
feeling_sources = (('Feeling', 'Feelings'), ('Negative', 'Negatives'), ('Helps with', 'Helps with'))

for i in range(5):  ## the first five entries of every list become their own columns
    for prefix, key in feeling_sources:
        ## key/i are bound as defaults so each lambda reads the values of this iteration
        entries = df['Feelings'].map(lambda feelings, key=key, i=i: feelings[key][i])
        df[f'{prefix}_{i+1}'] = entries.str.replace(r'\b $','',regex=True) ## Remove space at the end of values

for cannabinoid in ('THC', 'CBD', 'CBG'):  ## Turn Cannabinoid dict to columns
    raw = df['Cannabinoids'].map(lambda content, name=cannabinoid: content[name])
    ## Results are assigned back instead of using inplace=True on df[...]: a chained
    ## in-place call acts on a temporary object and does not reach df under Copy-on-Write.
    df[cannabinoid] = (
        raw.str.replace("%",'',regex=True)      # Remove % from numbers
           .replace("—",np.nan,regex=True)      # Replace '—' placeholder values with NaN
    )

df = df.drop(columns=['Feelings','Cannabinoids']) ## Remove old columns

cols = ['Rating','Rating Users','THC','CBD','CBG']
df[cols] = df[cols].apply(pd.to_numeric) # Change columns type to numeric




In [39]:
def get_nan_replace_value(df,parent,mode,col):
    """Derive a replacement value for ``col`` from the rows related to ``parent``.

    Related rows are those whose 'Left Parent' or 'Right Parent' equals
    ``parent``, plus the row whose 'Strain Name' equals ``parent``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'Left Parent', 'Right Parent', 'Strain Name' and ``col``.
    parent : str or NaN
        Name to look related rows up by. NaN never compares equal, so a
        missing parent selects no rows.
    mode : str
        'Feelings'     -> most frequent non-null value of ``col``.
        'Cannabinoids' -> mean of ``col`` rounded to 0 decimals.
    col : str
        Column the value is derived from.

    Returns
    -------
    The derived value, or NaN when the related rows hold no usable value.
    ``None`` for any other ``mode``.
    """
    is_related = (df['Left Parent'] == parent) | (df['Right Parent'] == parent) | (df['Strain Name'] == parent)
    related = df.loc[is_related] ## All rows with same parent, including the parent

    if mode == 'Feelings':
        if col not in related.columns:
            return np.nan

        counts = related[col].value_counts() ## Limits to look only at the same col
        # idxmax() raises on an empty Series, so the "no data" case is handled
        # explicitly instead of through a blanket except (np.NaN no longer exists in NumPy 2).
        return counts.idxmax() if not counts.empty else np.nan

    elif mode == 'Cannabinoids':
        # The mean of an empty / all-NaN selection is NaN, which round() passes through.
        return round(related[col].mean(),0)

    return None



def complete_nan(df,col_names):
    """Fill missing values of ``col_names`` using each row's parents.

    For every column, candidate rows get the value that
    ``get_nan_replace_value`` derives from the 'Left Parent'; when that is
    null the 'Right Parent' is tried. Only null cells are overwritten.

    Candidate rows:
      * column name contains 'Feeling', 'Negative' or 'Helps with': rows where
        all five ``<prefix>_1`` .. ``<prefix>_5`` columns are null and at
        least one parent is known. The selection is taken once per prefix,
        before any column of that prefix is filled, so all five columns of a
        selected row are completed (re-evaluating it per column would drop the
        row as soon as ``<prefix>_1`` had been filled).
      * any other column (treated as a cannabinoid): rows where that column
        is null and at least one parent is known.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Left Parent', 'Right Parent' and the processed columns.
        It is modified in place.
    col_names : iterable of str
        Columns to complete, processed in the given order.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    has_parent = df['Left Parent'].notna() | df['Right Parent'].notna()
    group_masks = {}  # prefix -> candidate-row mask captured before the first fill

    for col in col_names:
        prefix = next((p for p in ('Feeling', 'Negative', 'Helps with') if p in col), None)

        if prefix is None:
            mode = 'Cannabinoids'
            needs_fill = df[col].isna() & has_parent
        else:
            mode = 'Feelings'
            if prefix not in group_masks:
                group_cols = [f'{prefix}_{n}' for n in range(1, 6)]
                group_masks[prefix] = df[group_cols].isna().all(axis=1) & has_parent
            needs_fill = group_masks[prefix]

        if not needs_fill.any():
            continue  # nothing to complete for this column

        try:
            fill_values = []
            for left, right in zip(df.loc[needs_fill, 'Left Parent'], df.loc[needs_fill, 'Right Parent']):
                value = get_nan_replace_value(df, left, mode, col)
                if pd.isnull(value):
                    value = get_nan_replace_value(df, right, mode, col)
                fill_values.append(value)
        except (TypeError, ValueError) as e:
            # Best effort: a column whose values cannot be aggregated is
            # reported and left untouched instead of aborting the whole run.
            print(f"complete_nan: skipped column {col!r}: {e}")
            continue

        fill = pd.Series(fill_values, index=df.index[needs_fill.to_numpy()])
        # Assign the result back: an in-place fillna on df[col] acts on a
        # temporary object and does not reach df under Copy-on-Write.
        df[col] = df[col].fillna(fill)

    return df

In [40]:
## Columns to complete: the five slots of each feeling group, then the cannabinoid columns
cols = [
    f'{prefix}_{slot}'
    for prefix in ('Feeling', 'Negative', 'Helps with')
    for slot in range(1, 6)
] + ['THC', 'CBD', 'CBG']

feelings_complete = complete_nan(df, cols) ## Fill NaN

In [10]:
## Preview the first rows of the frame returned by complete_nan
feelings_complete.head()

Unnamed: 0.1,Unnamed: 0,Strain Name,Type,Rating,Rating Users,Left Parent,Right Parent,Left Child,Right Child,Feeling_1,...,Helps with_3,Feeling_4,Negative_4,Helps with_4,Feeling_5,Negative_5,Helps with_5,THC,CBD,CBG
0,0,Mind Flayer,Hybrid,,,,,,,,...,,,,,,,,19.0,,1.0
1,1,,,5.0,68.0,,,,,,...,,,,,,,,,,
2,2,Lucid Dream,Hybrid,4.6,107.0,Blue Dream,Amnesia Haze,,,Uplifted,...,Stress,Euphoric,Anxious,Pain,Creative,Headache,Fatigue,20.0,11.0,1.0
3,3,Fred Flipn’ Stoned,Indica,5.0,2.0,Pink Champagne,Straight A's Haze,,,Relaxed,...,ADD,Tingly,,PMS,Focused,,,18.0,,1.0
4,4,Black Magic,Indica,4.5,17.0,,,,,Relaxed,...,Fatigue,Sleepy,,Insomnia,Focused,,Lack of appetite,18.0,,


In [41]:
## Continue on a copy and merge in the values returned by complete_nan
df = df.copy()
df.update(feelings_complete)

## Discard every column whose label starts with "Unnamed"
## (presumably index columns carried in from the CSV file; TODO confirm)
is_unnamed = df.columns.str.startswith("Unnamed")
df = df.loc[:, ~is_unnamed]

In [42]:


## Despite its name, this frame keeps only the rows that HAVE both a 'Rating Users' value and a 'Type'
without_rating_and_type = df.dropna(subset=['Rating Users', 'Type']).copy()


In [43]:
# without_rating.head(10)

## Show the first ten rows of the filtered frame
without_rating_and_type.head(10)

# without_type[without_type['THC'].isna()]


# curr = without_type[(without_type['Left Parent'].isna()) & (without_type['Right Parent'].isna()) & (without_type['THC'].isna())].copy()
# curr[curr['Rating Users'] < 10]

Unnamed: 0,Strain Name,Type,Rating,Rating Users,Left Parent,Right Parent,Left Child,Right Child,Feeling_1,Negative_1,...,Helps with_3,Feeling_4,Negative_4,Helps with_4,Feeling_5,Negative_5,Helps with_5,THC,CBD,CBG
2,Lucid Dream,Hybrid,4.6,107.0,Blue Dream,Amnesia Haze,,,Uplifted,Dry mouth,...,Stress,Euphoric,Anxious,Pain,Creative,Headache,Fatigue,20.0,11.0,1.0
3,Fred Flipn’ Stoned,Indica,5.0,2.0,Pink Champagne,Straight A's Haze,,,Relaxed,Dry mouth,...,ADD,Tingly,,PMS,Focused,,,18.0,,1.0
4,Black Magic,Indica,4.5,17.0,,,,,Relaxed,Dry mouth,...,Fatigue,Sleepy,,Insomnia,Focused,,Lack of appetite,18.0,,
6,NYC Diesel,Hybrid,4.2,939.0,Afghani,Mexican,Blue Diesel,Strawberry Diesel,Happy,Dry mouth,...,Depression,Relaxed,Dizzy,Pain,Energetic,Headache,Nausea,18.0,4.0,1.0
7,Purple Goat,Hybrid,4.2,5.0,Trainwreck,Blueberry Skunk,,,Happy,Dry mouth,...,Insomnia,Talkative,,Pain,Creative,,Fatigue,15.0,1.0,1.0
8,Alphadawg,Hybrid,5.0,1.0,Chemdawg,,,,Relaxed,Dry mouth,...,,,,,,,,16.0,1.0,1.0
9,Dorit,Indica,4.4,7.0,,,,,Relaxed,Dry eyes,...,Stress,Tingly,,Anxiety,Sleepy,,Arthritis,,,
10,Magic Jordan,Hybrid,4.0,6.0,,,,,Relaxed,Dry eyes,...,Anxiety,Euphoric,,Gastrointestinal disorder,Uplifted,,PTSD,,,
11,Sour Sunset,Hybrid,4.7,49.0,Sour Diesel,,,,Relaxed,Dry mouth,...,Anxiety,Uplifted,Dizzy,Headaches,Focused,Headache,Pain,18.0,6.0,1.0
12,Purple Chemdawg,Indica,4.5,170.0,Granddaddy Purple,Chemdawg,,,Relaxed,Dry mouth,...,Anxiety,Uplifted,Headache,Depression,Sleepy,Anxious,Insomnia,17.0,7.0,1.0


In [22]:
# df[df['Strain Name'] == "Straight A's Haze"]

Unnamed: 0,Strain Name,Type,Rating,Rating Users,Left Parent,Right Parent,Left Child,Right Child,Feeling_1,Negative_1,...,Helps with_3,Feeling_4,Negative_4,Helps with_4,Feeling_5,Negative_5,Helps with_5,THC,CBD,CBG
2870,Straight A's Haze,Sativa,5.0,3.0,DelaHaze,,,,Happy,Dry mouth,...,Depression,Talkative,,Headaches,Relaxed,,Insomnia,18.0,,1.0


In [15]:
## Show the first ten rows of the full (unfiltered) frame
df.head(10)

Unnamed: 0,Strain Name,Type,Rating,Rating Users,Left Parent,Right Parent,Left Child,Right Child,Feeling_1,Negative_1,...,Helps with_3,Feeling_4,Negative_4,Helps with_4,Feeling_5,Negative_5,Helps with_5,THC,CBD,CBG
0,Mind Flayer,Hybrid,,,,,,,,,...,,,,,,,,19.0,,1.0
1,,,5.0,68.0,,,,,,,...,,,,,,,,,,
2,Lucid Dream,Hybrid,4.6,107.0,Blue Dream,Amnesia Haze,,,Uplifted,Dry mouth,...,Stress,Euphoric,Anxious,Pain,Creative,Headache,Fatigue,20.0,11.0,1.0
3,Fred Flipn’ Stoned,Indica,5.0,2.0,Pink Champagne,Straight A's Haze,,,Relaxed,Dry mouth,...,ADD,Tingly,,PMS,Focused,,,18.0,,1.0
4,Black Magic,Indica,4.5,17.0,,,,,Relaxed,Dry mouth,...,Fatigue,Sleepy,,Insomnia,Focused,,Lack of appetite,18.0,,
5,Chocolate Frosted Sherbet,,5.0,2.0,,,,,,,...,,,,,,,,25.0,,1.0
6,NYC Diesel,Hybrid,4.2,939.0,Afghani,Mexican,Blue Diesel,Strawberry Diesel,Happy,Dry mouth,...,Depression,Relaxed,Dizzy,Pain,Energetic,Headache,Nausea,18.0,4.0,1.0
7,Purple Goat,Hybrid,4.2,5.0,Trainwreck,Blueberry Skunk,,,Happy,Dry mouth,...,Insomnia,Talkative,,Pain,Creative,,Fatigue,15.0,1.0,1.0
8,Alphadawg,Hybrid,5.0,1.0,Chemdawg,,,,Relaxed,Dry mouth,...,,,,,,,,16.0,1.0,1.0
9,Dorit,Indica,4.4,7.0,,,,,Relaxed,Dry eyes,...,Stress,Tingly,,Anxiety,Sleepy,,Arthritis,,,


In [44]:

## Persist the filtered frame. NOTE: to_csv writes the row index by default, so reading
## this file back with a plain pd.read_csv yields an extra "Unnamed: 0" column.
without_rating_and_type.to_csv('clean_df.csv')