In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_tuner as kt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from keras import Model, layers
from pathlib import Path
os.environ["KERAS_BACKEND"] = 'tensorflow'

In [2]:
tf.__version__

'2.17.0'

In [3]:
df = pd.read_csv('data/cleaned_joined_data.csv')
df = df.set_index('Sample Name')
df

Unnamed: 0_level_0,cis-Nerolidol,trans-Nerolidol,trans-Nerolidol 1,trans-Nerolidol 2,trans-Ocimene,3-Carene,Camphene,Caryophyllene Oxide,Eucalyptol,Geraniol,...,fibromyalgia,crohn's_disease,phantom_limb_pain,epilepsy,multiple_sclerosis,parkinson's,tourette's_syndrome,alzheimer's,hiv/aids,tinnitus
Sample Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22,0.0,0.0,0.0,0.0,0.0,0.00,0.040000,0.000000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24k Gold,0.0,0.0,0.0,0.0,0.0,0.00,0.011000,0.003000,0.000,0.001000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3 Kings,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.533333,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3X Crazy,0.0,0.0,0.0,0.0,0.0,0.00,0.003000,0.004000,0.000,0.002000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
501st OG,0.0,0.0,0.0,0.0,0.0,0.00,0.006636,0.057818,0.000,0.000909,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zelly’s Gift,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zeus OG,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zkittlez,0.0,0.0,0.0,0.0,0.0,0.00,0.010000,0.134000,0.001,0.011500,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zookies,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.022000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
fill_ins = {'Animal OG': 'Indica',
 'Cheesecake': 'Indica',
 'Dragon Lady': 'Sativa',
 'Green Marvel': 'Sativa',
 'Jack Straw': 'Sativa',
 'Lemon Punch': 'Hybrid',
 'Lychee': 'Hybrid',
 'M4': 'Hybrid',
 'Melon Cookies': 'Indica',
 'Mendo Purple': 'Hybrid',
 'Petroleum Nightmare': 'Hybrid',
 'Purgatory': 'Indica',
 'Silver Fox': 'Sativa',
 'Sour Walker': 'Hybrid',
 'Spirit In The Sky': 'Hybrid',
 'Strawberry Trainwreck': 'Sativa',
 'Tigers Blood': 'Indica',
 'Zprite': 'Hybrid'}
for strain in fill_ins:
    df.loc[strain, 'type'] = fill_ins[strain]

In [5]:
df.loc['Animal OG','type']

'Indica'

In [6]:
type_enc = OrdinalEncoder(categories = [['Indica', 'Hybrid', 'Sativa']], encoded_missing_value=-1, handle_unknown='use_encoded_value', unknown_value=-1)

In [7]:
df['encoded_type'] = type_enc.fit_transform(df['type'].values.reshape(-1,1))
df

Unnamed: 0_level_0,cis-Nerolidol,trans-Nerolidol,trans-Nerolidol 1,trans-Nerolidol 2,trans-Ocimene,3-Carene,Camphene,Caryophyllene Oxide,Eucalyptol,Geraniol,...,crohn's_disease,phantom_limb_pain,epilepsy,multiple_sclerosis,parkinson's,tourette's_syndrome,alzheimer's,hiv/aids,tinnitus,encoded_type
Sample Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22,0.0,0.0,0.0,0.0,0.0,0.00,0.040000,0.000000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
24k Gold,0.0,0.0,0.0,0.0,0.0,0.00,0.011000,0.003000,0.000,0.001000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3 Kings,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.533333,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3X Crazy,0.0,0.0,0.0,0.0,0.0,0.00,0.003000,0.004000,0.000,0.002000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
501st OG,0.0,0.0,0.0,0.0,0.0,0.00,0.006636,0.057818,0.000,0.000909,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zelly’s Gift,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
Zeus OG,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Zkittlez,0.0,0.0,0.0,0.0,0.0,0.00,0.010000,0.134000,0.001,0.011500,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zookies,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.022000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [36]:
df.columns

Index(['cis-Nerolidol', 'trans-Nerolidol', 'trans-Nerolidol 1',
       'trans-Nerolidol 2', 'trans-Ocimene', '3-Carene', 'Camphene',
       'Caryophyllene Oxide', 'Eucalyptol', 'Geraniol',
       ...
       'crohn's_disease', 'phantom_limb_pain', 'epilepsy',
       'multiple_sclerosis', 'parkinson's', 'tourette's_syndrome',
       'alzheimer's', 'hiv/aids', 'tinnitus', 'encoded_type'],
      dtype='object', length=103)

In [8]:
predictors = ['encoded_type',
 'cis-Nerolidol',
 'trans-Nerolidol',
 'trans-Nerolidol 1',
 'trans-Nerolidol 2',
 'trans-Ocimene',
 '3-Carene',
 'Camphene',
 'Caryophyllene Oxide',
 'Eucalyptol',
 'Geraniol',
 'Guaiol',
 'Isopulegol',
 'Linalool',
 'Ocimene',
 'Terpinolene',
 'alpha-Bisabolol',
 'alpha-Humulene',
 'alpha-Pinene',
 'alpha-Terpinene',
 'beta-Caryophyllene',
 'beta-Myrcene',
 'beta-Ocimene',
 'beta-Pinene',
 'delta-Limonene',
 'gamma-Terpinene',
 'p-Cymene',
 'delta-9 THC-A',
 'delta-9 THC',
 'delta-8 THC',
 'THC-A',
 'THCV',
 'CBN',
 'CBD-A',
 'CBD',
 'CBDV',
 'CBDV-A',
 'delta-9 CBG-A',
 'delta-9 CBG',
 'CBC',
]
effects = ['relaxed',
 'happy',
 'euphoric',
 'uplifted',
 'sleepy',
 'dry_mouth',
 'dry_eyes',
 'dizzy',
 'paranoid',
 'anxious',
 'hungry',
 'talkative',
 'creative',
 'energetic',
 'focused',
 'giggly',
 'tingly',
 'aroused',]
illnesses = [ 
 'stress',
 'pain',
 'depression',
 'anxiety',
 'insomnia',
 'headache',
 'ptsd',
 'fatigue',
 'lack_of_appetite',
 'nausea',
 'headaches',
 'bipolar_disorder',
 'cancer',
 'cramps',
 'gastrointestinal_disorder',
 'inflammation',
 'muscle_spasms',
 'eye_pressure',
 'migraines',
 'asthma',
#  'anorexia',
 'arthritis',
 'add/adhd',
#  'muscular_dystrophy',
#  'hypertension',
 'glaucoma',
 'pms',
 'seizures',
 'spasticity',
#  'spinal_cord_injury',
 'fibromyalgia',
 "crohn's_disease",
#  'phantom_limb_pain',
 'epilepsy',
 'multiple_sclerosis',
 "parkinson's",
#  "tourette's_syndrome",
#  "alzheimer's",
 'hiv/aids',
#  'tinnitus'
 ]

In [9]:
df[effects].describe()

Unnamed: 0,relaxed,happy,euphoric,uplifted,sleepy,dry_mouth,dry_eyes,dizzy,paranoid,anxious,hungry,talkative,creative,energetic,focused,giggly,tingly,aroused
count,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0
mean,0.469802,0.473798,0.382934,0.352898,0.123888,0.213033,0.11955,0.04486,0.032439,0.030135,0.05874,0.026715,0.111926,0.10486,0.065419,0.02018,0.020783,0.00559
std,0.272577,0.216153,0.225564,0.222377,0.203578,0.136311,0.098156,0.051004,0.042986,0.061703,0.143268,0.10975,0.195052,0.201908,0.161529,0.10303,0.098608,0.054
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.37,0.42,0.315,0.25,0.0,0.14,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.53,0.52,0.45,0.4,0.0,0.22,0.12,0.04,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.66,0.6,0.52,0.5,0.295,0.29,0.17,0.07,0.05,0.04,0.0,0.0,0.27,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.66,0.33,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [10]:
df[effects].head()

Unnamed: 0_level_0,relaxed,happy,euphoric,uplifted,sleepy,dry_mouth,dry_eyes,dizzy,paranoid,anxious,hungry,talkative,creative,energetic,focused,giggly,tingly,aroused
Sample Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
22,0.0,0.5,0.5,0.5,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0
24k Gold,0.61,0.55,0.41,0.44,0.0,0.23,0.11,0.05,0.0,0.02,0.0,0.0,0.24,0.0,0.0,0.0,0.0,0.0
3 Kings,0.58,0.58,0.48,0.48,0.0,0.21,0.13,0.05,0.02,0.02,0.0,0.0,0.31,0.0,0.0,0.0,0.0,0.0
3X Crazy,0.76,0.48,0.47,0.31,0.38,0.23,0.16,0.03,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
501st OG,0.81,0.47,0.44,0.25,0.41,0.19,0.06,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
df[effects].sum()

relaxed      521.95
happy        526.39
euphoric     425.44
uplifted     392.07
sleepy       137.64
dry_mouth    236.68
dry_eyes     132.82
dizzy         49.84
paranoid      36.04
anxious       33.48
hungry        65.26
talkative     29.68
creative     124.35
energetic    116.50
focused       72.68
giggly        22.42
tingly        23.09
aroused        6.21
dtype: float64

In [12]:
# drop any columns with all zeros
df[illnesses].sum()==0

stress                       False
pain                         False
depression                   False
anxiety                      False
insomnia                     False
headache                     False
ptsd                         False
fatigue                      False
lack_of_appetite             False
nausea                       False
headaches                    False
bipolar_disorder             False
cancer                       False
cramps                       False
gastrointestinal_disorder    False
inflammation                 False
muscle_spasms                False
eye_pressure                 False
migraines                    False
asthma                       False
arthritis                    False
add/adhd                     False
glaucoma                     False
pms                          False
seizures                     False
spasticity                   False
fibromyalgia                 False
crohn's_disease              False
epilepsy            

In [13]:
# empty_cols = ['anorexia',
#               'muscular_dystrophy',
#               'hypertension',
#               'spinal_cord_injury',
#               'phantom_limb_pain',
#               "tourette's_syndrome",
#               "alzheimer's",
#               'tinnitus'
#               ]
# df = df.drop(columns=empty_cols)
# df

In [14]:
df_binary_target = df.copy()
# for each column
for col in df[effects+illnesses].columns:
    print(col)
    # calculate the mean
    col_mean = df[col].mean()
    # for each row
    for row in df.index:
        # if gte to column mean
        if df.loc[row,col]>=col_mean:
            # set to true
            df_binary_target.loc[row,col] = 1
        else:
            df_binary_target.loc[row,col] = 0
df_binary_target

relaxed
happy
euphoric
uplifted
sleepy
dry_mouth
dry_eyes
dizzy
paranoid
anxious
hungry
talkative
creative
energetic
focused
giggly
tingly
aroused
stress
pain
depression
anxiety
insomnia
headache
ptsd
fatigue
lack_of_appetite
nausea
headaches
bipolar_disorder
cancer
cramps
gastrointestinal_disorder
inflammation
muscle_spasms
eye_pressure
migraines
asthma
arthritis
add/adhd
glaucoma
pms
seizures
spasticity
fibromyalgia
crohn's_disease
epilepsy
multiple_sclerosis
parkinson's
hiv/aids


Unnamed: 0_level_0,cis-Nerolidol,trans-Nerolidol,trans-Nerolidol 1,trans-Nerolidol 2,trans-Ocimene,3-Carene,Camphene,Caryophyllene Oxide,Eucalyptol,Geraniol,...,crohn's_disease,phantom_limb_pain,epilepsy,multiple_sclerosis,parkinson's,tourette's_syndrome,alzheimer's,hiv/aids,tinnitus,encoded_type
Sample Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22,0.0,0.0,0.0,0.0,0.0,0.00,0.040000,0.000000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
24k Gold,0.0,0.0,0.0,0.0,0.0,0.00,0.011000,0.003000,0.000,0.001000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3 Kings,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.533333,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3X Crazy,0.0,0.0,0.0,0.0,0.0,0.00,0.003000,0.004000,0.000,0.002000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
501st OG,0.0,0.0,0.0,0.0,0.0,0.00,0.006636,0.057818,0.000,0.000909,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zelly’s Gift,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
Zeus OG,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Zkittlez,0.0,0.0,0.0,0.0,0.0,0.00,0.010000,0.134000,0.001,0.011500,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zookies,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.022000,0.000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [15]:
df_binary_target[effects].describe()

Unnamed: 0,relaxed,happy,euphoric,uplifted,sleepy,dry_mouth,dry_eyes,dizzy,paranoid,anxious,hungry,talkative,creative,energetic,focused,giggly,tingly,aroused
count,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0
mean,0.627363,0.650765,0.680468,0.585959,0.30063,0.523852,0.50405,0.450945,0.405941,0.315032,0.164716,0.067507,0.275428,0.227723,0.160216,0.045005,0.053105,0.012601
std,0.483724,0.476943,0.466505,0.492778,0.458739,0.499656,0.500209,0.497812,0.491294,0.464738,0.371092,0.251011,0.446931,0.419552,0.366971,0.207407,0.224344,0.111596
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [20]:
X = df_binary_target[predictors]
y_effects = df_binary_target[effects]
y_illnesses = df_binary_target[illnesses]

X_train, X_test, y_train_effects, y_test_effects, y_train_illnesses, y_test_illnesses = train_test_split(X, y_effects, y_illnesses)

In [51]:
X_train

Unnamed: 0_level_0,encoded_type,cis-Nerolidol,trans-Nerolidol,trans-Nerolidol 1,trans-Nerolidol 2,trans-Ocimene,3-Carene,Camphene,Caryophyllene Oxide,Eucalyptol,...,THC-A,THCV,CBN,CBD-A,CBD,CBDV,CBDV-A,delta-9 CBG-A,delta-9 CBG,CBC
Sample Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Venom OG,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.011,0.111714,0.000,...,17.680000,0.000000,0.000000,0.057143,0.018571,0.0,0.00000,0.731429,0.067143,0.015714
American Pie,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000,0.000000,0.000,...,0.000000,0.000000,0.000000,0.420000,0.480000,0.0,0.00000,0.000000,0.510000,0.020000
Aurora Indica,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000,0.250000,0.000,...,0.000000,0.000000,0.000000,0.170000,0.230000,0.0,0.00000,0.920000,0.000000,0.070000
Zkittlez,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.010,0.134000,0.001,...,12.720000,0.065000,0.370000,0.065000,0.000000,0.0,0.00000,0.720000,0.460000,0.355000
Dr. Grinspoon,2.0,0.0,0.0,0.0,0.0,0.0,0.010000,0.000,0.205000,0.000,...,0.000000,0.010000,0.015000,0.085000,0.055000,0.0,0.00000,0.095000,0.460000,0.060000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Space Dawg,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000,0.373636,0.000,...,0.000000,0.000000,0.000000,0.125455,0.132727,0.0,0.00000,0.514545,0.212727,0.020000
Elephant,2.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.005,0.098000,0.000,...,9.664000,0.001000,0.001000,0.098000,0.117000,0.0,0.00000,0.553000,0.001000,0.038000
Critical Kush,0.0,0.0,0.0,0.0,0.0,0.0,0.001478,0.000,0.167000,0.000,...,1.886957,0.007391,0.010870,0.104783,0.102174,0.0,0.00087,0.768696,0.106957,0.055652
Lemon Zest,2.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000,1.030000,0.000,...,0.000000,0.000000,0.000000,0.120000,0.060000,0.0,0.00000,0.700000,0.180000,0.000000


In [21]:
# Scale the X data by using StandardScaler()
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [50]:
X_train_scaled.shape, y_train_effects.shape

((833, 40), (833, 18))

In [53]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_effects_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
    # nn_model.add(tf.keras.layers.InputLayer(units=hp.Int('first_units',
        min_value=1,
        max_value=128,
        step=2), activation=activation, input_dim=len(X.columns)))
    
    # model.add(tf.keras.layers.InputLayer(input_shape=(40,), name='input_layer'))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 16)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=128,
            step=2),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=y_train_effects.shape[1], activation="softmax"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

In [54]:
tuner_effects = kt.Hyperband(
    create_effects_model,
    objective="val_accuracy",
    max_epochs=100,
    hyperband_iterations=2,
    overwrite=True)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [55]:
# Run the kerastuner search for best hyperparameters
tuner_effects.search(X_train_scaled,y_train_effects,epochs=20,validation_data=(X_test_scaled,y_test_effects))

Trial 497 Complete [00h 00m 12s]
val_accuracy: 0.564748227596283

Best val_accuracy So Far: 0.7374100685119629
Total elapsed time: 00h 41m 40s


In [56]:
# Get best model hyperparameters
best_hyper_effects = tuner_effects.get_best_hyperparameters(1)[0]
best_hyper_effects.values

{'activation': 'relu',
 'first_units': 103,
 'num_layers': 12,
 'units_0': 21,
 'units_1': 103,
 'units_2': 95,
 'units_3': 3,
 'units_4': 41,
 'units_5': 115,
 'units_6': 123,
 'units_7': 37,
 'units_8': 61,
 'units_9': 31,
 'units_10': 47,
 'units_11': 13,
 'units_12': 85,
 'units_13': 67,
 'units_14': 81,
 'units_15': 105,
 'tuner/epochs': 34,
 'tuner/initial_epoch': 12,
 'tuner/bracket': 4,
 'tuner/round': 3,
 'tuner/trial_id': '0130'}

In [81]:
# Evaluate best model against full test data
best_model = tuner_effects.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test_effects,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


9/9 - 0s - 14ms/step - accuracy: 0.7374 - loss: 0.5267
Loss: 0.5267084240913391, Accuracy: 0.7374100685119629


In [17]:
file_path_effects_model_2 = Path('effects-model-2.keras')
# best_model.save(file_path_effects_model_2)

In [18]:
# Set the model's file path
# file_path = Path("sports-articles.keras")

# Load the model to a new object
best_model = tf.keras.models.load_model(file_path_effects_model_2)

  saveable.load_own_variables(weights_store.get(inner_path))


In [22]:
predictions = best_model.predict(X_test_scaled, verbose=2)
predictions

9/9 - 0s - 11ms/step


array([[0.1567979 , 0.13849534, 0.14424445, ..., 0.00393051, 0.00518295,
        0.00102695],
       [0.15670869, 0.13925324, 0.14371768, ..., 0.00409498, 0.00533864,
        0.00108637],
       [0.19853659, 0.16109328, 0.17787156, ..., 0.00215632, 0.00268098,
        0.00037592],
       ...,
       [0.15616743, 0.14307578, 0.14184497, ..., 0.00547467, 0.00650117,
        0.00161634],
       [0.13672051, 0.13179044, 0.12790436, ..., 0.00851571, 0.00974447,
        0.00322194],
       [0.12422462, 0.12509052, 0.11526982, ..., 0.01056575, 0.01214096,
        0.0046318 ]], dtype=float32)

In [27]:
df[effects].mean()

relaxed      0.469802
happy        0.473798
euphoric     0.382934
uplifted     0.352898
sleepy       0.123888
dry_mouth    0.213033
dry_eyes     0.119550
dizzy        0.044860
paranoid     0.032439
anxious      0.030135
hungry       0.058740
talkative    0.026715
creative     0.111926
energetic    0.104860
focused      0.065419
giggly       0.020180
tingly       0.020783
aroused      0.005590
dtype: float64

In [29]:
predictions_df = pd.DataFrame(data=predictions)
# predictions_df = predictions_df.apply(lambda x: round(x,0))
predictions_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
count,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0
mean,0.167572,0.1415743,0.151709,0.1005077,0.04138857,0.07455305,0.07327052,0.062813,0.048905,0.032951,0.019601,0.007109,0.027079,0.022215,0.016297,0.004892,0.005921,0.001642
std,0.049018,0.01644825,0.034337,0.01057326,0.0054409,0.009647839,0.008672826,0.008873,0.007098,0.008902,0.00635,0.004374,0.006936,0.009537,0.00694,0.003214,0.003568,0.001744
min,0.096429,1.864804e-13,0.091661,1.766379e-28,6.86333e-37,7.622825e-36,2.842209e-38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.145035,0.1357428,0.133708,0.09926104,0.03941422,0.07085359,0.07200283,0.059997,0.04618,0.027522,0.015427,0.003912,0.022655,0.015617,0.011242,0.002532,0.003356,0.000502
50%,0.161323,0.1430894,0.147528,0.1021445,0.04098105,0.07363446,0.07484199,0.064321,0.050209,0.032617,0.019497,0.006146,0.027464,0.020684,0.015418,0.004171,0.005258,0.001077
75%,0.183176,0.1502637,0.165854,0.1048904,0.04534524,0.08120777,0.07693534,0.068069,0.05309,0.03889,0.023719,0.009347,0.031676,0.027827,0.020547,0.006459,0.007805,0.002142
max,0.667201,0.1736275,0.451821,0.1063651,0.04740679,0.08535503,0.08086193,0.073285,0.057892,0.052196,0.037684,0.024529,0.044477,0.049971,0.0386,0.018135,0.020368,0.01136


In [None]:
for i in range(0,len(predictions_df.columns)):
    

In [24]:
predictions_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
count,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0
mean,0.007194,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
std,0.084666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
from sklearn.metrics import classification_report
print(classification_report(y_test_effects, predictions_df))

              precision    recall  f1-score   support

           0       0.50      0.01      0.01       185
           1       0.00      0.00      0.00       183
           2       0.00      0.00      0.00       181
           3       0.00      0.00      0.00       157
           4       0.00      0.00      0.00        89
           5       0.00      0.00      0.00       149
           6       0.00      0.00      0.00       139
           7       0.00      0.00      0.00       121
           8       0.00      0.00      0.00       106
           9       0.00      0.00      0.00        82
          10       0.00      0.00      0.00        48
          11       0.00      0.00      0.00        14
          12       0.00      0.00      0.00        86
          13       0.00      0.00      0.00        55
          14       0.00      0.00      0.00        42
          15       0.00      0.00      0.00        15
          16       0.00      0.00      0.00        17
          17       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [32]:
X = df[predictors]
y_effects = df[effects]
y_illnesses = df[illnesses]

X_train, X_test, y_train_effects, y_test_effects, y_train_illnesses, y_test_illnesses = train_test_split(X, y_effects, y_illnesses)

In [86]:
# Scale the X data by using StandardScaler()
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [59]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_continuous_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    # nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
    nn_model.add(tf.keras.layers.InputLayer(units=hp.Int('first_units',
        min_value=1,
        max_value=128,
        # step=2), activation=activation, input_dim=len(X.columns))) #shape=(len(X.columns),)
        step=2), activation=activation, shape=(len(X.columns),))) #
    
    # model.add(tf.keras.layers.InputLayer(input_shape=(40,), name='input_layer'))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 16)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=128,
            step=2),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=y_train_effects.shape[1], activation="softmax"))

    # Compile the model
    nn_model.compile(loss="mse", optimizer='adam', metrics=["mae"])

    return nn_model

In [60]:
tuner_effects_continuous = kt.Hyperband(
    create_continuous_model,
    objective="val_mae",
    max_epochs=50,
    hyperband_iterations=2,
    overwrite=True)

In [61]:
# Run the kerastuner search for best hyperparameters
tuner_effects_continuous.search(X_train,y_train_effects,epochs=20,validation_data=(X_test,y_test_effects))

Trial 172 Complete [00h 00m 12s]
val_mae: 0.10913544148206711

Best val_mae So Far: 0.10875701904296875
Total elapsed time: 00h 27m 46s


In [37]:
# Get best model hyperparameters
# best_hyper_effects_continuous = tuner_effects_continuous.get_best_hyperparameters(1)[0]
# best_hyper_effects_continuous.values

{'activation': 'tanh',
 'first_units': 105,
 'num_layers': 5,
 'units_0': 31,
 'units_1': 41,
 'units_2': 113,
 'units_3': 101,
 'units_4': 87,
 'units_5': 77,
 'units_6': 119,
 'units_7': 5,
 'units_8': 35,
 'units_9': 7,
 'units_10': 1,
 'units_11': 99,
 'units_12': 89,
 'units_13': 13,
 'units_14': 89,
 'units_15': 9,
 'tuner/epochs': 34,
 'tuner/initial_epoch': 12,
 'tuner/bracket': 3,
 'tuner/round': 2,
 'tuner/trial_id': '0435'}

In [39]:
# # Evaluate best model against full test data
# best_model_effects_continuous = tuner_effects_continuous.get_best_models(1)[0]
# model_loss, model_accuracy = best_model_effects_continuous.evaluate(X_test,y_test_effects,verbose=2)
# print(f"Mean Squared Error: {model_loss}, Mean Absolute Error: {model_accuracy}")

9/9 - 0s - 10ms/step - loss: 0.0361 - mae: 0.1088
Mean Squared Error: 0.036108773201704025, Mean Absolute Error: 0.10882902890443802


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [46]:
# file_path_effects_continuous_model = Path('effects-continuous-tuned-model.keras')
# best_model_effects_continuous.save(file_path_effects_continuous_model)

In [62]:
best_hyper_effects_continuous = tuner_effects_continuous.get_best_hyperparameters(1)[0]
best_hyper_effects_continuous.values

{'activation': 'relu',
 'first_units': 69,
 'num_layers': 7,
 'units_0': 109,
 'units_1': 91,
 'units_2': 43,
 'units_3': 51,
 'units_4': 17,
 'units_5': 117,
 'units_6': 43,
 'units_7': 11,
 'units_8': 83,
 'units_9': 85,
 'units_10': 49,
 'units_11': 37,
 'units_12': 27,
 'units_13': 97,
 'units_14': 53,
 'units_15': 65,
 'tuner/epochs': 50,
 'tuner/initial_epoch': 17,
 'tuner/bracket': 1,
 'tuner/round': 1,
 'tuner/trial_id': '0070'}

In [63]:
# Evaluate best model against full test data
best_model_effects_continuous = tuner_effects_continuous.get_best_models(1)[0]
model_loss, model_accuracy = best_model_effects_continuous.evaluate(X_test,y_test_effects,verbose=2)
print(f"Mean Squared Error: {model_loss}, Mean Absolute Error: {model_accuracy}")

9/9 - 0s - 11ms/step - loss: 0.0356 - mae: 0.1088
Mean Squared Error: 0.03561922162771225, Mean Absolute Error: 0.10875701904296875


  saveable.load_own_variables(weights_store.get(inner_path))


In [64]:
file_path_effects_continuous_model = Path('effects-continuous-tuned-model-2.keras')
best_model_effects_continuous.save(file_path_effects_continuous_model)

In [40]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_illnesses_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    # nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
    nn_model.add(tf.keras.layers.InputLayer(units=hp.Int('first_units',
        min_value=1,
        max_value=128,
        step=2), activation=activation, shape=(len(X.columns),))) #input_dim=len(X.columns)

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 16)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=128,
            step=2),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=y_train_illnesses.shape[1], activation="softmax"))

    # Compile the model
    nn_model.compile(loss="mse", optimizer='adam', metrics=["mae"])

    return nn_model

In [41]:
tuner_illnesses_continuous = kt.Hyperband(
    create_illnesses_model,
    objective="val_mae",
    max_epochs=20,
    hyperband_iterations=2,
    overwrite=True)

In [42]:
# Run the kerastuner search for best hyperparameters
tuner_illnesses_continuous.search(X_train,y_train_illnesses,epochs=20,validation_data=(X_test,y_test_illnesses))

Trial 60 Complete [00h 00m 09s]
val_mae: 0.02463902346789837

Best val_mae So Far: 0.023768460378050804
Total elapsed time: 00h 08m 21s


In [43]:
# Get best model hyperparameters
best_hyper_illnesses = tuner_illnesses_continuous.get_best_hyperparameters(1)[0]
best_hyper_illnesses.values

{'activation': 'relu',
 'first_units': 59,
 'num_layers': 14,
 'units_0': 105,
 'units_1': 7,
 'units_2': 29,
 'units_3': 47,
 'units_4': 77,
 'units_5': 95,
 'units_6': 21,
 'units_7': 59,
 'units_8': 107,
 'units_9': 85,
 'units_10': 9,
 'units_11': 41,
 'units_12': 17,
 'units_13': 59,
 'units_14': 3,
 'units_15': 55,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 7,
 'tuner/bracket': 1,
 'tuner/round': 1,
 'tuner/trial_id': '0048'}

In [44]:
# Evaluate best model against full test data
best_model_ill_continuous = tuner_illnesses_continuous.get_best_models(1)[0]
model_loss, model_accuracy = best_model_ill_continuous.evaluate(X_test,y_test_illnesses,verbose=2)
print(f"Mean Squared Error: {model_loss}, Mean Absolute Error: {model_accuracy}")

9/9 - 0s - 13ms/step - loss: 0.0049 - mae: 0.0238
Mean Squared Error: 0.0048784371465444565, Mean Absolute Error: 0.023768460378050804


  saveable.load_own_variables(weights_store.get(inner_path))


In [45]:
file_path_illnesses_continuous_model = Path('illnesses-continuous-tuned-model.keras')
best_model_ill_continuous.save(file_path_illnesses_continuous_model)

In [49]:
predictions_illnesses = best_model_ill_continuous.predict(X_test, verbose=2)
predictions_df_illnesses = pd.DataFrame(predictions_illnesses)
predictions_df_illnesses

9/9 - 0s - 11ms/step


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,0.320715,0.210051,0.203776,0.205907,0.054222,0.000074,7.329862e-06,0.003928,0.000748,0.000177,...,8.063117e-06,4.530450e-06,1.262515e-05,4.622163e-06,5.940671e-06,8.279968e-06,4.952018e-06,2.532989e-05,1.335480e-05,4.492768e-06
1,0.327155,0.211266,0.202697,0.206786,0.048586,0.000036,2.861121e-06,0.002747,0.000448,0.000093,...,3.192551e-06,1.689887e-06,5.185186e-06,1.736141e-06,2.282967e-06,3.271563e-06,1.863703e-06,1.126320e-05,5.563556e-06,1.682631e-06
2,0.351175,0.213848,0.195910,0.208002,0.030384,0.000002,5.603719e-08,0.000612,0.000052,0.000006,...,6.637632e-08,2.739230e-08,1.255560e-07,2.897484e-08,4.192248e-08,6.743832e-08,3.135689e-08,3.798704e-07,1.429922e-07,2.774164e-08
3,0.322936,0.210500,0.203447,0.206242,0.052255,0.000058,5.332855e-06,0.003481,0.000629,0.000143,...,5.894865e-06,3.245908e-06,9.345012e-06,3.319475e-06,4.299634e-06,6.049195e-06,3.558703e-06,1.926009e-05,9.933066e-06,3.223352e-06
4,0.283752,0.197327,0.200712,0.194689,0.083308,0.001459,3.866479e-04,0.017307,0.006388,0.002662,...,4.000222e-04,2.897157e-04,5.362037e-04,2.870047e-04,3.343303e-04,4.142896e-04,3.050055e-04,7.667571e-04,5.336542e-04,2.824699e-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273,0.319388,0.209777,0.203949,0.205698,0.055391,0.000085,8.816705e-06,0.004213,0.000827,0.000201,...,9.673104e-06,5.498633e-06,1.503665e-05,5.602727e-06,7.170146e-06,9.938143e-06,5.999915e-06,2.970478e-05,1.586325e-05,5.448858e-06
274,0.315674,0.208906,0.204340,0.205015,0.058735,0.000124,1.466840e-05,0.005108,0.001091,0.000286,...,1.596376e-05,9.374001e-06,2.433138e-05,9.514776e-06,1.202083e-05,1.641670e-05,1.017973e-05,4.602654e-05,2.546534e-05,9.268573e-06
275,0.326661,0.211211,0.202787,0.206749,0.048974,0.000038,3.065426e-06,0.002821,0.000465,0.000098,...,3.418917e-06,1.817170e-06,5.536716e-06,1.866318e-06,2.451968e-06,3.505165e-06,2.002735e-06,1.196296e-05,5.937079e-06,1.809012e-06
276,0.304156,0.205631,0.204533,0.202261,0.068840,0.000361,6.034516e-05,0.008705,0.002352,0.000752,...,6.427600e-05,4.130130e-05,9.269736e-05,4.148637e-05,5.063470e-05,6.630939e-05,4.425375e-05,1.555207e-04,9.495619e-05,4.059063e-05


In [58]:
predictions_df_illnesses.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
count,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,...,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0
mean,0.317182,0.207578,0.201631,0.203498,0.057717,0.0003593399,8.781978e-05,0.006532758,0.001893803,0.0006877939,...,9.100306e-05,6.566853e-05,0.0001226929,6.506067e-05,7.586125e-05,9.422986e-05,6.914166e-05,0.0001795349,0.0001224929,6.402955e-05
std,0.023935,0.005583,0.005849,0.004812,0.016803,0.0006461851,0.0002221147,0.005567633,0.002357542,0.001106027,...,0.0002257208,0.0001763529,0.0002893556,0.0001731497,0.0001958297,0.0002343636,0.0001835503,0.0003773713,0.0002831192,0.0001710594
min,0.24981,0.178397,0.149665,0.176515,0.00274,7.711467e-13,2.115146e-16,3.501793e-07,1.244123e-09,9.932443e-12,...,3.371263e-16,4.103762e-17,1.344408e-15,5.005338e-17,1.164775e-16,3.291799e-16,5.62761e-17,2.073628e-14,2.055506e-15,4.5120040000000005e-17
25%,0.305787,0.206026,0.201349,0.20251,0.049704,4.190147e-05,3.475856e-06,0.002958509,0.0004981301,0.0001064036,...,3.867687e-06,2.072654e-06,6.233889e-06,2.126306e-06,2.783532e-06,3.965491e-06,2.281466e-06,1.332237e-05,6.669733e-06,2.061972e-06
50%,0.316301,0.208993,0.203503,0.204988,0.058162,0.0001165705,1.347033e-05,0.004946493,0.00104203,0.000269353,...,1.468171e-05,8.574058e-06,2.244995e-05,8.708661e-06,1.102827e-05,1.509698e-05,9.318277e-06,4.278204e-05,2.352975e-05,8.480819e-06
75%,0.325852,0.211017,0.204409,0.206529,0.067445,0.000313765,5.004955e-05,0.008114344,0.00212522,0.0006613929,...,5.345219e-05,3.39352e-05,7.766304e-05,3.413851e-05,4.184317e-05,5.511328e-05,3.643313e-05,0.0001323653,7.977712e-05,3.338248e-05
max,0.449005,0.214216,0.204697,0.208072,0.093578,0.005067549,0.002068241,0.03095243,0.01529788,0.008174365,...,0.002082879,0.001687142,0.002606811,0.001649986,0.001840647,0.002165397,0.001747148,0.003210491,0.002525866,0.001632832


In [57]:
y_test_illnesses.describe()

Unnamed: 0,stress,pain,depression,anxiety,insomnia,headache,ptsd,fatigue,lack_of_appetite,nausea,...,glaucoma,pms,seizures,spasticity,fibromyalgia,crohn's_disease,epilepsy,multiple_sclerosis,parkinson's,hiv/aids
count,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,...,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0,278.0
mean,0.283345,0.210576,0.206511,0.204317,0.098597,0.019568,0.010863,0.037194,0.033921,0.006259,...,0.0,0.00036,0.003597,0.000899,0.0,0.0,0.0,0.0,0.0,0.0
std,0.159242,0.154072,0.13516,0.138866,0.14179,0.033558,0.049239,0.07541,0.102197,0.032563,...,0.0,0.005998,0.059976,0.014994,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.22,0.11,0.14,0.12,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.32,0.22,0.22,0.24,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.38,0.3,0.2875,0.29,0.19,0.03,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.8,1.0,1.0,0.8,1.0,0.2,0.4,0.5,1.0,0.26,...,0.0,0.1,1.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
