### Keras MMP
- Multilayer Perceptron architecture
- Train model
- Baseline Accuracy

In [94]:
"""
Import Statements

"""

# Classics
import numpy as np
import pandas as pd

from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [95]:
df = pd.read_csv('/Users/jorge/Med-Cabinet-2/Data/cannabis.csv')
print(df.shape)
df.head(2)

(2351, 6)


Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...


In [96]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2351 entries, 0 to 2350
Data columns (total 6 columns):
Strain         2351 non-null object
Type           2351 non-null object
Rating         2351 non-null float64
Effects        2351 non-null object
Flavor         2305 non-null object
Description    2318 non-null object
dtypes: float64(1), object(5)
memory usage: 110.3+ KB


In [97]:
df.isnull().sum()

Strain          0
Type            0
Rating          0
Effects         0
Flavor         46
Description    33
dtype: int64

In [98]:
df.head()

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."


In [99]:
df['Type']= df['Type'].astype(str)

In [100]:
#I will extract the values in Effects and Flavor and pass to a new column
df_effect = pd.DataFrame(df.Effects.str.split(',',4).tolist(),
             columns = ['Effect_1','Effect_2','Effect_3','Effect_4','Effect_5'])

df_flavors = pd.DataFrame(df.Flavor.str.split(',',n=2,expand=True).values.tolist(),
                          columns = ['Flavor_1','Flavor_2','Flavor_3'])

In [101]:
#Concatenating the new variables with strains
df = pd.concat([df, df_effect], axis=1)
df = pd.concat([df, df_flavors], axis=1)

#Looking the result
print(df.shape)
df.head()

(2351, 14)


Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Effect_1,Effect_2,Effect_3,Effect_4,Effect_5,Flavor_1,Flavor_2,Flavor_3
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...,Creative,Energetic,Tingly,Euphoric,Relaxed,Earthy,Sweet,Citrus
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...,Relaxed,Aroused,Creative,Happy,Energetic,Flowery,Violet,Diesel
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...,Uplifted,Happy,Relaxed,Energetic,Creative,Spicy/Herbal,Sage,Woody
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...,Tingly,Creative,Hungry,Relaxed,Uplifted,Apricot,Citrus,Grapefruit
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%...",Happy,Relaxed,Euphoric,Uplifted,Talkative,Citrus,Earthy,Orange


In [102]:
#Let's create subsets by each type and explore their Flavors and Effects
hibridas = df[df.Type == 'hybrid']
indicas = df[df.Type == 'indica']
sativas = df[df.Type == 'sativa']

In [103]:
#Now we can delete some columns that will not be useful
del df["Effects"]
del df["Flavor"]

In [104]:
# Lets do some transformation in data

print(df.head())

           Strain    Type  Rating  \
0          100-Og  hybrid     4.0   
1  98-White-Widow  hybrid     4.7   
2            1024  sativa     4.4   
3        13-Dawgs  hybrid     4.2   
4        24K-Gold  hybrid     4.6   

                                         Description  Effect_1   Effect_2  \
0  $100 OG is a 50/50 hybrid strain that packs a ...  Creative  Energetic   
1  The ‘98 Aloha White Widow is an especially pot...   Relaxed    Aroused   
2  1024 is a sativa-dominant hybrid bred in Spain...  Uplifted      Happy   
3  13 Dawgs is a hybrid of G13 and Chemdawg genet...    Tingly   Creative   
4  Also known as Kosher Tangie, 24k Gold is a 60%...     Happy    Relaxed   

   Effect_3   Effect_4   Effect_5      Flavor_1 Flavor_2    Flavor_3  
0    Tingly   Euphoric    Relaxed        Earthy    Sweet      Citrus  
1  Creative      Happy  Energetic       Flowery   Violet      Diesel  
2   Relaxed  Energetic   Creative  Spicy/Herbal     Sage       Woody  
3    Hungry    Relaxed   Uplif

In [105]:
#Transforming the Type in numerical 
df["Type"] = pd.factorize(df["Type"])[0]
del df["Description"]
# Now we have 3 numerical Types
# 0 - Hybrid
# 1 - Sativa
# 2 - Indica

In [106]:
# Creating the dummies variable of Effects and Flavors
#effect_dummy = df['Effects'].str.get_dummies(sep=',',)
#flavor_dummy = df['Flavor'].str.get_dummies(sep=',')

dummy = pd.get_dummies(df[['Effect_1','Effect_2','Effect_3','Effect_4','Effect_5','Flavor_1','Flavor_2','Flavor_3']])

In [107]:
#Concatenating the result and droping the used variables 
df = pd.concat([df, dummy], axis=1)

df = df.drop(['Strain','Effect_1','Effect_2','Effect_3','Effect_4','Effect_5','Flavor_1','Flavor_2','Flavor_3'], axis=1)

df.shape

(2351, 232)

In [108]:
# setting X and y
X = df.drop("Type",1)
y = df["Type"]
feature_name = X.columns.tolist()
X = X.astype(np.float64, copy=False)
y = y.astype(np.float64, copy=False)

In [109]:
X_train, X_test, y_train, y_test, = train_test_split(X, y, test_size=0.3)

In [110]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Keras Model

In [125]:
#Create Model
model = Sequential()
#Input => Hidden
model.add(Dense(12, input_dim=231, activation='relu'))
#Hidden
model.add(Dense(12, activation='relu'))
#Output
model.add(Dense(1, activation='linear'))
#Compile
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])

model.summary()

Model: "sequential_63"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_189 (Dense)            (None, 12)                2784      
_________________________________________________________________
dense_190 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_191 (Dense)            (None, 1)                 13        
Total params: 2,953
Trainable params: 2,953
Non-trainable params: 0
_________________________________________________________________


In [126]:
# split into input (X) and output (Y) variables
features = df.columns.tolist()[0:13]
target = df.columns.tolist()[13]

X = df[features]
y = df[target]

scaler = StandardScaler()

X = pd.DataFrame(scaler.fit_transform(X), columns = features)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state =42)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [127]:
%%time
# Function to create model, required for KerasClassifier
def create_model():
    # create model
    model = Sequential()
    model.add(Dense(16, input_dim=13, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['accuracy'])
    return model

# create model
model = KerasClassifier(build_fn=create_model, verbose=0)

# define the grid search parameters
# batch_size = [10, 20, 40, 60, 80, 100]
# param_grid = dict(batch_size=batch_size, epochs=epochs)

# define the grid search parameters
param_grid = {'batch_size': [10, 20, 40, 60, 80, 100],
              'epochs': [20]}

# Create Grid Search
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(X, y)

# Report Results
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f"Means: {mean}, Stdev: {stdev} with: {param}")



Best: 0.9621437618905866 using {'batch_size': 60, 'epochs': 20}
Means: 0.960442358866391, Stdev: 0.003105400085217601 with: {'batch_size': 10, 'epochs': 20}
Means: 0.9617184078069709, Stdev: 0.00373515060567165 with: {'batch_size': 20, 'epochs': 20}
Means: 0.9617184211172383, Stdev: 0.004753586108549957 with: {'batch_size': 40, 'epochs': 20}
Means: 0.9621437618905866, Stdev: 0.0043164601509866505 with: {'batch_size': 60, 'epochs': 20}
Means: 0.9621437618905866, Stdev: 0.0043164601509866505 with: {'batch_size': 80, 'epochs': 20}
Means: 0.9621437618905866, Stdev: 0.0043164601509866505 with: {'batch_size': 100, 'epochs': 20}
CPU times: user 56.2 s, sys: 10 s, total: 1min 6s
Wall time: 40.3 s


In [128]:
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')

Best: 0.9621437618905866 using {'batch_size': 60, 'epochs': 20}
