### IMPORTS

In [None]:
# Imports used in this notebook

from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Input
from tensorflow.keras import layers
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
import pickle

### READING IN DATA

In [None]:
# Data Source
# https://www.kaggle.com/kingburrito666/cannabis-strains

In [None]:
# Load the cannabis strain data set from the local CSV file

CANNABIS_CSV = 'cannabis.csv'
weed = pd.read_csv(CANNABIS_CSV)

In [None]:
# Display the dataframe (pandas shows only the first and last rows) to confirm the CSV was read in correctly

weed

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."
...,...,...,...,...,...,...
2346,Zeus-Og,hybrid,4.7,"Happy,Uplifted,Relaxed,Euphoric,Energetic","Earthy,Woody,Pine",Zeus OG is a hybrid cross between Pineapple OG...
2347,Zkittlez,indica,4.6,"Relaxed,Happy,Euphoric,Uplifted,Sleepy","Sweet,Berry,Grape",Zkittlez is an indica-dominant mix of Grape Ap...
2348,Zombie-Kush,indica,5.0,"Relaxed,Sleepy,Talkative,Euphoric,Happy","Earthy,Sweet,Spicy/Herbal",Zombie Kush by Ripper Seeds comes from two dif...
2349,Zombie-Og,indica,4.4,"Relaxed,Sleepy,Euphoric,Happy,Hungry","Sweet,Earthy,Pungent",If you’re looking to transform into a flesh-ea...


### DATA PREPROCESSING

In [None]:
# The Flavor column is known to contain NaN values; list the affected rows to see how many there are

missing_flavor = weed['Flavor'].isna()
weed[missing_flavor]

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
103,Amethyst,hybrid,3.9,"Energetic,Creative,Giggly,Hungry,Happy",,Amethyst is a hybrid cannabis strain from the ...
123,Arabian-Gold,sativa,4.1,"Euphoric,Tingly,Giggly,Sleepy,Creative",,Arabian Gold is a heavy sativa strain of myste...
160,Bad-Azz-Kush,hybrid,5.0,,,Bad Azz Kush by Barney’s Farm was created with...
215,Birds-Eye,sativa,5.0,"Happy,Uplifted,Energetic,Euphoric,Relaxed",,Birds Eye is a variety of Jack Herer grown by ...
364,Boombaye,hybrid,0.0,,,Boombaye by Mr. Mack’s Snack is a CBD-rich str...
379,Broke-Diesel,hybrid,5.0,"Uplifted,Euphoric,Creative,Energetic,Happy",,Broke Diesel is a 50/50 hybrid marijuana strai...
484,Chem-Jong-Ill,hybrid,5.0,"Uplifted,Creative,Happy,Talkative,Energetic",,This sativa-dominant cross is for cannabis pro...
574,Confidential-Wreck,hybrid,4.2,"Hungry,Relaxed,Tingly,Uplifted,Creative",,"Confidential Wreck, also known as Lohan, is th..."
673,Do-Over-Og,hybrid,4.7,"Talkative,Uplifted,Creative,Euphoric,Happy",,Do-Over OG is an indica-dominant strain that c...
679,Domino,indica,4.0,"Sleepy,Happy,Tingly,Uplifted,Euphoric",,


In [None]:
# Fill the missing Flavor entries with the placeholder string 'none'

weed['Flavor'] = weed['Flavor'].fillna('none')

In [None]:
# Confirm the fill worked: this selection should now come back empty

weed.loc[weed['Flavor'].isna()]

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description


In [None]:
# Combine Effects and Flavor into one comma-separated column that will be vectorized later.
# The ',' separator keeps the last effect and the first flavor from being fused into one word.

weed['Effect_Flavor'] = weed['Effects'].str.cat(weed['Flavor'], sep=',')

In [None]:
# Display the dataframe again to check that the Effect_Flavor column was added correctly

weed

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Effect_Flavor
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...,"Creative,Energetic,Tingly,Euphoric,Relaxed,Ear..."
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...,"Relaxed,Aroused,Creative,Happy,Energetic,Flowe..."
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...,"Uplifted,Happy,Relaxed,Energetic,Creative,Spic..."
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...,"Tingly,Creative,Hungry,Relaxed,Uplifted,Aprico..."
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%...","Happy,Relaxed,Euphoric,Uplifted,Talkative,Citr..."
...,...,...,...,...,...,...,...
2346,Zeus-Og,hybrid,4.7,"Happy,Uplifted,Relaxed,Euphoric,Energetic","Earthy,Woody,Pine",Zeus OG is a hybrid cross between Pineapple OG...,"Happy,Uplifted,Relaxed,Euphoric,Energetic,Eart..."
2347,Zkittlez,indica,4.6,"Relaxed,Happy,Euphoric,Uplifted,Sleepy","Sweet,Berry,Grape",Zkittlez is an indica-dominant mix of Grape Ap...,"Relaxed,Happy,Euphoric,Uplifted,Sleepy,Sweet,B..."
2348,Zombie-Kush,indica,5.0,"Relaxed,Sleepy,Talkative,Euphoric,Happy","Earthy,Sweet,Spicy/Herbal",Zombie Kush by Ripper Seeds comes from two dif...,"Relaxed,Sleepy,Talkative,Euphoric,Happy,Earthy..."
2349,Zombie-Og,indica,4.4,"Relaxed,Sleepy,Euphoric,Happy,Hungry","Sweet,Earthy,Pungent",If you’re looking to transform into a flesh-ea...,"Relaxed,Sleepy,Euphoric,Happy,Hungry,Sweet,Ear..."


In [None]:
# Look at a single row in full to verify that the comma separates the effects from the flavors

weed.loc[0, 'Effect_Flavor']

'Creative,Energetic,Tingly,Euphoric,Relaxed,Earthy,Sweet,Citrus'

In [None]:
# Summary statistics (count, unique values, most frequent value) for the new column

weed.loc[:, 'Effect_Flavor'].describe()

count          2351
unique         2257
top       None,None
freq             73
Name: Effect_Flavor, dtype: object

### MODEL PREPARATION

In [None]:
# Features are the combined effect/flavor text; the target is the strain name

X, y = weed['Effect_Flavor'], weed['Strain']

In [None]:
# Check the shape of X (still a one-dimensional column of text at this point)

X.shape

(2351,)

In [None]:
# Check the shape of y; it should have the same number of rows as X

y.shape

(2351,)

In [None]:
# A neural network cannot consume raw text, so turn X into token counts first

# Create the transformer
vect = CountVectorizer()

# Learn the vocabulary from X and build the document-term matrix in one step
dtm = vect.fit_transform(X)

In [None]:
# Print the learned vocabulary to check that the feature names look right

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer get_feature_names_out() when it exists and fall back on older versions.
if hasattr(vect, 'get_feature_names_out'):
    feature_names = vect.get_feature_names_out()
else:
    feature_names = vect.get_feature_names()

# Convert to plain Python strings so the printed list looks the same on either path
print([str(name) for name in feature_names])

['ammonia', 'apple', 'apricot', 'aroused', 'berry', 'blue', 'blueberry', 'butter', 'cheese', 'chemical', 'chestnut', 'citrus', 'coffee', 'creative', 'diesel', 'dry', 'earthy', 'energetic', 'euphoric', 'flowery', 'focused', 'fruit', 'giggly', 'grape', 'grapefruit', 'happy', 'herbal', 'honey', 'hungry', 'lavender', 'lemon', 'lime', 'mango', 'menthol', 'mint', 'minty', 'mouth', 'none', 'nutty', 'orange', 'peach', 'pear', 'pepper', 'pine', 'pineapple', 'plum', 'pungent', 'relaxed', 'rose', 'sage', 'skunk', 'sleepy', 'spicy', 'strawberry', 'sweet', 'talkative', 'tar', 'tea', 'tingly', 'tobacco', 'tree', 'tropical', 'uplifted', 'vanilla', 'violet', 'woody']


In [None]:
# Convert the sparse document-term matrix into a dense one and store it in X.
# todense() gives a numpy `matrix`, so indexing a single row (X[i]) still returns a 2-D result.
# NOTE(review): toarray() would return a plain ndarray instead; check every later cell that indexes X before switching.

X = dtm.todense()

In [None]:
# Display X to confirm it is now a dense numeric matrix

X

matrix([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 0, 0, 1],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]])

In [None]:
# Encode the strain names in y as integers with a label encoder:
# each distinct name is mapped to its own integer class id

le = preprocessing.LabelEncoder()
le.fit(y)
y = le.transform(y)

In [None]:
# Display y to confirm it is now an array of integer class ids

y

array([   0,   18,    1, ..., 2347, 2348, 2349])

In [None]:
# Check the shape of X after vectorizing: (rows, vocabulary size)

X.shape

(2351, 66)

In [None]:
# Check the shape of y after encoding; the row count must match X

y.shape

(2351,)

In [None]:
# Spot-check the data: one encoded feature row, then the first five encoded labels

print(X[1], y[:5], sep='\n')

[[0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]]
[ 0 18  1  2  3]


### CREATE NEURAL NETWORK

In [None]:
# Custom Adam optimizer with a very small learning rate, so each update changes the weights only slightly.
# Despite the variable name, amsgrad is False here, so the AMSGrad variant is not enabled.

AdamamsGrad = tf.keras.optimizers.Adam(learning_rate=1e-4, amsgrad=False)

In [None]:
# Build and compile a fully connected classifier:
# token-count vector in, one softmax score per strain out.

# Take the layer sizes from the data instead of hard-coding them, so the network
# keeps matching X and y if the vocabulary or the set of strains changes.
n_features = X.shape[1]        # 66 columns with the current vocabulary
n_strains = len(le.classes_)   # 2350 distinct labels in the current data

model = Sequential()

# Keras documents `shape` as a tuple that excludes the batch dimension
model.add(Input(shape=(n_features,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
# One output unit per encoded strain label
model.add(Dense(n_strains, activation='softmax'))

# The sparse loss is used because y holds integer class ids, not one-hot vectors
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=AdamamsGrad,
              metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                4288      
_________________________________________________________________
dense_1 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_2 (Dense)              (None, 256)               33024     
_________________________________________________________________
dense_3 (Dense)              (None, 2350)              603950    
Total params: 649,582
Trainable params: 649,582
Non-trainable params: 0
_________________________________________________________________


### TEST PREDICTION MODEL

In [None]:
# Train the model on the full data set for 100 epochs.
# No validation data is passed, so the reported accuracy is measured on the training data itself.

test = model.fit(x=X, y=y, epochs=100)

In [None]:
# Grab the learned weights of the first hidden layer.
# This model has no layer named 'book_embedding' (its layers are the four Dense layers),
# so the layer is taken by position rather than by name.
weed_layer = model.layers[0]                  # first Dense layer added to the Sequential model
weed_weights = weed_layer.get_weights()[0]    # index 0 is the kernel; index 1 would be the bias
book_weights = weed_weights                   # old name kept in case a later cell still refers to it

### PICKLE THE MODEL