In [4]:
import pickle

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

# Read ../data/leafly.json and keep only the rows that have no missing values,
# in a single step so re-running the cell always starts from the file on disk.
leafly = pd.read_json('../data/leafly.json').dropna()

# Show (rows, columns) of the cleaned frame.
leafly.shape

(311, 37)

## Data Wrangling

In [11]:
def _join_columns(frame, columns):
    """Concatenate the given columns of `frame` row-wise, separated by single spaces.

    The columns are combined left to right with `+`, so the result is the same
    Series that writing `a + ' ' + b + ' ' + ...` by hand would produce.
    """
    joined = frame[columns[0]]
    for column in columns[1:]:
        joined = joined + ' ' + frame[column]
    return joined


# One text column for the five feeling_N columns and one for the five helps_N columns.
leafly['feelings'] = _join_columns(
    leafly, ['feeling_1', 'feeling_2', 'feeling_3', 'feeling_4', 'feeling_5']
)
leafly['helps'] = _join_columns(
    leafly, ['helps_1', 'helps_2', 'helps_3', 'helps_4', 'helps_5']
)

# Single document per row: feelings, then helps, then the free-text description.
leafly['feelings_helps_description'] = _join_columns(
    leafly, ['feelings', 'helps', 'description']
)

leafly['feelings_helps_description']

0       Happy Euphoric Creative Relaxed Uplifted Stres...
42      Happy Relaxed Euphoric Hungry Uplifted Depress...
45      Relaxed Happy Euphoric Hungry Focused Stress A...
47      Happy Relaxed Sleepy Talkative Uplifted Pain S...
52      Relaxed Happy Sleepy Euphoric Hungry Stress Pa...
                              ...                        
3341    Relaxed Happy Euphoric Sleepy Uplifted Stress ...
3364    Relaxed Euphoric Uplifted Happy Creative Pain ...
3370    Relaxed Euphoric Happy Uplifted Sleepy Stress ...
3415    Happy Relaxed Euphoric Uplifted Hungry Stress ...
3434    Happy Energetic Relaxed Focused Uplifted Stres...
Name: feelings_helps_description, Length: 311, dtype: object

In [12]:
# Show the full combined text for the row whose index label is 45.
leafly.loc[45, 'feelings_helps_description']

'Relaxed Happy Euphoric Hungry Focused Stress Anxiety Nausea Pain Depression Blurple, also known as Blue Dream Purple, is a balanced hybrid cross between Blue Dream and Mendocino Purps. Its effects position themselves cerebrally at first, with gentle body relaxation that keeps you feeling light and free of tension. Drawing from both sides of the family, Blurple carries a sweet, dessert-like berry and grape aroma that comes to life on the inhale. This strain earns its name as bluish purple hues swirl throughout Blurple’s green buds.'

## Build preprocessor and model

In [13]:
# Turn the combined text column into TF-IDF weights over unigrams and bigrams,
# dropping English stop words and keeping at most 2000 features.
tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=2000)
dtm_sparse = tfidf.fit_transform(leafly['feelings_helps_description'])

# scikit-learn 1.0 deprecated get_feature_names() in favour of
# get_feature_names_out(), and 1.2 removed the old method. Prefer the new name
# when it exists and fall back otherwise, so the cell runs on both the 0.23.x
# release this notebook was executed with and on current releases.
if hasattr(tfidf, 'get_feature_names_out'):
    feature_names = tfidf.get_feature_names_out()
else:
    feature_names = tfidf.get_feature_names()

# toarray() returns a plain ndarray; todense() returns the legacy np.matrix type.
# The values placed in the DataFrame are the same either way.
dtm = pd.DataFrame(dtm_sparse.toarray(), columns=feature_names)

# Fit a nearest-neighbour index that returns 4 neighbours per query by default.
nn = NearestNeighbors(n_neighbors=4, algorithm='kd_tree')
nn.fit(dtm)

NearestNeighbors(algorithm='kd_tree', n_neighbors=4)

## Pickle preprocessor and model

In [17]:
# Display the installed scikit-learn version.
# NOTE(review): presumably recorded because the pickles written in the next cell
# should be loaded with the same scikit-learn version that produced them — confirm.
import sklearn
sklearn.__version__

'0.23.2'

In [19]:
# Serialise the fitted nearest-neighbour model first, then the fitted TF-IDF
# vectoriser, each to its own file under ../pickles/.
for pkl_path, fitted_obj in (
    ('../pickles/nn_model_w.pkl', nn),
    ('../pickles/tfidf.pkl', tfidf),
):
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(fitted_obj, pkl_file)