## Functions used to create and bulk-upload the data to Firestore

In [19]:
import random
import sklearn
import csv
import numpy as np
from collections import defaultdict
import pandas as pd
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import datetime
import re 
import unidecode
from textblob import TextBlob

In [2]:
# Use a service account: load the key file, initialise the default Firebase
# app with it, then open the Firestore client.
# NOTE(review): the key file path is relative, so it is presumably resolved
# against the directory the notebook is run from -- confirm before moving it.
cred = credentials.Certificate('recipe-recommendation-94b2e-e2e0a9ee33eb.json')
firebase_admin.initialize_app(cred)

# Module-level Firestore client; the upload code in this file reads it as `db`.
db = firestore.client()

In [3]:
# Load the raw strain data; it is grouped by strain name and category below.
strain_df = pd.read_csv('DetailedStrainData.csv')

In [4]:
# Collapse the raw data to one row per (strain, category) pair holding the
# mean of each numeric column. `numeric_only=True` is passed explicitly:
# pandas >= 2.0 raises a TypeError when mean() meets a non-numeric column,
# whereas older versions silently dropped such columns. Being explicit gives
# the same result on every pandas version.
strain_grouped = strain_df.groupby(
    ['leafly_strain', 'strain_category'], as_index=False
).mean(numeric_only=True)

In [5]:
# One boolean flag column per category. For every row whose category is one
# of the three known values, its own flag is set to True and the other two to
# False. Rows with any other category are not touched by these assignments.
for category in ('Sativa', 'Hybrid', 'Indica'):
    in_category = strain_grouped['strain_category'] == category
    for flag_column in ('sativa', 'hybrid', 'indica'):
        # The flag column names are the lower-cased category names.
        strain_grouped.loc[in_category, flag_column] = (flag_column == category.lower())

In [11]:
def submitStrainToDB(row):
    """Write one strain record to the Firestore ``strains`` collection.

    The document id is the strain name taken from ``row['leafly_strain']``.
    ``cbd_max`` and ``thc_max`` are divided by 100 before being stored, and
    the three ``avg_*`` fields are filled with random values drawn uniformly
    between 0 and 1.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            ``leafly_strain``, ``cbd_max``, ``thc_max``, ``hybrid``,
            ``sativa`` and ``indica``.

    Returns:
        Always ``True``.
    """
    strain_name = u'{}'.format(row['leafly_strain'])
    cbd_fraction = row['cbd_max'] / 100.0
    thc_fraction = row['thc_max'] / 100.0

    payload = {
        u'friendly_name': strain_name,
        u'cbd_percent': cbd_fraction,
        u'hybrid': row['hybrid'],
        u'sativa': row['sativa'],
        u'indica': row['indica'],
        u'thc_percent': thc_fraction,
        # Random stand-ins for the aggregate review statistics.
        u'avg_rating': random.uniform(0, 1),
        u'avg_polarity': random.uniform(0, 1),
        u'avg_sentiment': random.uniform(0, 1),
    }

    strains = db.collection(u'strains')
    strains.document(strain_name).set(payload)

    return True


In [None]:
# Upload every grouped strain row to Firestore; submitStrainToDB performs one
# document write per row.
strain_grouped.apply(submitStrainToDB, axis=1)

In [6]:
# Seed the `recipes` collection with 30 placeholder recipes named
# test_recipe_0 .. test_recipe_29, each filled with random attribute values.
recipes = db.collection(u'recipes')
for i in range(30):
    recipe_name = u'test_recipe_{}'.format(i)
    recipe_ref = recipes.document(recipe_name)

    # A recipe uses either oil or butter, never both.
    usesOil = bool(random.getrandbits(1))

    recipe_fields = {
        u'friendly_name': recipe_name,
        u'cooking_time_min': random.randint(1,240),
        u'is_desert': bool(random.getrandbits(1)),
        u'oven_percentage_power': random.uniform(0, 1),
        u'percent_serving_weight_weed_ingredient':  random.uniform(0, 0.2),
        u'uses_butter': not usesOil,
        u'uses_oil': usesOil,
    }
    recipe_ref.set(recipe_fields)

### Create the review data and add it to the database

In [5]:
# Load the sample reviews; the cells below score and enrich this DataFrame.
review_df = pd.read_csv('sample_review_data.csv')

In [15]:
def remove_html_tags(sentence):
    """Return ``str(sentence)`` with every ``<...>`` span removed.

    The match is non-greedy, so each tag is stripped individually and the
    text between tags is kept. Nothing is inserted in place of a tag.
    """
    text = str(sentence)
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', text)

def remove_URLS(sentence):
    """Return ``str(sentence)`` with URL-like tokens removed.

    A token is removed when it starts with ``http`` or with ``www.`` and it
    extends up to the next whitespace character. Surrounding whitespace is
    left in place.
    """
    # Raw string: \S and \. are regex escapes, not Python string escapes, and
    # the non-raw form triggers an invalid-escape warning on recent Pythons.
    return re.sub(r'http\S+|www\.\S+', '', str(sentence))

def remove_breaks(sentence):
    """Return ``str(sentence)`` without newline, carriage-return or tab characters.

    The characters are deleted rather than replaced, so text on either side
    of a break ends up joined together.
    """
    text = str(sentence)
    break_pattern = re.compile('\n|\r|\t')
    return break_pattern.sub('', text)

def remove_unicode(sentence):
    """Return ``unidecode.unidecode`` applied to ``str(sentence)``.

    NOTE(review): unidecode is expected to transliterate non-ASCII characters
    to an ASCII approximation -- see the unidecode package documentation.
    """
    text = str(sentence)
    return unidecode.unidecode(text)

def remove_numbers(sentence):
    """Return ``str(sentence)`` with every run of digits removed."""
    # Raw string so that \d reaches the regex engine as written instead of
    # being treated as an (invalid) Python string escape.
    return re.sub(r'\d+', '', str(sentence))

def lower_case(sentence):
    """Return ``str(sentence)`` converted to lower case."""
    text = str(sentence)
    return text.lower()

def remove_punctuation(sentence):
    """Return ``str(sentence)`` keeping only word characters and whitespace.

    Word characters are whatever the regex class ``w`` matches (letters,
    digits and the underscore), so underscores survive while other
    punctuation is deleted.
    """
    # Raw string so that \w and \s are regex escapes rather than (invalid)
    # Python string escapes.
    return re.sub(r'[^\w\s]', '', str(sentence))

# Put it all together (Order is important)
def apply_basic_cleaning(sentence):
    """Run the text-cleaning steps over *sentence* and return the result.

    Steps, in order: strip HTML tags, strip URLs, strip line breaks and tabs,
    strip digits, lower-case, strip punctuation. Each step receives the
    output of the previous one.

    NOTE: ``remove_unicode`` is not part of this pipeline.
    """
    cleaning_steps = (
        remove_html_tags,
        remove_URLS,
        remove_breaks,
        remove_numbers,
        lower_case,
        remove_punctuation,
    )

    cleaned = sentence
    for step in cleaning_steps:
        cleaned = step(cleaned)

    return cleaned

In [20]:
def getCurrentReviewSentiment(comment):
    """Score one review comment with TextBlob.

    The comment is first passed through ``apply_basic_cleaning``; the cleaned
    text is then analysed with ``TextBlob(...).sentiment``.

    Args:
        comment: The raw review text.

    Returns:
        A two-element ``pd.Series``: ``(polarity, subjectivity)``.
    """
    scores = TextBlob(apply_basic_cleaning(comment)).sentiment
    return pd.Series((scores.polarity, scores.subjectivity))

In [21]:
# Score every review comment and store the two values as new columns.
# The 'commnet_polarity' spelling is the column name used further down in
# this file, so it is kept exactly as-is.
sentiment_columns = ['commnet_polarity', 'comment_subjectivity']
comment_sentiment = review_df.apply(
    lambda review: getCurrentReviewSentiment(review['comment']),
    axis=1,
)
review_df[sentiment_columns] = comment_sentiment

In [48]:
def addRecipeAndStrainDataToReview(row):
    """Enrich one review with strain and recipe attributes and upload it.

    A random test recipe and the strain named by ``row['strain_id']`` are
    read from Firestore. Their attributes are combined with the review's own
    fields plus a randomly chosen ``condition`` and ``relief`` flag, and the
    result is written as a new auto-id document in the ``reviews`` collection.

    The random fields are drawn exactly once, so the returned Series holds
    the same ``condition`` and ``relief`` values as the uploaded document.

    Args:
        row: Mapping-like review record providing ``strain_id``, ``comment``,
            ``commnet_polarity``, ``comment_subjectivity``, ``rating`` and
            ``username``.

    Returns:
        A ``pd.Series`` (default integer index) with 21 values in this order:
        hybrid, sativa, indica, thc_percent, cbd_percent, avg_subjectivity,
        avg_polarity, avg_rating, cooking_time_min, is_desert,
        oven_percentage_power, percent_serving_weight_weed_ingredient,
        uses_butter, uses_oil, condition, comment, commnet_polarity,
        comment_subjectivity, rating, relief, username.

    Raises:
        LookupError: If the recipe or strain document does not exist.
    """
    # randint is inclusive at both ends, so this picks test_recipe_1..29.
    recipeID = u'test_recipe_{}'.format(random.randint(1, 29))
    strainID = u'{}'.format(row['strain_id'])

    recipeData = db.collection(u'recipes').document(recipeID).get().to_dict()
    strainData = db.collection(u'strains').document(strainID).get().to_dict()

    # to_dict() yields None for a missing document; fail with a clear message
    # instead of an opaque "'NoneType' object is not subscriptable".
    if recipeData is None:
        raise LookupError(u'recipe document not found: {}'.format(recipeID))
    if strainData is None:
        raise LookupError(u'strain document not found: {}'.format(strainID))

    conditions = ['cramps', 'depression', 'eye_pressure', 'fatigue_headaches', 'inflamation_insomnia', 'lack_appetite', 'muscle_spasms', 'nausea', 'pain', 'seizures', 'spasticity', 'stress']

    # Draw the random fields once so the database and the return value agree.
    condition = conditions[random.randint(0, len(conditions) - 1)]
    relief = bool(random.getrandbits(1))

    # Single source of truth for both the uploaded document and the returned
    # Series. Order matters: callers unpack the Series by position.
    fields = [
        (u'hybrid', strainData['hybrid']),
        (u'sativa', strainData['sativa']),
        (u'indica', strainData['indica']),
        (u'thc_percent', strainData['thc_percent']),
        (u'cbd_percent', strainData['cbd_percent']),
        # The strain document stores this value under 'avg_sentiment'.
        (u'avg_subjectivity', strainData['avg_sentiment']),
        (u'avg_polarity', strainData['avg_polarity']),
        (u'avg_rating', strainData['avg_rating']),
        (u'cooking_time_min', recipeData['cooking_time_min']),
        (u'is_desert', recipeData['is_desert']),
        (u'oven_percentage_power', recipeData['oven_percentage_power']),
        (u'percent_serving_weight_weed_ingredient', recipeData['percent_serving_weight_weed_ingredient']),
        (u'uses_butter', recipeData['uses_butter']),
        (u'uses_oil', recipeData['uses_oil']),
        (u'condition', condition),
        (u'comment', row['comment']),
        (u'commnet_polarity', row['commnet_polarity']),
        (u'comment_subjectivity', row['comment_subjectivity']),
        (u'rating', row['rating']),
        (u'relief', relief),
        (u'username', row['username']),
    ]

    # Add to the database. Comment and username are stored as text, while the
    # returned Series keeps the raw row values.
    review_doc = dict(fields)
    review_doc[u'comment'] = u'{}'.format(row['comment'])
    review_doc[u'username'] = u'{}'.format(row['username'])
    db.collection(u'reviews').document().set(review_doc)

    return pd.Series([value for _, value in fields])

In [49]:
# Enrich and upload every review. addRecipeAndStrainDataToReview writes one
# Firestore document per row and returns the values for the columns below,
# in this order.
review_columns = [
    'hybrid',
    'sativa',
    'indica',
    'thc_percent',
    'cbd_percent',
    'avg_subjectivity',
    'avg_polarity',
    'avg_rating',
    'cooking_time_min',
    'is_desert',
    'oven_percentage_power',
    'percent_serving_weight_weed_ingredient',
    'uses_butter',
    'uses_oil',
    'condition',
    'comment',
    'commnet_polarity',
    'comment_subjectivity',
    'rating',
    'relief',
    'username',
]
enriched_reviews = review_df.apply(addRecipeAndStrainDataToReview, axis=1)
review_df[review_columns] = enriched_reviews

In [50]:
# Save the enriched review data as a local CSV file.
review_df.to_csv(r'TestData.csv')