In [28]:
import numpy as np
import pandas as pd
import pickle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split 
from sklearn.metrics.pairwise import pairwise_distances

import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

In [22]:
# Load credential_key
cred = credentials.Certificate('./drone-capstone-386903-d2b13d68a9d6.json')

try:
    firebase_admin.initialize_app(cred)
except Exception as e:
    print(f"An error occurred: {str(e)}")
    
db = firestore.client()

An error occurred: The default Firebase app already exists. This means you called initialize_app() more than once without providing an app name as the second argument. In most cases you only need to call initialize_app() once. But if you do want to initialize multiple apps, pass a second argument to initialize_app() to give each app a unique name.


In [29]:
# Get user data
users_ref = db.collection('reviews')
docs = users_ref.get()

data_list = []

for doc in docs:
    data = doc.to_dict()
    data_list.append(data)

data = pd.DataFrame(data_list)

In [24]:
# Create pivot table function (row(user), column(laundry))
def createPivot(data, fillVal = None):
  pivot_table = data.pivot_table(index='review_author_id', columns='laundry_id', values='review_rating')
  return pivot_table.fillna(fillVal) if fillVal is not None else pivot_table

In [30]:
random_state = 50
test_size = 0.30 

train, test = train_test_split(data, test_size=test_size, random_state=random_state)
test = test[test['review_author_id'].isin(train['review_author_id'])]

train_pivot = createPivot(train, 0)
test_pivot = createPivot(test, 0)

train_binary = train.copy()
train_binary['review_rating'] = train_binary['review_rating'].apply(lambda x: 0 if x >= 1 else 1)
train_binary = createPivot(train_binary, 1)

test_binary = test.copy()
test_binary['review_rating'] = test_binary['review_rating'].apply(lambda x: 1 if x >= 1 else 0)
test_binary = createPivot(test_binary, 0)

In [32]:
mean = np.nanmean(createPivot(train), axis=1)

train_subtracted_by_mean = (createPivot(train).T - mean).T
train_subtracted_by_mean.fillna(0, inplace=True)

correlation = 1 - pairwise_distances(train_subtracted_by_mean, metric='cosine')
correlation[np.isnan(correlation)] = 0

correlation_df = pd.DataFrame(correlation)
correlation_df['review_author_id'] = train_subtracted_by_mean.index
correlation_df.set_index('review_author_id',inplace=True)
correlation_df.columns = train_subtracted_by_mean.index.tolist()

predicted_rating = np.dot(correlation, train_pivot)
final_rating = np.multiply(predicted_rating, train_binary)

In [33]:
# Adjust with Cloud Storage path to store model
with open('./recomender_models/final_rating.pickle', 'wb') as f:
    pickle.dump(final_rating, f)