# Cindy Ho 127008544 User Feedback Prediction Algorithm

In [171]:
import csv #library for importing csv files
import firebase_admin #needed for connecting to database
from firebase_admin import credentials #needed for certifying password to database
from firebase_admin import firestore #needed for admin access to database
import pandas as pd #for pandas dataframe - structure of organizing data
from sklearn.model_selection import train_test_split #used to split dataset into training and testing dataset
import numpy as np 
from sklearn import neighbors #import library for KNN
from sklearn.ensemble import RandomForestClassifier #import RFClassifier model
from sklearn.naive_bayes import GaussianNB #import GaussianNB model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score
#imports tools needed to verify results of models

# Connection to Firebase Database

In [3]:
#the purpose of this is to gain access to the database
if not firebase_admin._apps: #using a if not statement to prevent overwriting over same password key
    #saves to cred variable of admin password key
    cred = credentials.Certificate(r"C:\Users\cindy\OneDrive\Documents\403\perfectpourovercoffee-3c0ce-firebase-adminsdk-h2jgk-1590472422.json")
    
    #calls upon firebase admin library and inserts password key to gain access to private database
    default_app = firebase_admin.initialize_app(cred)
    

db = firestore.client() #saves db as the database


# Collects user dataset from CSV file and writes to database

In [75]:
#read from my csv user feedback file and then write it to the database
rows = [] #created a list
with open(r'C:\Users\cindy\OneDrive\Documents\403\UserFeedback.csv','r') as file: #opens csv input 
    csv_reader = csv.reader(file, delimiter=',') #reads from csv file
    for row in csv_reader: #iterates through csv
        rows.append(row) #appends to list while it iterates through csv
        doc_ref = { #generates brew data
            'user_id': row[0], #takes first value and sets as user id value
            'cup_size': row[1], #takes second value and sets as cup size value
            'roast_type': row[2], #takes third value and sets as roast type value
            'bean_type': row[3], #takes fourth value and sets as coffee bean type value
            'strength': row[4], #takes fifth value and sets as user preferred strength value
            'rating': row[5], #takes sixth value and sets as user rating value
            'temperature': row[6], #takes seventh value and sets as target temperature value
            'grind_size': row[7], #takes eighth value and sets as recommended grind size value
            'target_saturation': row[8] #takes ninth value and sets as target water volume value
        }
        db.collection('trainingandvalidating').add(doc_ref) #all these datapoints are saved as one collection / folder 

# K-Nearest Neighbors Model Train & Validation

In [76]:
docs = db.collection('trainingandvalidating').stream() #reads from all documents in a collection of the database
data = pd.DataFrame() #this creates the pandas dataframe containing all of the user feedback from the database
for doc in docs: #iterates through the document stream 
    print(f'{doc.id} => {doc.to_dict()}') #shows programmer what is being saved
    data = data.append(doc.to_dict(),ignore_index=True) # appends it to my pandas dataframe

0QzqRBaqY9rRYUnrwYeH => {'strength': '1', 'roast_type': 'mediumdark', 'rating': '2', 'target_saturation': '12', 'temperature': '183', 'user_id': '9', 'grind_size': 'larger', 'bean_type': 'liberica', 'cup_size': '8'}
0WeNmhfJ62F0h9pmdzU1 => {'user_id': '1', 'target_saturation': '20', 'strength': '2', 'rating': '7', 'roast_type': 'mild', 'cup_size': '8', 'grind_size': 'medium', 'bean_type': 'liberica', 'temperature': '201'}
0YmogPIMBrtraB5AGyz5 => {'strength': '2', 'roast_type': 'mild', 'target_saturation': '20', 'rating': '6', 'grind_size': 'larger', 'temperature': '200', 'user_id': '1', 'bean_type': 'arabica', 'cup_size': '8'}
0iRRFZXHSZFt3auSXgh2 => {'strength': '1', 'temperature': '198', 'rating': '9', 'grind_size': 'larger', 'user_id': '3', 'cup_size': '8', 'roast_type': 'mild', 'target_saturation': '18', 'bean_type': 'robusta'}
1PKvdgwJWtCso45JLzHz => {'bean_type': 'liberica', 'cup_size': '8', 'target_saturation': '16', 'user_id': '9', 'grind_size': 'larger', 'strength': '1', 'roas

GFAq9Gok4wLGi5HvFamR => {'grind_size': 'medium', 'cup_size': '24', 'bean_type': 'liberica', 'user_id': '5', 'target_saturation': '18', 'roast_type': 'medium', 'temperature': '185', 'strength': '3', 'rating': '7'}
GFtREZz62rCkGXXzkiIC => {'bean_type': 'robusta', 'cup_size': '8', 'roast_type': 'dark', 'grind_size': 'medium', 'user_id': '11', 'strength': '1', 'rating': '5', 'target_saturation': '20', 'temperature': '188'}
GV0mdidtrv4b4SlYVZLG => {'grind_size': '', 'cup_size': '', 'bean_type': '', 'target_saturation': '', 'user_id': '', 'roast_type': '', 'temperature': '', 'strength': '', 'rating': ''}
HFeVPupqtoq99UdcsaTO => {'bean_type': 'liberica', 'cup_size': '24', 'roast_type': 'mediumdark', 'grind_size': 'medium', 'user_id': '9', 'strength': '2', 'rating': '4', 'temperature': '194', 'target_saturation': '18'}
HhzHkobuD6ldYdaeuJr1 => {'temperature': '187', 'strength': '1', 'grind_size': 'smaller', 'rating': '7', 'user_id': '12', 'roast_type': 'dark', 'bean_type': 'arabica', 'cup_size'

WAaXVDDATwIKSf7j07xM => {'temperature': '190', 'strength': '2', 'grind_size': 'medium', 'rating': '2', 'user_id': '12', 'roast_type': 'dark', 'cup_size': '24', 'bean_type': 'robusta', 'target_saturation': '10'}
WV80ro7ps63wjyLocdsW => {'temperature': '189', 'strength': '2', 'rating': '9', 'cup_size': '16', 'grind_size': 'smaller', 'bean_type': 'excelsa', 'user_id': '9', 'target_saturation': '12', 'roast_type': 'mediumdark'}
WiSCTSUviKZH7t6zRShm => {'grind_size': '', 'cup_size': '', 'bean_type': '', 'strength': '', 'roast_type': '', 'user_id': '', 'rating': '', 'temperature': '', 'target_saturation': ''}
XWxS6q0kfcWClv4Ffz9o => {'grind_size': 'medium', 'cup_size': '24', 'bean_type': 'robusta', 'target_saturation': '16', 'user_id': '11', 'roast_type': 'dark', 'temperature': '203', 'strength': '3', 'rating': '6'}
XalV6SNGDqKWeuOiKZZa => {'strength': '2', 'roast_type': 'dark', 'rating': '9', 'target_saturation': '16', 'user_id': '10', 'grind_size': 'smaller', 'temperature': '190', 'bean_ty

hyg4dG0aTwYlnrOdUn5t => {'roast_type': 'dark', 'bean_type': 'arabica', 'temperature': '200', 'grind_size': 'larger', 'strength': '3', 'target_saturation': '16', 'user_id': '12', 'rating': '7', 'cup_size': '8'}
hz1QqnmbIt1wLIhx1Exs => {'cup_size': '16', 'grind_size': 'smaller', 'bean_type': 'robusta', 'user_id': '5', 'target_saturation': '20', 'roast_type': 'medium', 'temperature': '189', 'strength': '3', 'rating': '8'}
i279h8HZBeZhWY6hB67z => {'bean_type': 'excelsa', 'cup_size': '24', 'roast_type': 'dark', 'grind_size': 'medium', 'user_id': '10', 'strength': '1', 'rating': '7', 'temperature': '188', 'target_saturation': '18'}
iDzBfl4Ic9Oei5uWaiPV => {'rating': '8', 'user_id': '1', 'cup_size': '24', 'bean_type': 'arabica', 'roast_type': 'mild', 'temperature': '205', 'strength': '3', 'target_saturation': '20', 'grind_size': 'smaller'}
iTxPom1TG253AoK9sLAH => {'target_saturation': '20', 'grind_size': 'medium', 'bean_type': 'robusta', 'user_id': '8', 'roast_type': 'mediumdark', 'temperatur

zpoHYSJzhg9zq5Ad4tne => {'strength': '3', 'temperature': '185', 'grind_size': 'medium', 'rating': '3', 'user_id': '6', 'cup_size': '24', 'roast_type': 'medium', 'target_saturation': '18', 'bean_type': 'liberica'}


# Clean Data

In [78]:
#this replaces all of the string values of coffee bean type and converting it to float values
data['bean_type'].loc[data['bean_type'] == 'robusta'] = 1.0 #converting robusta to 1
data['bean_type'].loc[data['bean_type'] == 'arabica'] = 2.0 #converting arabica to 2
data['bean_type'].loc[data['bean_type'] == 'liberica'] = 3.0 #converting liberica to 3
data['bean_type'].loc[data['bean_type'] == 'excelsa'] = 4.0 #converting excelsa to 4

#this replaces all of the string values of grind size and converting it to float values
data['grind_size'].loc[data['grind_size'] == 'medium'] = 0.0 #converting medium to 0
data['grind_size'].loc[data['grind_size'] == 'larger'] = 1.0 #converting larger to 1
data['grind_size'].loc[data['grind_size'] == 'smaller'] = 2.0 #converting smaller to 2

#this replaces all of the string values of roast type and converting it to float values
data['roast_type'].loc[data['roast_type'] == 'mild'] = 0.0 #converting mild to 0
data['roast_type'].loc[data['roast_type'] == 'medium'] = 1.0 #converting medium to 1
data['roast_type'].loc[data['roast_type'] == 'mediumdark'] = 2.0 #converting mediumdark to 2
data['roast_type'].loc[data['roast_type'] == 'dark'] = 3.0 #converting dark to 3

#converts all strings/objects into floats
data = data._convert(numeric=True)

#deletes all non-null responses
data.dropna(subset=['target_saturation', 'strength', 'cup_size', 'grind_size', 'bean_type', 'user_id', 'temperature', 'roast_type', 'rating'], inplace=True)

#checking dataframe for there's any non-null responses
data.info() 

<class 'pandas.core.frame.DataFrame'>
Int64Index: 179 entries, 0 to 194
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   strength           179 non-null    float64
 1   roast_type         179 non-null    float64
 2   rating             179 non-null    float64
 3   target_saturation  179 non-null    float64
 4   temperature        179 non-null    float64
 5   user_id            179 non-null    float64
 6   grind_size         179 non-null    float64
 7   bean_type          179 non-null    float64
 8   cup_size           179 non-null    float64
dtypes: float64(9)
memory usage: 14.0 KB


# KNN Model for Strength

In [79]:
#first splitting 60% out for training then 40% for validation & testing
x_train, x_test, y_train, y_test = train_test_split(data[['roast_type','bean_type']].values, data[['strength']].values, test_size=0.2, random_state=42) 

#then splitting the 40% into 20% validation & 20% testing
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.25, random_state=42) 

knn_strength = neighbors.KNeighborsClassifier(n_neighbors=21) #set K Nearest Neighbors model as variable 
knn_strength.fit(x_train, y_train) #fitting training independent and dependent data 
y_pred = knn_strength.predict(x_validate) #model predict independent validation dataset values

#outputs accuracy score of dependent validate values and predicted independent values
#accuracy score: calculate the accuracy of faction of correct prediction
#(TP+TN)/ (TP+FN+TN+FP)
print('accuracy =', accuracy_score(y_validate, y_pred))

#outputs confusion matrix of dependent validate values and predicted independent values
print('confusion matrix')
print(confusion_matrix(y_validate, y_pred))



accuracy = 0.2777777777777778
confusion matrix
[[ 3  8  0]
 [ 6  6  0]
 [ 2 10  1]]


  return self._fit(X, y)


# KNN Model for Temperature

In [80]:
#first splitting 60% out for training then 40% for validation & testing
x_train, x_test, y_train, y_test = train_test_split(data[['roast_type','bean_type']].values, data[['temperature']].values, test_size=0.2, random_state=42) 

#then splitting the 40% into 20% validation & 20% testing
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.25, random_state=42) 

knn_temp = neighbors.KNeighborsClassifier(n_neighbors=21) #set K Nearest Neighbors model as variable 
knn_temp.fit(x_train, y_train) #fitting training independent and dependent data 
y_pred = knn_temp.predict(x_validate) #model predict independent validation dataset values

#outputs accuracy score of dependent validate values and predicted independent values
#accuracy score: calculate the accuracy of faction of correct prediction
#(TP+TN)/ (TP+FN+TN+FP)
print('accuracy =', accuracy_score(y_validate, y_pred))

#outputs confusion matrix of dependent validate values and predicted independent values
print('confusion matrix')
print(confusion_matrix(y_validate, y_pred))


accuracy = 0.08333333333333333
confusion matrix
[[0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 2 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 3 0 1 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 1 2 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]]


  return self._fit(X, y)


# KNN Model for Water Volume

In [81]:
#first splitting 60% out for training then 40% for validation & testing
x_train, x_test, y_train, y_test = train_test_split(data[['roast_type','bean_type']].values, data[['target_saturation']].values, test_size=0.2, random_state=42) 

#then splitting the 40% into 20% validation & 20% testing
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.25, random_state=42) 

knn_sat = neighbors.KNeighborsClassifier(n_neighbors=21) #set K Nearest Neighbors model as variable 
knn_sat.fit(x_train, y_train) #fitting training independent and dependent data 
y_pred = knn_sat.predict(x_validate) #model predict independent validation dataset values

#outputs accuracy score of dependent validate values and predicted independent values
#accuracy score: calculate the accuracy of faction of correct prediction
#(TP+TN)/ (TP+FN+TN+FP)
print('accuracy =', accuracy_score(y_validate, y_pred))

#outputs confusion matrix of dependent validate values and predicted independent values
print('confusion matrix')
print(confusion_matrix(y_validate, y_pred))


accuracy = 0.3611111111111111
confusion matrix
[[0 0 1 0]
 [0 1 5 4]
 [0 1 9 6]
 [0 1 5 3]]


  return self._fit(X, y)


# KNN Model for Grind Size

In [82]:
#first splitting 60% out for training then 40% for validation & testing
x_train, x_test, y_train, y_test = train_test_split(data[['roast_type','bean_type']].values, data[['grind_size']].values, test_size=0.2, random_state=42) 

#then splitting the 40% into 20% validation & 20% testing
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.25, random_state=42) 

knn_grind = neighbors.KNeighborsClassifier(n_neighbors=21) #set K Nearest Neighbors model as variable 
knn_grind.fit(x_train, y_train) #fitting training independent and dependent data 
y_pred = knn_grind.predict(x_validate) #model predict independent validation dataset values

#outputs accuracy score of dependent validate values and predicted independent values
#accuracy score: calculate the accuracy of faction of correct prediction
#(TP+TN)/ (TP+FN+TN+FP)
print('accuracy =', accuracy_score(y_validate, y_pred))

#outputs confusion matrix of dependent validate values and predicted independent values
print('confusion matrix')
print(confusion_matrix(y_validate, y_pred))


accuracy = 0.5555555555555556
confusion matrix
[[8 3 2]
 [1 6 4]
 [4 2 6]]


  return self._fit(X, y)


# KNN Model for Rating

In [83]:
#first splitting 60% out for training then 40% for validation & testing
x_train, x_test, y_train, y_test = train_test_split(data[['roast_type','bean_type']].values, data[['rating']].values, test_size=0.2, random_state=42) 

#then splitting the 40% into 20% validation & 20% testing
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.25, random_state=42) 

knn_rating = neighbors.KNeighborsClassifier(n_neighbors=21) #set K Nearest Neighbors model as variable 
knn_rating.fit(x_train, y_train) #fitting training independent and dependent data 
y_pred = knn_rating.predict(x_validate) #model predict independent validation dataset values

#outputs accuracy score of dependent validate values and predicted independent values
#accuracy score: calculate the accuracy of faction of correct prediction
#(TP+TN)/ (TP+FN+TN+FP)
print('accuracy =', accuracy_score(y_validate, y_pred))

#outputs confusion matrix of dependent validate values and predicted independent values
print('confusion matrix')
print(confusion_matrix(y_validate, y_pred))


accuracy = 0.05555555555555555
confusion matrix
[[0 1 0 0 0 0 1 0 0 0]
 [2 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 3 0 0 0]
 [2 0 0 0 4 0 1 0 0 0]
 [0 1 0 0 1 0 1 0 0 0]
 [0 1 0 0 0 0 2 0 0 0]
 [2 0 0 0 1 0 1 0 1 0]
 [0 0 0 0 1 1 0 0 0 0]
 [0 2 0 0 1 0 1 1 0 0]
 [0 0 0 0 0 0 2 1 0 0]]


  return self._fit(X, y)


# Collects user dataset from database & puts into a pandas dataframe for ML

In [154]:
#now reading from database to implement machine learning
docs = db.collection('brews').stream() #reads from all documents in a collection of the database
data = pd.DataFrame() #this creates the pandas dataframe containing all of the user feedback from the database
doc_id = []
for doc in docs: #iterates through the document stream
    doc_id.append(doc.id)
    print(f'{doc.id} => {doc.to_dict()}') #shows programmer what is being saved
    data = data.append(doc.to_dict(),ignore_index=True) # appends it to my pandas dataframe

Nov112022164051lOT0hpcl => {'time': '16:40:51', 'strength': '2', 'temperature': '200', 'date': 'Nov 11 2022', 'rating': 10, 'status': 'done', 'user_id': 'lOT0hpcl4Pb4tEE11RlawC5eodn2', 'cup_size': '16 oz', 'roast_type': 'Light', 'target_saturation': '50', 'bean_type': 'Robusta'}
Nov112022164220lOT0hpcl => {'user_id': 'lOT0hpcl4Pb4tEE11RlawC5eodn2', 'strength': 'null', 'target_saturation': '50', 'rating': 'null', 'roast_type': 'MedDark', 'cup_size': '16 oz', 'time': '16:42:20', 'status': 'done', 'bean_type': 'Liberica', 'date': 'Nov 11 2022', 'temperature': '200'}
Nov112022170727lOT0hpcl => {'bean_type': 'Robusta', 'cup_size': '16 oz', 'date': 'Nov 11 2022', 'roast_type': 'Medium', 'time': '17:07:27', 'user_id': 'lOT0hpcl4Pb4tEE11RlawC5eodn2', 'status': 'done', 'strength': 'null', 'rating': 'null', 'temperature': '200', 'target_saturation': '50'}
Nov112022174247lOT0hpcl => {'time': '17:42:47', 'cup_size': '8 oz', 'date': 'Nov 11 2022', 'bean_type': 'Arabica', 'status': 'done', 'user_id'

In [155]:
data.head() #this is to see the top 5 instances to see what i pulled from the database

Unnamed: 0,time,strength,temperature,date,rating,status,user_id,cup_size,roast_type,target_saturation,bean_type,grind_size
0,16:40:51,2.0,200,Nov 11 2022,10.0,done,lOT0hpcl4Pb4tEE11RlawC5eodn2,16 oz,Light,50,Robusta,
1,16:42:20,,200,Nov 11 2022,,done,lOT0hpcl4Pb4tEE11RlawC5eodn2,16 oz,MedDark,50,Liberica,
2,17:07:27,,200,Nov 11 2022,,done,lOT0hpcl4Pb4tEE11RlawC5eodn2,16 oz,Medium,50,Robusta,
3,17:42:47,,200,Nov 11 2022,,done,lOT0hpcl4Pb4tEE11RlawC5eodn2,8 oz,Light,50,Arabica,
4,17:49:32,,200,Nov 11 2022,,started,lOT0hpcl4Pb4tEE11RlawC5eodn2,16 oz,MedDark,50,Robusta,


In [156]:
#this replaces all of the string values of coffee bean type and converting it to float values
data['bean_type'].loc[data['bean_type'] == 'Robusta'] = 1.0 #converting robusta to 1
data['bean_type'].loc[data['bean_type'] == 'Arabica'] = 2.0 #converting arabica to 2
data['bean_type'].loc[data['bean_type'] == 'Liberica'] = 3.0 #converting liberica to 3
data['bean_type'].loc[data['bean_type'] == 'Excelsa'] = 4.0 #converting excelsa to 4

#this replaces all of the string values of grind size and converting it to float values
#data['grind_size'].loc[data['grind_size'] == 'medium'] = 0.0 #converting medium to 0
#data['grind_size'].loc[data['grind_size'] == 'larger'] = 1.0 #converting larger to 1
#data['grind_size'].loc[data['grind_size'] == 'smaller'] = 2.0 #converting smaller to 2

#this replaces all of the string values of roast type and converting it to float values
data['roast_type'].loc[data['roast_type'] == 'Light'] = 0.0 #converting mild to 0
data['roast_type'].loc[data['roast_type'] == 'Medium'] = 1.0 #converting medium to 1
data['roast_type'].loc[data['roast_type'] == 'MedDark'] = 2.0 #converting mediumdark to 2
data['roast_type'].loc[data['roast_type'] == 'Dark'] = 3.0 #converting dark to 3

data = data._convert(numeric=True) #converts all strings/objects into floats


data.head() #calling top 5 instances of dataframe to check that I called it correctly


Unnamed: 0,time,strength,temperature,date,rating,status,user_id,cup_size,roast_type,target_saturation,bean_type,grind_size
0,16:40:51,2.0,200,Nov 11 2022,10.0,done,lOT0hpcl4Pb4tEE11RlawC5eodn2,16 oz,0.0,50,1.0,
1,16:42:20,,200,Nov 11 2022,,done,lOT0hpcl4Pb4tEE11RlawC5eodn2,16 oz,2.0,50,3.0,
2,17:07:27,,200,Nov 11 2022,,done,lOT0hpcl4Pb4tEE11RlawC5eodn2,16 oz,1.0,50,1.0,
3,17:42:47,,200,Nov 11 2022,,done,lOT0hpcl4Pb4tEE11RlawC5eodn2,8 oz,0.0,50,2.0,
4,17:49:32,,200,Nov 11 2022,,started,lOT0hpcl4Pb4tEE11RlawC5eodn2,16 oz,2.0,50,1.0,


# KNN Prediction  

In [165]:
recent_entry = data.tail(1) #pulls the most recent
if recent_entry['rating'].values == 10.0: #retains values if ratings are 10
    #runs knn prediction on rating, saturation, temp, strength, and grind to train model
    rating_prediction = knn_strength.predict(recent_entry[['roast_type','bean_type']].values)
    saturation_prediction = knn_sat.predict(recent_entry[['roast_type','bean_type']].values)
    temp_prediction = knn_temp.predict(recent_entry[['roast_type','bean_type']].values)
    strength_prediction = knn_strength.predict(recent_entry[['roast_type','bean_type']].values)
    grind_prediction = knn_grind.predict(recent_entry[['roast_type','bean_type']].values)
    
elif recent_entry['rating'].values < 10.0: #implements algorithm if ratings are less than 10
    #runs knn prediction on rating, saturation, temp, strength, and grind to implement suggestions

    rating_prediction = knn_strength.predict(recent_entry[['roast_type','bean_type']].values)
    saturation_prediction = knn_sat.predict(recent_entry[['roast_type','bean_type']].values)
    temp_prediction = knn_temp.predict(recent_entry[['roast_type','bean_type']].values)
    strength_prediction = knn_strength.predict(recent_entry[['roast_type','bean_type']].values)
    grind_prediction = knn_grind.predict(recent_entry[['roast_type','bean_type']].values)

    for i in grind_prediction: #pulling data from grind prediction array to add to database
        grind_predict = i
    
    for i in temp_prediction: #pulling data from temp prediction array to add to database
        temp_predict = i

    for i in saturation_prediction: #pulling data from saturation prediction array to add to database
        sat_predict = i

    users_ref = db.collection("brews").document(doc_id[-1]).update({ #updates document to include new grind suggestion
        'grind_size': str(grind_predict)
    })

    useridstring = recent_entry['user_id'].values #converting user id into string from array
    for i in useridstring:
        useridstring = i

    temp_ref = db.collection("users").document(str(useridstring)).update({ #updates document to include next temperature
        'next_target_temperature': str(temp_predict)
    })

    sat_ref = db.collection("users").document(str(useridstring)).update({ #updates document to include next water saturation
        'next_target_saturation': str(sat_predict)
    })

    

# Convert KNN to Tensorflow

In [173]:
paramk = 21 # parameter k of K-nearest neighbors

# Defining KNN Graph with L0 Norm
x = tf.placeholder(trImages.dtype, shape=trImages.shape) # all train images, i.e., 60000 x 28 x 28
y = tf.placeholder(tImages.dtype, shape=tImages.shape[1:]) # a test image, 28 x 28

xThresholded = tf.clip_by_value(tf.cast(x, tf.int32), 0, 1) # x is int8 which is not supported in many tf functions, hence typecast
yThresholded = tf.clip_by_value(tf.cast(y, tf.int32), 0, 1) # clip_by_value converts dataset to tensors of 0 and 1, i.e., 1 where tensor is non-zero
computeL0Dist = tf.count_nonzero(xThresholded - yThresholded, axis=[1,2]) # Computing L0 Norm by reducing along axes
findKClosestTrImages = tf.contrib.framework.argsort(computeL0Dist, direction='ASCENDING') # sorting (image) indices in order of ascending metrics, pick first k in the next step
findLabelsKClosestTrImages = tf.gather(trLabels, findKClosestTrImages[0:paramk]) # doing trLabels[findKClosestTrImages[0:paramk]] throws error, hence this workaround
findULabels, findIdex, findCounts = tf.unique_with_counts(findLabelsKClosestTrImages) # examine labels of k closest Train images
findPredictedLabel = tf.gather(findULabels, tf.argmax(findCounts)) # assign label to test image based on most occurring labels among k closest Train images

# Let's run the graph
numErrs = 0
numTestImages = np.shape(tLabels)[0]
numTrainImages = np.shape(trLabels)[0] # so many train images

with tf.Session() as sess:
  for iTeI in range(0,numTestImages): # iterate each image in test set
    predictedLabel = sess.run([findPredictedLabel], feed_dict={x:trImages, y:tImages[iTeI]})   

    if predictedLabel == tLabels[iTeI]:
      numErrs += 1
      print(numErrs,"/",iTeI)
      print("\t\t", predictedLabel[0], "\t\t\t\t", tLabels[iTeI])
      
      if (1):
        plt.figure(1)
        plt.subplot(1,2,1)
        plt.imshow(tImages[iTeI])
        plt.title('Test Image has label %i' %(predictedLabel[0]))
        
        for i in range(numTrainImages):
          if trLabels[i] == predictedLabel:
            plt.subplot(1,2,2)
            plt.imshow(trImages[i])
            plt.title('Correctly Labeled as %i' %(tLabels[iTeI]))
            plt.draw()
            break
        plt.show()

print("# Classification Errors= ", numErrs, "% accuracy= ", 100.*(numTestImages-numErrs)/numTestImages)

#first splitting 60% out for training then 40% for validation & testing
#x_train, x_test, y_train, y_test = train_test_split(data[['roast_type','bean_type']].values, data[['strength']].values, test_size=0.2, random_state=42) 

#then splitting the 40% into 20% validation & 20% testing
#x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.25, random_state=42) 

#knn_strength = neighbors.KNeighborsClassifier(n_neighbors=21) #set K Nearest Neighbors model as variable 
#knn_strength.fit(x_train, y_train) #fitting training independent and dependent data 
#y_pred = knn_strength.predict(x_validate) #model predict independent validation dataset values

#outputs accuracy score of dependent validate values and predicted independent values
#accuracy score: calculate the accuracy of faction of correct prediction
#(TP+TN)/ (TP+FN+TN+FP)
#print('accuracy =', accuracy_score(y_validate, y_pred))

#outputs confusion matrix of dependent validate values and predicted independent values
#print('confusion matrix')
#print(confusion_matrix(y_validate, y_pred))



AttributeError: module 'tensorflow' has no attribute 'placeholder'