In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import deque
import random

import firebase_admin
from firebase_admin import credentials, firestore

# Connect to Firebase with the local service-account key and open a Firestore client.
cred = credentials.Certificate('./serviceAccountKey.json')
# initialize_app() raises ValueError when the default app already exists, which
# happens whenever this cell is re-run in a live kernel; reuse the existing app
# in that case instead of failing.
try:
  default_app = firebase_admin.get_app()
except ValueError:
  default_app = firebase_admin.initialize_app(cred)
db = firestore.client()
print('Firebase Connected')

Firebase Connected


In [2]:
# install category_encoders library
!pip install category_encoders

from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import category_encoders as ce

Collecting category_encoders
[?25l  Downloading https://files.pythonhosted.org/packages/44/57/fcef41c248701ee62e8325026b90c432adea35555cbc870aff9cfba23727/category_encoders-2.2.2-py2.py3-none-any.whl (80kB)
[K     |████                            | 10kB 4.8MB/s eta 0:00:01[K     |████████▏                       | 20kB 7.7MB/s eta 0:00:01[K     |████████████▏                   | 30kB 8.9MB/s eta 0:00:01[K     |████████████████▎               | 40kB 8.8MB/s eta 0:00:01[K     |████████████████████▎           | 51kB 5.1MB/s eta 0:00:01[K     |████████████████████████▍       | 61kB 5.7MB/s eta 0:00:01[K     |████████████████████████████▍   | 71kB 6.1MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 3.8MB/s 
Installing collected packages: category-encoders
Successfully installed category-encoders-2.2.2


  import pandas.util.testing as tm


In [3]:
# Collect user data from Firestore and put into DataFrame

# stream() is consumed exactly once and every document is converted to a dict
# a single time.
users_docs = db.collection(u'users').stream()
user_list = [snapshot.to_dict() for snapshot in users_docs]
user_key = [list(record.keys()) for record in user_list]

# The document with the most fields dictates the leading column order;
# default=[] keeps an empty collection from raising in max().
user_long_col = max(user_key, key=len, default=[])

# Build the frame in one pass (DataFrame.append in a loop is quadratic and no
# longer exists in pandas 2.x). Fields that only occur in other documents are
# kept and placed after the columns of the widest document.
user_df = pd.DataFrame(user_list)
user_df = user_df.reindex(
    columns=user_long_col + [col for col in user_df.columns if col not in user_long_col]
)
user_df

Unnamed: 0,satisfaction,email,userid,dob,lname,age,videoswatched,role,dobYear,fname,gender
0,3.88,test6@testmail.com,0KAAFtAXIHYEG6WnQ5cq96a4ilO2,2021 05 26,test6,0,25.0,user,2021,test6,male
1,,xadewar523@frnla.com,2TbJxfqD2wVERW6KOgisS8W9bAP2,2005-05-10 16:21:00+00:00,test,16,,user,2005,email,male
2,0.0,gate1001@testmail.com,2cBjJaRQKjWIjUi5oz1hTxZWZ0b2,2006 11 21,test,15,1.0,user,2006,gate1001,male
3,0.0,gate1002@testmail.com,FXF6bjg9JLfELtuWl1XqYE7wDaE2,2006 11 21,test,15,1.0,user,2006,gate1002,female
4,0.0,gate1003@testmail.com,FdU1cWctC1dX7vNVbTzemBWp01I2,2006 11 21,test,15,1.0,user,2006,gate1003,female
5,,gatetest3@gmail.com,H4qDaIws1cao4bKIr5w7KnD2c152,2000 05 05,Ee,21,,user,2000,yk,male
6,0.0,gate1004@testmail.com,my81SbX4tCWiGpmMak1S3rf04Lk2,2006 11 21,test,15,1.0,user,2006,gate1004,female
7,,test56@test.com,oiSWOgcKvwe6iW77TXJCJtbud1T2,2021 06 06,test56,0,,user,2021,test56,male
8,,test5@testmail.com,u5nWdta3n4dHR0ZaVYsxIirpGUw2,2021-05-24 05:05:46.410000+00:00,test5,0,,user,2021,test5,male
9,,gatetest2@gmail.com,vCgeD162vPgGKXV4Drf8lIIf5oK2,2000-10-10 15:11:00+00:00,yk,21,,admin,2000,yoget,male


In [4]:
# Collect video data from Firestore and put into DataFrame

# stream() is consumed exactly once and every document is converted to a dict
# a single time.
videos_docs = db.collection(u'videos').stream()
video_list = [snapshot.to_dict() for snapshot in videos_docs]
video_key = [list(record.keys()) for record in video_list]

# The document with the most fields dictates the leading column order;
# default=[] keeps an empty collection from raising in max().
video_long_col = max(video_key, key=len, default=[])

# Build the frame in one pass (DataFrame.append in a loop is quadratic and no
# longer exists in pandas 2.x). Fields that only occur in other documents are
# kept and placed after the columns of the widest document.
video_df = pd.DataFrame(video_list)
video_df = video_df.reindex(
    columns=video_long_col + [col for col in video_df.columns if col not in video_long_col]
)
video_df

Unnamed: 0,videoId,uniqueviewers,youtubeId,avgRating,title,category,view
0,2dZiMBwX_5Q,3.0,2dZiMBwX_5Q,3.38,Java Programming Tutorial 1 - Introduction to ...,java,66
1,2ePf9rue1Ao,,2ePf9rue1Ao,4.0,What is Artificial Intelligence? In 5 minutes.,artificial intelligent,6
2,BvJYXl2ywUE,,BvJYXl2ywUE,3.0,Introduction to HTML || Your First Web Page ||...,html,4
3,Hjl6gbg9kmk,,Hjl6gbg9kmk,4.5,Web Development: Intro to HTML,html,7
4,Ig1nfPjrETc,,Ig1nfPjrETc,3.0,Machine Learning Tutorial: Supervised Learning,machine learning,3
5,L--IxUH4fac,,L--IxUH4fac,3.0,Evolutionary Algorithms,evolutionary algorithm,3
6,OXWvrRLzEaU,,OXWvrRLzEaU,3.5,An Introduction to Generative Adversarial Netw...,generative adversarial networks,8
7,RaOejcczPas,,RaOejcczPas,5.0,"Introduction to Digital Twin: Simple, but deta...",digital twin,3
8,TlB_eWDSMt4,2.0,TlB_eWDSMt4,4.0,Node.js Tutorial for Beginners: Learn Node in ...,nodejs,18
9,USjZcfj8yxE,,USjZcfj8yxE,5.0,Learn Git In 15 Minutes,git,2


In [5]:
# Collect rating data from Firestore and put into DataFrame

# stream() is consumed exactly once and every document is converted to a dict
# a single time.
ratings_docs = db.collection(u'ratings').stream()
rating_list = [snapshot.to_dict() for snapshot in ratings_docs]
rating_key = [list(record.keys()) for record in rating_list]

# The document with the most fields dictates the leading column order;
# default=[] keeps an empty collection from raising in max().
rating_long_col = max(rating_key, key=len, default=[])

# Build the frame in one pass (DataFrame.append in a loop is quadratic and no
# longer exists in pandas 2.x). Fields that only occur in other documents are
# kept and placed after the columns of the widest document.
rating_df = pd.DataFrame(rating_list)
rating_df = rating_df.reindex(
    columns=rating_long_col + [col for col in rating_df.columns if col not in rating_long_col]
)
rating_df

Unnamed: 0,videoId,userId,value
0,2dZiMBwX_5Q,0KAAFtAXIHYEG6WnQ5cq96a4ilO2,3
1,Hjl6gbg9kmk,0KAAFtAXIHYEG6WnQ5cq96a4ilO2,5
2,OXWvrRLzEaU,0KAAFtAXIHYEG6WnQ5cq96a4ilO2,3
3,TlB_eWDSMt4,0KAAFtAXIHYEG6WnQ5cq96a4ilO2,5
4,W6NZfCO5SIk,0KAAFtAXIHYEG6WnQ5cq96a4ilO2,4
5,edZ_JYpOM8U,0KAAFtAXIHYEG6WnQ5cq96a4ilO2,5
6,lL2PXC1fmnQ,0KAAFtAXIHYEG6WnQ5cq96a4ilO2,1
7,yzIMircGU5I,0KAAFtAXIHYEG6WnQ5cq96a4ilO2,5
8,W6NZfCO5SIk,2cBjJaRQKjWIjUi5oz1hTxZWZ0b2,2
9,2dZiMBwX_5Q,FXF6bjg9JLfELtuWl1XqYE7wDaE2,3


In [6]:
print('Encoding data...')

# The user encoder's vocabulary is the set of distinct ids in user_df.userid.
actual_userId = np.unique(user_df['userid'])
user_enc = LabelEncoder()
user_enc.fit(actual_userId)

# The video encoder's vocabulary is the set of distinct ids in video_df.videoId.
video_userId = np.unique(video_df['videoId'])
video_enc = LabelEncoder()
video_enc.fit(video_userId)

# Write the integer codes back into every frame. user_df gains a new 'userId'
# column next to the raw 'userid'; the other id columns are overwritten in
# place, so this cell cannot be re-run without reloading the frames first.
for frame, source_col in ((user_df, 'userid'), (rating_df, 'userId')):
  frame['userId'] = user_enc.transform(frame[source_col])
for frame in (video_df, rating_df):
  frame['videoId'] = video_enc.transform(frame['videoId'])

print('Encoding done!')

Encoding data...
Encoding done!


In [9]:
# Inspect video_df after the encoding step: the videoId column now holds the
# integer labels produced by video_enc, while youtubeId is left untouched.
video_df

Unnamed: 0,videoId,uniqueviewers,youtubeId,avgRating,title,category,view
0,0,3.0,2dZiMBwX_5Q,3.38,Java Programming Tutorial 1 - Introduction to ...,java,66
1,1,,2ePf9rue1Ao,4.0,What is Artificial Intelligence? In 5 minutes.,artificial intelligent,6
2,2,,BvJYXl2ywUE,3.0,Introduction to HTML || Your First Web Page ||...,html,4
3,3,,Hjl6gbg9kmk,4.5,Web Development: Intro to HTML,html,7
4,4,,Ig1nfPjrETc,3.0,Machine Learning Tutorial: Supervised Learning,machine learning,3
5,5,,L--IxUH4fac,3.0,Evolutionary Algorithms,evolutionary algorithm,3
6,6,,OXWvrRLzEaU,3.5,An Introduction to Generative Adversarial Netw...,generative adversarial networks,8
7,7,,RaOejcczPas,5.0,"Introduction to Digital Twin: Simple, but deta...",digital twin,3
8,8,2.0,TlB_eWDSMt4,4.0,Node.js Tutorial for Beginners: Learn Node in ...,nodejs,18
9,9,,USjZcfj8yxE,5.0,Learn Git In 15 Minutes,git,2
