# Model and Conclusion

# Setup

In [1]:
# dependencies
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline 

import seaborn as sns

import missingno as msno

In [2]:
import turicreate as tc

# Get data

In [3]:
# pandas - the whole table is held in memory (limited scalability)
# df = pd.read_csv('data/clean_table.csv')

In [4]:
# Load the time-slot table as a Turi Create SFrame, which is the better
# choice once the data has to scale up.
df = tc.SFrame.read_csv(
    url='./data/base_table_time_slots.csv',
)

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,str,str,int,str,int,str,int,int,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


# Data Exploration
## Standard analysis

In [5]:
# (number of rows, number of columns) of the loaded table
df.shape

(3386, 10)

In [6]:
# pandas only
# df.dtypes

In [7]:
# Preview the first five rows to check the columns and their contents.
df.head(n=5)

X1,activity,category,start_date[ms],start_date,end_date[ms]
0,Trello,Personal Adjusting,1540159273005,Mon Oct 22 00:01:13 GMT+02:00 2018 ...,1540159869559
1,Series / Docu,Entertainment,1540159869559,Mon Oct 22 00:11:09 GMT+02:00 2018 ...,1540162820068
2,Sleep,Refresh,1540162820068,Mon Oct 22 01:00:20 GMT+02:00 2018 ...,1540189458018
3,Moving - youtube,Transport,1540189458018,Mon Oct 22 08:24:18 GMT+02:00 2018 ...,1540189949037
4,Trello,Personal Adjusting,1540189949037,Mon Oct 22 08:32:29 GMT+02:00 2018 ...,1540190444165

end_date,activityDuration[m],id,user_id
Mon Oct 22 00:11:09 GMT+02:00 2018 ...,9,1,0
Mon Oct 22 01:00:20 GMT+02:00 2018 ...,49,2,1
Mon Oct 22 08:24:18 GMT+02:00 2018 ...,443,3,2
Mon Oct 22 08:32:29 GMT+02:00 2018 ...,8,4,3
Mon Oct 22 08:40:44 GMT+02:00 2018 ...,8,5,4


# Is the data good enough for the model?

Conclusion: A recommender system needs a table with a user ID, an item (activity) ID and, optionally, ratings.

- We have a user ID column (`user_id`).
- We have an activity column (`activity`).
- We have no ratings.

Because we have no ratings, we will instead look for patterns in the similarity of activities.



# Split dataset

In [8]:
#train, test = tc.recommender.util.random_split_by_user(df, 'category', 'activity') 

# Build model pipeline

In [9]:
# Build the recommender from (user_id, activity) pairs only; no target/rating
# column is passed because the data has none.
model = tc.recommender.create(
    observation_data=df,
    user_id='user_id',
    item_id='activity',
    ranking=False,
)

# Declare hyperparameter

# Fit and tune with cross-validation

# Evaluate Model

In [14]:
# Recommend activities for every user known to the model (no `users` filter
# is passed). exclude_known=False is meant to keep activities a user has
# already logged eligible for recommendation -- see the Turi Create
# `recommend` documentation to confirm.
results = model.recommend(exclude_known=False)
# Bare last expression so the notebook renders the result table.
results

user_id,activity,score,rank
0,Clean,0.15479297041893,1
0,Sleep,0.1420957744121551,2
0,Social - girl,0.1307287156581878,3
0,Food,0.1295599102973938,4
0,Moving - social,0.1286361813545227,5
0,Series / Docu,0.1195059418678283,6
0,Food - Prepare,0.1185280919075012,7
0,Learning - university,0.1174400925636291,8
0,Social - Real Life,0.1160344123840332,9
0,WC,0.1151248633861541,10


In [19]:
# Same query as above, restricted to the single user with user_id 3.
# Note: this rebinds `results`, replacing the all-users table.
results = model.recommend(users=[3], exclude_known=False)
results

user_id,activity,score,rank
3,Sleep,0.1596254543824629,1
3,Clean,0.1527095166119662,2
3,Food,0.1435760042884133,3
3,Food - Prepare,0.1314592144706032,4
3,Moving - social,0.1286273598670959,5
3,Social - girl,0.1270830793814225,6
3,WC,0.1195696700703014,7
3,Analyzing,0.1172505508769642,8
3,Social - Real Life,0.1167457483031533,9
3,Learning - university,0.1161378080194646,10


In [11]:
# Look up the activities the model considers most similar to 'Sleep' and
# print as many rows as were requested.
n_rows = 10
activities = ['Sleep']
similar_items = model.get_similar_items(items=activities, k=n_rows)
similar_items.print_rows(num_rows=n_rows)

+----------+--------------------+---------------------+------+
| activity |      similar       |        score        | rank |
+----------+--------------------+---------------------+------+
|  Sleep   |        Food        |  0.3153526782989502 |  1   |
|  Sleep   |       Clean        | 0.21962618827819824 |  2   |
|  Sleep   |   Social - girl    | 0.20202022790908813 |  3   |
|  Sleep   |  Moving - social   | 0.19895285367965698 |  4   |
|  Sleep   |   Series / Docu    | 0.18435752391815186 |  5   |
|  Sleep   |         WC         | 0.18435752391815186 |  6   |
|  Sleep   | Social - Real Life |  0.1666666865348816 |  7   |
|  Sleep   |   Food - Prepare   |  0.1666666865348816 |  8   |
|  Sleep   |    Food - shit     | 0.16463416814804077 |  9   |
|  Sleep   |   Work - Purpose   | 0.15882354974746704 |  10  |
+----------+--------------------+---------------------+------+
[10 rows x 4 columns]



# Select winner model

# Save winning model

In [12]:
# Persist the fitted model to disk, relative to the notebook's working directory.
model.save(location="time_slot_recommendation.model")

# Load winning model

In [13]:
# Reload the saved model from disk; this rebinds `model` to the loaded copy.
model = tc.load_model(location="time_slot_recommendation.model")

# Communicate Results (Conclusion)

TODO