### Setting Up DataFrame

In [1]:
import pandas as pd
import numpy as np
import keras

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.utils import np_utils
from sklearn import preprocessing

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the CSV holding all available fields, then trim it in one pipeline:
#   1. drop every column whose header contains "unnamed" (case-insensitive)
#   2. drop the explicitly listed columns by name
df = (
    pd.read_csv('Incomplete.csv')
    .pipe(lambda frame: frame.drop(
        frame.columns[frame.columns.str.contains('unnamed', case=False)],
        axis=1,
    ))
    .drop(
        ['business_id', 'date', 'user_id', 'user_cool', 'user_funny',
         'user_userful', 'business_city'],
        axis=1,
    )
)
df.head(10)

Unnamed: 0,stars,name_of_month,day_of_week,user_average_stars,user_num_reviews,user_num_friends,business_num_reviews,business_average_stars
0,5,May,Saturday,4.67,6,1,1953,4.0
1,5,May,Saturday,4.67,6,1,84,4.0
2,5,May,Saturday,4.67,6,1,50,4.5
3,5,May,Saturday,4.67,6,1,70,4.0
4,4,May,Saturday,4.67,6,1,61,3.5
5,4,May,Saturday,4.67,6,1,397,4.5
6,5,September,Wednesday,5.0,1,0,38,4.5
7,4,May,Friday,3.62,359,48,111,4.0
8,4,October,Tuesday,3.62,359,48,64,4.0
9,3,February,Saturday,3.62,359,48,251,3.5


In [3]:
# Encode the text label columns as integer category codes.
# The codes follow pandas' category order (sorted labels), not calendar
# order -- e.g. 'May' becomes 8 and 'Saturday' becomes 2.
for label_col in ('name_of_month', 'day_of_week'):
    df[label_col] = df[label_col].astype('category').cat.codes

# Preview the encoded columns
df.head(3)

Unnamed: 0,stars,name_of_month,day_of_week,user_average_stars,user_num_reviews,user_num_friends,business_num_reviews,business_average_stars
0,5,8,2,4.67,6,1,1953,4.0
1,5,8,2,4.67,6,1,84,4.0
2,5,8,2,4.67,6,1,50,4.5


In [5]:
# Partition the rows by how many reviews the user has written:
#   user_num_reviews <= 5  -> Cold_Start_df
#   user_num_reviews  > 5  -> Neural_Net_df
# A row with a missing count satisfies neither comparison and so lands in neither frame.
review_counts = df['user_num_reviews']
Cold_Start_df = df.loc[review_counts <= 5]
Neural_Net_df = df.loc[review_counts > 5]

# Report the number of rows in each partition
for label, part in (('Cold_Start_df:', Cold_Start_df), ('Neural_Net_df:', Neural_Net_df)):
    print(label, len(part))

Cold_Start_df: 1078471
Neural_Net_df: 4183198


In [6]:
# Mean-normalize every column of Cold_Start_df:
#     (value - column mean) / (column max - column min)
# A column that holds one constant value has a zero range, and dividing by it
# would turn the whole column into NaN (0 / 0). Such ranges are replaced by 1,
# so a constant column comes out as all zeros instead; every other column is
# computed exactly as before.
cold_start_range = (Cold_Start_df.max() - Cold_Start_df.min()).replace(0, 1)
Cold_Start_df = (Cold_Start_df - Cold_Start_df.mean()) / cold_start_range
Cold_Start_df.head(3)

Unnamed: 0,stars,name_of_month,day_of_week,user_average_stars,user_num_reviews,user_num_friends,business_num_reviews,business_average_stars
6,0.334265,0.489716,0.503256,0.331519,-0.362373,-0.0122,-0.025228,0.182696
87,0.334265,-0.419375,0.503256,0.331519,-0.362373,-0.0122,-0.029849,0.307696
88,-0.665735,-0.419375,0.503256,-0.468481,-0.162373,-0.0122,0.000186,-0.192304


In [7]:
# Mean-normalize every column of Neural_Net_df:
#     (value - column mean) / (column max - column min)
# A column that holds one constant value has a zero range, and dividing by it
# would turn the whole column into NaN (0 / 0). Such ranges are replaced by 1,
# so a constant column comes out as all zeros instead; every other column is
# computed exactly as before.
neural_net_range = (Neural_Net_df.max() - Neural_Net_df.min()).replace(0, 1)
Neural_Net_df = (Neural_Net_df - Neural_Net_df.mean()) / neural_net_range
Neural_Net_df.head(3)

Unnamed: 0,stars,name_of_month,day_of_week,user_average_stars,user_num_reviews,user_num_friends,business_num_reviews,business_average_stars
0,0.313889,0.225229,-0.16761,0.229086,-0.012244,-0.009125,0.213405,0.06982
1,0.313889,0.225229,-0.16761,0.229086,-0.012244,-0.009125,-0.040605,0.06982
2,0.313889,0.225229,-0.16761,0.229086,-0.012244,-0.009125,-0.045226,0.19482


In [8]:
# Write the normalized cold-start rows to CSV (the row index is written too,
# since to_csv keeps it by default)
Cold_Start_df.to_csv('Cold_Start_df.csv')

In [9]:
# Write the normalized neural-net rows to CSV (the row index is written too,
# since to_csv keeps it by default)
Neural_Net_df.to_csv('Neural_Net_df.csv')