# Load and Scale Data

In [94]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import Imputer
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

In [71]:
def classify_ratings(y):
    y = y - y.min()
    y /= y.max()
    y *= 10
    return y

In [92]:
def load_data():
    imp = Imputer(missing_values=np.nan, strategy='most_frequent')
    train_df = pd.read_csv('yelp_dataset/businesses_train.csv')
    test_df = pd.read_csv('yelp_dataset/businesses_test.csv')
    data = train_df.append(test_df)
    result = data['rating'].values
    data = data.drop(['business_id','categories','rating','id'], axis = 1)
    data = data.values
    return imp.fit_transform(data), classify_ratings(result)

In [115]:
def scale_input_data(X):
    scaler = StandardScaler()
    return scaler.fit_transform(X)

In [116]:
data = load_data()
X = scale_input_data(data[0])
y = data[1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Neural Network Regression

In [117]:
mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=10,max_iter=1000, shuffle=True, random_state=1,activation='logistic', alpha=0.1, learning_rate='adaptive', beta_1=0.7, beta_2=0.8)
mlp.fit(X_train, y_train)

MLPRegressor(activation='logistic', alpha=0.1, batch_size='auto', beta_1=0.7,
       beta_2=0.8, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=10, learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [118]:
print("RMSE MLP Regressor: ", mean_squared_error(mlp.predict(X_test), y_test))

RMSE MLP Regressor:  0.956230029393


# Neural Network Classifier

In [120]:
y_train = np.rint(y_train)
y_test = np.rint(y_test)

In [121]:
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(solver='sgd', alpha=0.1,hidden_layer_sizes=10, random_state=1)
clf.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.1, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=10, learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='sgd', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [122]:
print("Classifier Accuracy : ", clf.score(X_test, y_test))

MLP Accuracy :  0.412458923713
