In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

%matplotlib inline

# import seaborn as sns
# sns.set(style="whitegrid", color_codes=True)

import seaborn as sns

# For plot sizes
from pylab import rcParams
rcParams['figure.figsize'] = 10, 5

# this is the standard import if you're using "formula notation" (similar to R)
import statsmodels.formula.api as smf

In [2]:
# Load only the rows for journey pattern 00401001 and only the columns used below.
df = pd.read_hdf(
    "cleaned_store.h5",
    key="table_name",
    where='Journey_Pattern_ID == "00401001"',
    columns=[
        "Timestamp", "Journey_Pattern_ID", "Time_Frame", "Vehicle_Journey_ID",
        "Week_Day", "Distance", "TravelTime", "TimeCategory", "Rain",
    ],
)

# Rename by name rather than overwriting df.columns positionally, so that a
# different column order coming back from the store cannot silently attach
# the wrong label to a column.
df = df.rename(columns={
    "Journey_Pattern_ID": "JourneyPatternId",
    "Time_Frame": "TimeFrame",
    "Vehicle_Journey_ID": "VehicleJourneyId",
    "Week_Day": "Day",
})

In [3]:
df.head()

Unnamed: 0,Timestamp,JourneyPatternId,TimeFrame,VehicleJourneyId,Day,Distance,TravelTime,TimeCategory,Rain
823,2012-11-12 13:01:23,401001,2012-11-12,6462,0,10.693702,1953.0,12:30:00,0.0
824,2012-11-12 13:02:41,401001,2012-11-12,6462,0,11.236275,2031.0,12:30:00,0.0
825,2012-11-12 13:03:01,401001,2012-11-12,6462,0,11.359826,2051.0,12:30:00,0.0
826,2012-11-12 13:05:23,401001,2012-11-12,6462,0,11.750859,2193.0,12:30:00,0.0
827,2012-11-12 13:06:02,401001,2012-11-12,6462,0,12.15868,2232.0,12:30:00,0.0


In [4]:
df.shape

(256594, 9)

## Change Day Category

In [5]:
# Order the rows by day, then journey, then timestamp, and renumber them
# 0..n-1 (the previous index is discarded rather than kept as a column).
df = (
    df.sort_values(['TimeFrame', 'VehicleJourneyId', 'Timestamp'], ascending=True)
      .reset_index(drop=True)
)

In [6]:
df.Day = df.Day.astype("str")

In [7]:
# Create group object to work with 
gb = df.groupby(["TimeFrame"], as_index=False, group_keys=False)

In [8]:
# Day category for each weekday number (stored as strings in the Day column).
_DAY_CATEGORY_BY_NUMBER = {
    '0': 'Mon-Fri',
    '1': 'Mon-Fri',
    '2': 'Mon-Fri',
    '3': 'Mon-Fri',
    '4': 'Mon-Fri',
    '5': 'Sat',
    '6': 'Sun',
}


def change_day(group):
    """Relabel the ``Day`` column of one group with a day category.

    The most frequent value of ``group.Day`` decides the label written to
    every row of the group: '0'-'4' -> 'Mon-Fri', '5' -> 'Sat', '6' -> 'Sun'.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group; must have a ``Day`` column.

    Returns
    -------
    pandas.DataFrame
        A relabelled copy of ``group``. When the most frequent ``Day`` value
        has no mapping, or ``Day`` has no countable values, ``group`` itself
        is returned unchanged.
    """
    day_counts = group.Day.value_counts()
    if day_counts.empty:
        # Nothing to decide on; previously this raised IndexError.
        return group

    category = _DAY_CATEGORY_BY_NUMBER.get(day_counts.index[0])
    if category is None:
        return group

    # Work on a copy: mutating the object handed over by groupby.apply is
    # not supported by pandas and can produce unexpected results.
    relabelled = group.copy()
    relabelled["Day"] = category
    return relabelled

In [9]:
df = gb.apply(change_day)

In [10]:
df.Day = df.Day.astype("category")

In [11]:
df.dtypes

Timestamp           datetime64[ns]
JourneyPatternId            object
TimeFrame           datetime64[ns]
VehicleJourneyId            object
Day                       category
Distance                   float64
TravelTime                 float64
TimeCategory                object
Rain                       float32
dtype: object

## Change TimeCategory To New Speed Category

In [12]:
df.head()

Unnamed: 0,Timestamp,JourneyPatternId,TimeFrame,VehicleJourneyId,Day,Distance,TravelTime,TimeCategory,Rain
0,2012-11-06 12:40:39,401001,2012-11-06,6320,Mon-Fri,0.0,0.0,12:30:00,0.0
1,2012-11-06 12:42:58,401001,2012-11-06,6320,Mon-Fri,1.288087,139.0,12:30:00,0.0
2,2012-11-06 12:43:41,401001,2012-11-06,6320,Mon-Fri,1.539224,182.0,12:30:00,0.0
3,2012-11-06 12:43:59,401001,2012-11-06,6320,Mon-Fri,1.860739,200.0,12:30:00,0.0
4,2012-11-06 12:44:20,401001,2012-11-06,6320,Mon-Fri,2.188079,221.0,12:30:00,0.0


In [13]:
# Rename only the column that is being repurposed, by name. Overwriting all of
# df.columns positionally would mislabel data if the column order ever changed.
df = df.rename(columns={"TimeCategory": "Speed"})

In [14]:
# Re-establish day / journey / timestamp ordering and give the frame a fresh
# 0..n-1 index, dropping the old index instead of materialising it as a column.
ordered = df.sort_values(['TimeFrame', 'VehicleJourneyId', 'Timestamp'], ascending=True)
df = ordered.reset_index(drop=True)

In [15]:
# Create group object to work with 
gb = df.groupby(["TimeFrame", "VehicleJourneyId"], as_index=False, group_keys=False)

In [16]:
# Speed category for each (day category, half-hour time slot).
# Slots that are not listed for a day (03:00:00 to 05:30:00) have no mapping,
# so groups falling in them keep their original time-slot value.
_SPEED_BY_DAY_AND_SLOT = {
    'Mon-Fri': {
        "06:00:00": 'Fast',   "06:30:00": 'Fast',
        "07:00:00": 'Medium', "07:30:00": 'Medium',
        "08:00:00": 'Slow',   "08:30:00": 'Slow',
        "09:00:00": 'Slow',   "09:30:00": 'Medium',
        "10:00:00": 'Medium', "10:30:00": 'Medium',
        "11:00:00": 'Medium', "11:30:00": 'Medium',
        "12:00:00": 'Medium', "12:30:00": 'Medium',
        "13:00:00": 'Slow',   "13:30:00": 'Slow',
        "14:00:00": 'Medium', "14:30:00": 'Medium',
        "15:00:00": 'Medium', "15:30:00": 'Medium',
        "16:00:00": 'Slow',   "16:30:00": 'Slow',
        "17:00:00": 'Slow',   "17:30:00": 'Slow',
        "18:00:00": 'Slow',   "18:30:00": 'Medium',
        "19:00:00": 'Medium', "19:30:00": 'Medium',
        "20:00:00": 'Medium', "20:30:00": 'Medium',
        "21:00:00": 'Medium', "21:30:00": 'Fast',
        "22:00:00": 'Fast',   "22:30:00": 'Fast',
        "23:00:00": 'Fast',   "23:30:00": 'Fast',
        "00:00:00": 'Fast',   "00:30:00": 'Fast',
        "01:00:00": 'Fast',   "01:30:00": 'Fast',
        "02:00:00": 'Fast',   "02:30:00": 'Fast',
    },
    'Sat': {
        "06:00:00": 'Fast',   "06:30:00": 'Fast',
        "07:00:00": 'Fast',   "07:30:00": 'Fast',
        "08:00:00": 'Medium', "08:30:00": 'Medium',
        "09:00:00": 'Slow',   "09:30:00": 'Medium',
        "10:00:00": 'Medium', "10:30:00": 'Medium',
        "11:00:00": 'Medium', "11:30:00": 'Medium',
        "12:00:00": 'Medium', "12:30:00": 'Medium',
        "13:00:00": 'Slow',   "13:30:00": 'Medium',
        "14:00:00": 'Medium', "14:30:00": 'Medium',
        "15:00:00": 'Medium', "15:30:00": 'Medium',
        "16:00:00": 'Slow',   "16:30:00": 'Medium',
        "17:00:00": 'Medium', "17:30:00": 'Medium',
        "18:00:00": 'Medium', "18:30:00": 'Medium',
        "19:00:00": 'Medium', "19:30:00": 'Medium',
        "20:00:00": 'Medium', "20:30:00": 'Medium',
        "21:00:00": 'Fast',   "21:30:00": 'Fast',
        "22:00:00": 'Fast',   "22:30:00": 'Fast',
        "23:00:00": 'Fast',   "23:30:00": 'Fast',
        "00:00:00": 'Fast',   "00:30:00": 'Fast',
        "01:00:00": 'Fast',   "01:30:00": 'Fast',
        "02:00:00": 'Fast',   "02:30:00": 'Fast',
    },
    'Sun': {
        "06:00:00": 'Fast',   "06:30:00": 'Fast',
        "07:00:00": 'Fast',   "07:30:00": 'Fast',
        "08:00:00": 'Fast',   "08:30:00": 'Fast',
        "09:00:00": 'Medium', "09:30:00": 'Fast',
        "10:00:00": 'Fast',   "10:30:00": 'Fast',
        "11:00:00": 'Medium', "11:30:00": 'Medium',
        "12:00:00": 'Medium', "12:30:00": 'Medium',
        "13:00:00": 'Slow',   "13:30:00": 'Medium',
        "14:00:00": 'Medium', "14:30:00": 'Medium',
        "15:00:00": 'Medium', "15:30:00": 'Medium',
        "16:00:00": 'Medium', "16:30:00": 'Medium',
        "17:00:00": 'Medium', "17:30:00": 'Medium',
        "18:00:00": 'Medium', "18:30:00": 'Medium',
        "19:00:00": 'Medium', "19:30:00": 'Medium',
        "20:00:00": 'Medium', "20:30:00": 'Medium',
        "21:00:00": 'Fast',   "21:30:00": 'Fast',
        "22:00:00": 'Fast',   "22:30:00": 'Fast',
        "23:00:00": 'Fast',   "23:30:00": 'Fast',
        "00:00:00": 'Fast',   "00:30:00": 'Fast',
        "01:00:00": 'Fast',   "01:30:00": 'Fast',
        "02:00:00": 'Fast',   "02:30:00": 'Fast',
    },
}


def change_day(group):
    """Replace the time slot in ``Speed`` with a Fast/Medium/Slow label.

    The most frequent ``Day`` value and the most frequent ``Speed`` value
    (a half-hour slot string such as "08:00:00") of the group are looked up
    in ``_SPEED_BY_DAY_AND_SLOT``; the resulting label is written to every
    row of the group.

    The name is kept because the notebook passes ``change_day`` to
    ``gb.apply``; note that this definition rebinds the ``change_day``
    defined earlier in the notebook.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group; must have ``Day`` and ``Speed`` columns.

    Returns
    -------
    pandas.DataFrame
        A relabelled copy of ``group``. When the (day, slot) pair has no
        mapping, or either column has no countable values, ``group`` itself
        is returned unchanged.
    """
    day_counts = group.Day.value_counts()
    slot_counts = group.Speed.value_counts()
    if day_counts.empty or slot_counts.empty:
        # Nothing to decide on; previously this raised IndexError.
        return group

    day = day_counts.index[0]
    time_cat = slot_counts.index[0]

    speed = _SPEED_BY_DAY_AND_SLOT.get(day, {}).get(time_cat)
    if speed is None:
        return group

    # Work on a copy: mutating the object handed over by groupby.apply is
    # not supported by pandas and can produce unexpected results.
    relabelled = group.copy()
    relabelled["Speed"] = speed
    return relabelled

In [17]:
df = gb.apply(change_day)

In [18]:
df.head()

Unnamed: 0,Timestamp,JourneyPatternId,TimeFrame,VehicleJourneyId,Day,Distance,TravelTime,Speed,Rain
0,2012-11-06 12:40:39,401001,2012-11-06,6320,Mon-Fri,0.0,0.0,Medium,0.0
1,2012-11-06 12:42:58,401001,2012-11-06,6320,Mon-Fri,1.288087,139.0,Medium,0.0
2,2012-11-06 12:43:41,401001,2012-11-06,6320,Mon-Fri,1.539224,182.0,Medium,0.0
3,2012-11-06 12:43:59,401001,2012-11-06,6320,Mon-Fri,1.860739,200.0,Medium,0.0
4,2012-11-06 12:44:20,401001,2012-11-06,6320,Mon-Fri,2.188079,221.0,Medium,0.0


# Make Linear Regression Model

In [19]:
# Speed becomes a categorical column; TimeFrame becomes a string column
# (the train/test split further down compares it against date strings).
df = df.astype({"Speed": "category", "TimeFrame": "str"})

In [20]:
df.dtypes

Timestamp           datetime64[ns]
JourneyPatternId            object
TimeFrame                   object
VehicleJourneyId            object
Day                       category
Distance                   float64
TravelTime                 float64
Speed                     category
Rain                       float32
dtype: object

In [21]:
df.head()

Unnamed: 0,Timestamp,JourneyPatternId,TimeFrame,VehicleJourneyId,Day,Distance,TravelTime,Speed,Rain
0,2012-11-06 12:40:39,401001,2012-11-06,6320,Mon-Fri,0.0,0.0,Medium,0.0
1,2012-11-06 12:42:58,401001,2012-11-06,6320,Mon-Fri,1.288087,139.0,Medium,0.0
2,2012-11-06 12:43:41,401001,2012-11-06,6320,Mon-Fri,1.539224,182.0,Medium,0.0
3,2012-11-06 12:43:59,401001,2012-11-06,6320,Mon-Fri,1.860739,200.0,Medium,0.0
4,2012-11-06 12:44:20,401001,2012-11-06,6320,Mon-Fri,2.188079,221.0,Medium,0.0


In [22]:
# Chronological hold-out: rows dated before the cutoff train the model, the
# remaining rows test it. TimeFrame is a string column at this point, so the
# comparison is lexicographic.
# NOTE(review): the sklearn section of this notebook splits at '2013-01-15'
# instead, so its error figures are computed on a different test set than the
# statsmodels ones - confirm this is intended before comparing them.
before_cutoff = df['TimeFrame'] < '2012-11-16'
train = df[before_cutoff]
test = df[~before_cutoff]

In [23]:
# Fit an ordinary least squares model on the training set
lm = smf.ols(formula='TravelTime ~ Speed + Distance + Day', data=train).fit()

In [24]:
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:             TravelTime   R-squared:                       0.935
Model:                            OLS   Adj. R-squared:                  0.935
Method:                 Least Squares   F-statistic:                 1.589e+05
Date:                Thu, 17 Aug 2017   Prob (F-statistic):               0.00
Time:                        13:30:20   Log-Likelihood:            -4.1129e+05
No. Observations:               55106   AIC:                         8.226e+05
Df Residuals:                   55100   BIC:                         8.226e+05
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept        -857.1531      5.996   -1

In [25]:
test_y = test["TravelTime"]
test_X = test[["Distance", "Speed", "Day"]]

In [26]:
# test_X = test_X[test_X.TimeCategory != '00:00']
predictions = lm.predict(test_X)

In [27]:
# Custom metric: mean absolute difference between the magnitudes of the
# actual and the predicted travel times.
# NOTE(review): both sides go through abs() before being subtracted, so when
# an actual value and its prediction have opposite signs the error is smaller
# than the plain absolute error |actual - predicted| - confirm this is intended.
magnitude_gap = np.abs(test_y) - np.abs(predictions)
np.abs(magnitude_gap).mean()

341.78135787868297

In [28]:
# Mean squared error of the model's predictions on the held-out test set
residuals = test_y - predictions
mse = (residuals ** 2).mean()
print("\nMean Squared Error:\n", mse)


Mean Squared Error:
 243588.14878318756


In [29]:
from math import sqrt

sqrt(mse)

493.54650113559467

# Sklearn Linear Regression

In [55]:
lin_df = df

In [56]:
# Get dummies
day_dummies = pd.get_dummies(lin_df.Day)
day_dummies.columns = ["Mon-Fri", "Sat", "Sun"]
speed_dummies = pd.get_dummies(lin_df.Speed)

In [57]:
# Assign to df
lin_df = pd.concat([lin_df, day_dummies], axis=1)
lin_df = pd.concat([lin_df, speed_dummies], axis=1)

In [58]:
lin_df.head()

Unnamed: 0,Timestamp,JourneyPatternId,TimeFrame,VehicleJourneyId,Day,Distance,TravelTime,Speed,Rain,Mon-Fri,Sat,Sun,Fast,Medium,Slow
0,2012-11-06 12:40:39,401001,2012-11-06,6320,Mon-Fri,0.0,0.0,Medium,0.0,1,0,0,0,1,0
1,2012-11-06 12:42:58,401001,2012-11-06,6320,Mon-Fri,1.288087,139.0,Medium,0.0,1,0,0,0,1,0
2,2012-11-06 12:43:41,401001,2012-11-06,6320,Mon-Fri,1.539224,182.0,Medium,0.0,1,0,0,0,1,0
3,2012-11-06 12:43:59,401001,2012-11-06,6320,Mon-Fri,1.860739,200.0,Medium,0.0,1,0,0,0,1,0
4,2012-11-06 12:44:20,401001,2012-11-06,6320,Mon-Fri,2.188079,221.0,Medium,0.0,1,0,0,0,1,0


In [59]:
# Store the indicator columns as integers and the continuous columns as floats,
# all in a single cast.
lin_df = lin_df.astype({
    "Sat": "int",
    "Sun": "int",
    "Mon-Fri": "int",
    "Fast": "int",
    "Medium": "int",
    "Slow": "int",
    "TravelTime": "float",
    "Distance": "float",
})

In [60]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model

In [61]:
# Split the data into training/testing sets by date: rows before the cutoff
# train, the remaining rows test (TimeFrame is compared as a string).
# NOTE(review): the statsmodels section of this notebook splits at
# '2012-11-16' instead, so the two sections are evaluated on different test
# sets - confirm this is intended before comparing their error figures.
in_training_period = lin_df['TimeFrame'] < '2013-01-15'
X_train = lin_df[in_training_period]
X_test = lin_df[~in_training_period]

In [62]:
# Split the targets into training/testing sets
y_train = X_train.TravelTime
y_test = X_test.TravelTime

In [63]:
X_train.head()

Unnamed: 0,Timestamp,JourneyPatternId,TimeFrame,VehicleJourneyId,Day,Distance,TravelTime,Speed,Rain,Mon-Fri,Sat,Sun,Fast,Medium,Slow
0,2012-11-06 12:40:39,401001,2012-11-06,6320,Mon-Fri,0.0,0.0,Medium,0.0,1,0,0,0,1,0
1,2012-11-06 12:42:58,401001,2012-11-06,6320,Mon-Fri,1.288087,139.0,Medium,0.0,1,0,0,0,1,0
2,2012-11-06 12:43:41,401001,2012-11-06,6320,Mon-Fri,1.539224,182.0,Medium,0.0,1,0,0,0,1,0
3,2012-11-06 12:43:59,401001,2012-11-06,6320,Mon-Fri,1.860739,200.0,Medium,0.0,1,0,0,0,1,0
4,2012-11-06 12:44:20,401001,2012-11-06,6320,Mon-Fri,2.188079,221.0,Medium,0.0,1,0,0,0,1,0


In [64]:
# Remove the columns that are not inputs to the regression: identifiers and
# timestamps, the raw Day/Speed columns (their indicator columns stay) and
# the target TravelTime. Rain is kept as a predictor.
non_feature_columns = [
    "Timestamp",
    "JourneyPatternId",
    "TimeFrame",
    "VehicleJourneyId",
    "Day",
    "Speed",
    "TravelTime",
]

X_train = X_train.drop(non_feature_columns, axis=1)
X_test = X_test.drop(non_feature_columns, axis=1)

In [65]:
X_train.head()

Unnamed: 0,Distance,Rain,Mon-Fri,Sat,Sun,Fast,Medium,Slow
0,0.0,0.0,1,0,0,0,1,0
1,1.288087,0.0,1,0,0,0,1,0
2,1.539224,0.0,1,0,0,0,1,0
3,1.860739,0.0,1,0,0,0,1,0
4,2.188079,0.0,1,0,0,0,1,0


In [66]:
# Create linear regression object
regr = linear_model.LinearRegression()

In [67]:
# Train the model using the training sets
regr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [68]:
# The coefficients
print('Coefficients: \n', regr.coef_)

Coefficients: 
 [ 205.6853305    71.96746592  101.4253776    14.15351674 -115.57889434
 -474.1360628    97.43120888  376.70485392]


In [69]:
# Custom Metric
np.mean(abs(abs(regr.predict(X_test)) - abs(y_test)))

338.33463804600444

In [70]:
print("Mean squared error: %.2f"
      % np.mean((regr.predict(X_test) - y_test) ** 2))

Mean squared error: 209754.00


In [71]:
mse = np.mean((regr.predict(X_test) - y_test) ** 2)

print("Root mean squared error:", np.sqrt(mse))

Root mean squared error: 457.989085927


In [72]:
# R^2 (coefficient of determination) on the test set: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(X_test, y_test))

Variance score: 0.92


# Random Forest Sklearn

In [73]:
from sklearn.ensemble import RandomForestRegressor

In [74]:
# Create random forest regressor object (no random_state is set, so results vary between runs)
regr = RandomForestRegressor(n_estimators=15)

In [75]:
# Train the model using the training sets
regr.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=15, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [76]:
# Custom Metric
np.mean(abs(abs(regr.predict(X_test)) - abs(y_test)))

333.96737323097756

In [77]:
print("Mean squared error: %.2f"
      % np.mean((regr.predict(X_test) - y_test) ** 2))

Mean squared error: 228959.63


In [78]:
mse = np.mean((regr.predict(X_test) - y_test) ** 2)

print("Root mean squared error:", np.sqrt(mse))

Root mean squared error: 478.497261161


In [79]:
# R^2 (coefficient of determination) on the test set: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(X_test, y_test))

Variance score: 0.91


# Sklearn Adaboost

In [80]:
from sklearn.ensemble import AdaBoostRegressor

In [81]:
# Create AdaBoost regressor object (no random_state is set, so results vary between runs)
regr = AdaBoostRegressor()

In [82]:
# Train the model using the training sets
regr.fit(X_train, y_train)

AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
         n_estimators=50, random_state=None)

In [83]:
# Custom Metric
np.mean(abs(abs(regr.predict(X_test)) - abs(y_test)))

379.20607448077425

In [84]:
print("Mean squared error: %.2f"
      % np.mean((regr.predict(X_test) - y_test) ** 2))

Mean squared error: 243694.01


In [85]:
mse = np.mean((regr.predict(X_test) - y_test) ** 2)

print("Root mean squared error:", np.sqrt(mse))

Root mean squared error: 493.653735258


In [86]:
# R^2 (coefficient of determination) on the test set: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(X_test, y_test))

Variance score: 0.91


# Sklearn KNN

In [87]:
from sklearn.neighbors import KNeighborsRegressor

In [88]:
# Create k-nearest-neighbours regressor object
regr = KNeighborsRegressor()

In [89]:
# Train the model using the training sets
regr.fit(X_train, y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='uniform')

In [90]:
# Custom Metric
np.mean(abs(abs(regr.predict(X_test)) - abs(y_test)))

310.56661890269146

In [91]:
print("Mean squared error: %.2f"
      % np.mean((regr.predict(X_test) - y_test) ** 2))

Mean squared error: 196921.72


In [92]:
mse = np.mean((regr.predict(X_test) - y_test) ** 2)

print("Root mean squared error:", np.sqrt(mse))

Root mean squared error: 443.758631586


In [93]:
# R^2 (coefficient of determination) on the test set: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(X_test, y_test))

Variance score: 0.93


# Sklearn SVM Regressor

In [325]:
from sklearn.svm import SVR

In [None]:
# Create support vector regressor object
regr = SVR()

In [None]:
# Train the model using the training sets
regr.fit(X_train, y_train)

In [None]:
# Custom Metric
np.mean(abs(abs(regr.predict(X_test)) - abs(y_test)))

In [None]:
print("Mean squared error: %.2f"
      % np.mean((regr.predict(X_test) - y_test) ** 2))

In [None]:
mse = np.mean((regr.predict(X_test) - y_test) ** 2)

print("Root mean squared error:", np.sqrt(mse))

In [None]:
# R^2 (coefficient of determination) on the test set: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(X_test, y_test))

# ANN Regressor

In [94]:
from sklearn.neural_network import MLPRegressor

In [95]:
# Create multi-layer perceptron regressor object (no random_state is set, so results vary between runs)
regr = MLPRegressor()

In [96]:
# Train the model using the training sets
regr.fit(X_train, y_train)

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [97]:
# Custom Metric
np.mean(abs(abs(regr.predict(X_test)) - abs(y_test)))

318.11698729391094

In [98]:
print("Mean squared error: %.2f"
      % np.mean((regr.predict(X_test) - y_test) ** 2))

Mean squared error: 181976.99


In [99]:
mse = np.mean((regr.predict(X_test) - y_test) ** 2)

print("Root mean squared error:", np.sqrt(mse))

Root mean squared error: 426.587614474


In [100]:
# R^2 (coefficient of determination) on the test set: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(X_test, y_test))

Variance score: 0.93
