In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('wc-wo-outliers.csv')

In [3]:
y = df['results']
X = df.drop(columns=['results'])

X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.20, random_state=1)

In [4]:
n_samples, n_features = 10, 5

# Always scale the input. The most convenient way is to use a pipeline.
reg = make_pipeline(StandardScaler(),
                    SGDRegressor(max_iter=1000, tol=1e-3))

In [5]:
reg.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('sgdregressor', SGDRegressor())])

In [6]:
reg.score(X_test, y_test, sample_weight=None)

0.22127707138424924

In [7]:
pred_train= reg.predict(X_train)
predictions = reg.predict(X_test)

In [8]:
print('Stochastic Gradient Descent Regression')
print('RMSE: '+str(np.sqrt(mean_squared_error(y_train,pred_train))))
print('R Squared: '+str(r2_score(y_train, pred_train)))
print('MSE: '+str(mean_squared_error(y_train, pred_train)))
print('MAE: '+str(mean_absolute_error(y_train, pred_train)))
print('MAPE: '+str(mean_absolute_percentage_error(y_train, pred_train)))

print('--------------------')
print('RMSE: '+str(np.sqrt(mean_squared_error(y_test,predictions))))
print('R Squared: '+str(r2_score(y_test, predictions)))
print('MSE: '+str(mean_squared_error(y_test, predictions)))
print('MAE: '+str(mean_absolute_error(y_test, predictions)))
print('MAPE: '+str(mean_absolute_percentage_error(y_test, predictions)))

Stochastic Gradient Descent Regression
RMSE: 6.375387212025714
R Squared: 0.49622651760551373
MSE: 40.645562103261014
MAE: 5.2048301220510425
MAPE: 0.8477046414296767
--------------------
RMSE: 7.5633978036404415
R Squared: 0.22127707138424924
MSE: 57.20498633611305
MAE: 6.364266400748778
MAPE: 0.8947413432409581
