In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score, StratifiedKFold, learning_curve, KFold
from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import r2_score, mean_squared_error

In [7]:
bikes_test = pd.read_excel('bike_test.xlsx')
bikes_train = pd.read_excel('bike_train.xlsx')

In [38]:
bikes_train.columns

Index(['instant', 'dteday', 'season', 'yr', 'mnth', 'hr', 'holiday', 'weekday',
       'workingday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed',
       'casual', 'registered', 'cnt'],
      dtype='object')

In [30]:
bikes_train_temp = bikes_train.copy()
bikes_train_temp = bikes_train_temp.drop(["instant", "yr", "atemp", "casual","registered","dteday"], axis=1)

In [41]:
bikes_test_temp = bikes_test.copy()
bikes_test_temp = bikes_test_temp.drop(["instant", "yr", "atemp", "dteday"], axis=1)

In [31]:
y = bikes_train_temp["cnt"]
X = bikes_train_temp.drop("cnt", axis=1)

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size= 0.2, random_state=42)

In [33]:
xg_model = XGBRegressor()
xg_scores = cross_val_score(xg_model, X_train, y_train, scoring="neg_root_mean_squared_error", cv=10)

In [34]:
rmse = np.sqrt(-xg_scores)
rmse

array([6.96128766, 6.96108762, 7.14637588, 7.11864735, 6.8257026 ,
       6.93436269, 7.26072291, 7.42015815, 7.17109995, 7.03594511])

In [35]:
np.mean(rmse).round()

7.0

In [36]:
xg_model.fit(X_train, y_train)
xg_model_prediction = xg_model.predict(X_test)

In [37]:
print('R2 Score: ', r2_score(y_test, xg_model_prediction)*100)
print('RMSE: ', np.sqrt(mean_squared_error(y_test, xg_model_prediction)))

R2 Score:  89.21969238508282
RMSE:  48.782666634006596


In [42]:
xg_model_prediction_proper = xg_model.predict(bikes_test_temp)

In [44]:
xg_model_prediction_proper

array([ 15.400438 ,   6.1870584,  11.985815 , ...,  44.316    ,
        35.13337  , -15.092947 ], dtype=float32)

In [45]:
xg_model_prediction_proper = [abs(n) for n in xg_model_prediction_proper]
xg_model_prediction_proper

[15.400438,
 6.1870584,
 11.985815,
 49.91487,
 188.0948,
 237.31346,
 344.60107,
 411.03473,
 451.46,
 528.4249,
 540.2811,
 538.7328,
 548.7326,
 469.23004,
 396.5573,
 316.74225,
 229.02817,
 140.96178,
 134.66586,
 28.329147,
 2.257413,
 11.8196535,
 9.373965,
 10.301345,
 22.278433,
 9.551431,
 10.986968,
 100.31347,
 320.5382,
 180.76984,
 92.04153,
 165.36005,
 156.67847,
 138.05795,
 148.24962,
 199.66333,
 315.1781,
 556.53174,
 563.1386,
 395.16278,
 270.43484,
 189.25029,
 140.3292,
 73.60404,
 38.487167,
 21.189787,
 11.263946,
 9.578331,
 11.750112,
 14.113189,
 64.990814,
 216.12529,
 464.61465,
 254.4979,
 150.4695,
 167.20789,
 178.36276,
 203.28395,
 181.96227,
 231.05939,
 319.984,
 612.4966,
 602.27875,
 459.4526,
 337.60562,
 236.25632,
 170.15959,
 96.28855,
 46.838802,
 24.888489,
 38.935078,
 15.498508,
 11.212352,
 13.542607,
 118.032135,
 366.2871,
 516.2155,
 302.5951,
 175.00813,
 193.3826,
 258.88867,
 259.33438,
 229.90222,
 277.31454,
 352.64606,
 603.7342

In [46]:
prototype = pd.DataFrame()
prototype['pred'] = xg_model_prediction_proper
prototype.to_csv('KevinG91.csv', index= False)