# 7 - Modeling 2

In [100]:
import pandas as pd
# Load the round-2 train and test frames from their pickle files.
# NOTE(review): pickles execute arbitrary code on load -- only read files
# produced by this project's own pipeline.
train, test = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        '../pickles/final/round_2/train_final',
        '../pickles/final/round_2/test_final',
    )
)

from xgboost import XGBRegressor
from sklearn.metrics import root_mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

In [101]:
# Collect the names of every column still stored with `object` dtype ...
train_col = list(train.select_dtypes(include='object').columns)
test_col = list(test.select_dtypes(include='object').columns)

# ... and cast those columns to integers in each frame.
# `astype(int)` raises if any value cannot be converted to an integer.
train[train_col] = train[train_col].astype(int)
test[test_col] = test[test_col].astype(int)

### Hyperparameter Tuning

In [102]:
# Untuned base estimator; the seed is fixed so repeated fits are reproducible.
boost = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
)

# Separate the target (`SalePrice`) from the predictor columns.
y_train = train['SalePrice']
x_train = train.drop(columns='SalePrice')

In [103]:
# Candidate values for the exhaustive grid search: 3 values for each of the
# 5 hyperparameters, i.e. 3**5 = 243 combinations.
params = {
    'alpha': [5, 10, 15],             # L1 regularisation strength
    'lambda': [35, 40, 45],           # L2 regularisation strength
    'max_depth': [2, 3, 4],           # maximum depth of each tree
    'learning_rate': [0.01, 0.1, 1],  # shrinkage applied per boosting step
    'n_estimators': [550, 555, 560],  # number of boosting rounds
}

In [104]:
# Exhaustive search over `params`, scored by (negated) RMSE and run on all
# available cores. No `cv` is given, so scikit-learn's default splitter is used.
search = GridSearchCV(
    estimator=boost,
    param_grid=params,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
)
result = search.fit(x_train, y_train)

# The scorer reports negative RMSE (higher is better), so flip the sign to
# show an ordinary RMSE.
print('Best Score: %s' % (-result.best_score_))
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 23739.430931715575
Best Hyperparameters: {'alpha': 10, 'lambda': 40, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 555}


### The Model

In [105]:
# Final model. The hyperparameter values are copied by hand from the
# "Best Hyperparameters" printed by the grid search; update them here if the
# search is re-run and selects different values.
boost_best = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    n_estimators=555,
    max_depth=3,
    learning_rate=0.1,
    reg_alpha=10,
    reg_lambda=40,
)

# Fit on the full training set, then predict on that same data (in-sample).
train_pred = boost_best.fit(x_train, y_train).predict(x_train)

In [106]:
# Score the predictions against the training target. Both numbers are
# computed against `y_train`, so they describe fit on the training data.
rmse, r2 = (
    metric(y_train, train_pred)
    for metric in (root_mean_squared_error, r2_score)
)

print(f'Root Mean Squared Error: {rmse} \nR-Squared: {r2}')

Root Mean Squared Error: 12062.933442857448 
R-Squared: 0.9751548171043396


### Submission Table

In [107]:
# The cleaned test frame is loaded only to recover the `Id` column for the
# submission file.
for_cols = pd.read_pickle('../pickles/cleaned/test_cleaned')

# Predict on the test features. The original cell used `test_drops`, a name
# that is never defined in this notebook, so it failed with NameError on a
# fresh kernel. Selecting `x_train.columns` from `test` gives the model exactly
# the columns (and column order) it was fitted on; a missing column raises
# KeyError here instead of failing inside the model.
test_features = test[x_train.columns]
preds = boost_best.predict(test_features)

# Build the two-column submission table (Id, SalePrice).
# NOTE(review): assumes the rows of `test_cleaned` are in the same order as
# the rows of `test` -- confirm against the notebook that produced them.
submit = pd.DataFrame(data=for_cols['Id'])
submit['SalePrice'] = preds.tolist()
submit.head()

Unnamed: 0,Id,SalePrice
0,1461,132092.609375
1,1462,172892.46875
2,1463,178402.984375
3,1464,200319.296875
4,1465,179419.921875


In [108]:
# Write the submission table to disk; the DataFrame index is omitted from the file.
submit.to_csv(path_or_buf='../data/submission2.csv', index=False)