In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.subplots import make_subplots
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import recall_score, confusion_matrix, precision_score, f1_score, accuracy_score, classification_report
from sklearn import preprocessing
from imblearn.over_sampling import SMOTE
from sklearn.metrics import roc_auc_score
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.isotonic import IsotonicRegression

In [None]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
#Import train & test
test_label = pd.read_csv('/content/drive/MyDrive/test_label.csv의 사본')
train_label = pd.read_csv('/content/drive/MyDrive/train_label.csv의 사본')
test_none = pd.read_csv('/content/drive/MyDrive/train_none.csv의 사본')
train_none = pd.read_csv('/content/drive/MyDrive/test_none.csv의 사본')
test_onehot = pd.read_csv('/content/drive/MyDrive/train_onehot.csv의 사본')
train_onehot = pd.read_csv('/content/drive/MyDrive/train_onehot.csv의 사본')

# 1) LABEL ENCODING

In [None]:
X = train_label.drop(['rating', 'nickname'],axis=1)
y = train_label['rating']

In [None]:
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=26)
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((40328, 19), (10082, 19), (40328,), (10082,))

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso, Ridge, ElasticNet

In [None]:
# random forest

rf = RandomForestRegressor()
rf.fit(x_train, y_train)
rf_predict = rf.predict(x_test)
print('RMSE :', mean_squared_error(y_test, rf_predict, squared = False))

RMSE : 122.51380580370295


In [None]:
# XGB Boost
from xgboost import XGBRegressor
xgb = XGBRegressor(n_estimators=100)
xgb.fit(x_train, y_train)
xgb_predict = xgb.predict(x_test)
print('RMSE :', mean_squared_error(y_test, xgb_predict, squared = False))

RMSE : 127.761320513539


In [None]:
# linear regression

from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(x_train, y_train)
lr_predict = lr.predict(x_test)
print('RMSE :', mean_squared_error(y_test, lr_predict, squared = False))

RMSE : 140.91990836835262


In [None]:
# elastic net
elastic = ElasticNet(alpha=0.1, l1_ratio = 0.5)
elastic.fit(x_train, y_train)
elastic_predict = elastic.predict(x_test)
print('RMSE :', mean_squared_error(y_test, elastic_predict, squared = False))

RMSE : 141.8425093353625



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.227e+08, tolerance: 2.129e+05



In [None]:
# Lasso
lasso = Lasso(alpha=0.1)
lasso.fit(x_train, y_train)
lasso_predict = lasso.predict(x_test)
print('RMSE :', mean_squared_error(y_test, lasso_predict, squared = False))

RMSE : 141.00865051396013



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.028e+08, tolerance: 2.129e+05



In [None]:
# Ridge
ridge = Ridge(alpha=0.1)
ridge.fit(x_train, y_train)
ridge_predict = ridge.predict(x_test)
print('RMSE :', mean_squared_error(y_test, ridge_predict, squared = False))

RMSE : 140.91996768295914


In [None]:
# knn
from sklearn.neighbors import KNeighborsRegressor
knn = KNeighborsRegressor(n_neighbors=3, weights = "distance")
knn.fit(x_train, y_train)
knn_predict = knn.predict(x_test)
print('RMSE :', mean_squared_error(y_test, knn_predict, squared = False))

RMSE : 172.83783111264586


## 하이퍼파라미터 GridSearch

In [None]:
from sklearn.model_selection import GridSearchCV
param_grid = {
    'n_estimators': [10, 50, 150],
    'max_depth': [None, 2, 3, 4, 5],
    'min_samples_split': [0.01, 0.05, 0.1],
    'max_features': ['auto', 'sqrt'],
}

In [None]:
from sklearn.model_selection import KFold

kf = KFold(random_state=30,
           n_splits=5,
           shuffle=True,
          )

In [None]:
estimator = RandomForestRegressor()
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

Fitting 5 folds for each of 90 candidates, totalling 450 fits
최적 하이퍼 파라미터: 
 {'max_depth': None, 'max_features': 'auto', 'min_samples_split': 0.01, 'n_estimators': 150}


In [None]:
rf = RandomForestRegressor(max_depth = None, n_estimators = 150)
rf.fit(x_train, y_train)
rf_pred = rf.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, rf_pred))))

RMSE : 122.36020681534329


In [None]:
estimator = XGBRegressor()
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

Fitting 5 folds for each of 90 candidates, totalling 450 fits




90 fits failed out of a total of 450.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
90 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.8/dist-packages/xgboost/sklearn.py", line 391, in fit
    self._Booster = train(params, trainDmatrix,
  File "/usr/local/lib/python3.8/dist-packages/xgboost/training.py", line 212, in train
    return _train_internal(params, dtrain,
  File "/usr/local/lib/python3.8/dist-packages/xgboost/training.py", line 74, in _train_internal
    bst.update(dtrain, i, obj)
  File "/usr/local/lib/py

최적 하이퍼 파라미터: 
 {'max_depth': 5, 'max_features': 'auto', 'min_samples_split': 0.01, 'n_estimators': 150}


In [None]:
xgb = XGBRegressor(n_estimators = 150, max_depth = 5, min_samples_split = 0.01)
xgb.fit(x_train, y_train)
xgb_pred = xgb.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, xgb_pred))))

RMSE : 122.26569265347361


In [None]:
estimator = ElasticNet()
param_grid = {
    'alpha': [0.1, 0.5, 1.0],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7],
    'max_iter': [10, 50, 100, 500],
    'random_state' : [None],
    'tol' : [0.0001]
}
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
최적 하이퍼 파라미터: 
 {'alpha': 0.1, 'l1_ratio': 0.7, 'max_iter': 50, 'random_state': None, 'tol': 0.0001}



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.159e+08, tolerance: 2.129e+05



In [None]:
elastic = ElasticNet(alpha=0.1, l1_ratio=0.7, max_iter=50)
elastic.fit(x_train, y_train)
elastic_pred = elastic.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, elastic_pred))))

RMSE : 141.42420039304474



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.159e+08, tolerance: 2.129e+05



# 2) None 

In [None]:
train_none = train_none[train_none.T[train_none.dtypes!=np.object].index]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations



In [None]:
X = train_none.drop(['rating'],axis=1)
y = train_none['rating']

In [None]:
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=26)
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((17890, 14), (4473, 14), (17890,), (4473,))

In [None]:
# random forest
rf = RandomForestRegressor()
rf.fit(x_train, y_train)
rf_predict = rf.predict(x_test)
print('RMSE :', mean_squared_error(y_test, rf_predict, squared = False))

ValueError: ignored

In [None]:
# XGB Boost
from xgboost import XGBRegressor
xgb = XGBRegressor(n_estimators=100)
xgb.fit(x_train, y_train)
xgb_predict = xgb.predict(x_test)
print('RMSE :', mean_squared_error(y_test, xgb_predict, squared = False))

In [None]:
# linear regression

from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(x_train, y_train)
lr_predict = lr.predict(x_test)
print('RMSE :', mean_squared_error(y_test, lr_predict, squared = False))

In [None]:
# elastic net
elastic = ElasticNet(alpha=0.1, l1_ratio = 0.5)
elastic.fit(x_train, y_train)
elastic_predict = elastic.predict(x_test)
print('RMSE :', mean_squared_error(y_test, elastic_predict, squared = False))

In [None]:
# Lasso
lasso = Lasso(alpha=0.1)
lasso.fit(x_train, y_train)
lasso_predict = lasso.predict(x_test)
print('RMSE :', mean_squared_error(y_test, lasso_predict, squared = False))

In [None]:
# Ridge
ridge = Ridge(alpha=0.1)
ridge.fit(x_train, y_train)
ridge_predict = ridge.predict(x_test)
print('RMSE :', mean_squared_error(y_test, ridge_predict, squared = False))

In [None]:
# knn
from sklearn.neighbors import KNeighborsRegressor
knn = KNeighborsRegressor(n_neighbors=3, weights = "distance")
knn.fit(x_train, y_train)
knn_predict = knn.predict(x_test)
print('RMSE :', mean_squared_error(y_test, knn_predict, squared = False))

## GridSearch

In [None]:
from sklearn.model_selection import GridSearchCV
param_grid = {
    'n_estimators': [10, 50, 150],
    'max_depth': [None, 2, 3, 4, 5],
    'min_samples_split': [0.01, 0.05, 0.1],
    'max_features': ['auto', 'sqrt'],
}

In [None]:
from sklearn.model_selection import KFold

kf = KFold(random_state=30,
           n_splits=5,
           shuffle=True,
          )

In [None]:
estimator = RandomForestRegressor()
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

In [None]:
rf = RandomForestRegressor(max_depth = None, n_estimators = 150)
rf.fit(x_train, y_train)
rf_pred = rf.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, rf_pred))))

In [None]:
estimator = XGBRegressor()
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

In [None]:
xgb = XGBRegressor(n_estimators = 150, max_depth = 5)
xgb.fit(x_train, y_train)
xgb_pred = xgb.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, xgb_pred))))

In [None]:
estimator = ElasticNet()
param_grid = {
    'alpha': [0.1, 0.5, 1.0],
    'copy_X' : True,
    'l1_ratio': [0.1, 0.3, 0.5, 0.7],
    'max_iter': [10, 50, 100, 500],
    'random_state' : None,
    'tol' : 0.0001
}
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

In [None]:
elastic = ElasticNet(n_estimators = 150, max_depth = 5)
elastic.fit(x_train, y_train)
elastic_pred = elastic.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, elastic_pred))))

# ONE-HOT

In [None]:
X = train_onehot.drop(['rating', 'nickname'],axis=1)
y = train_onehot['rating']

In [None]:
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=26)
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((40328, 30), (10082, 30), (40328,), (10082,))

In [None]:
# random forest

rf = RandomForestRegressor()
rf.fit(x_train, y_train)
rf_predict = rf.predict(x_test)
print('RMSE :', mean_squared_error(y_test, rf_predict, squared = False))

RMSE : 122.21126969952131


In [None]:
# XGB Boost
from xgboost import XGBRegressor
xgb = XGBRegressor(n_estimators=100)
xgb.fit(x_train, y_train)
xgb_predict = xgb.predict(x_test)
print('RMSE :', mean_squared_error(y_test, xgb_predict, squared = False))

RMSE : 127.63360926332824


In [None]:
# linear regression

from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(x_train, y_train)
lr_predict = lr.predict(x_test)
print('RMSE :', mean_squared_error(y_test, lr_predict, squared = False))

RMSE : 138.22667829558284


In [None]:
# elastic net
elastic = ElasticNet(alpha=0.1, l1_ratio = 0.5)
elastic.fit(x_train, y_train)
elastic_predict = elastic.predict(x_test)
print('RMSE :', mean_squared_error(y_test, elastic_predict, squared = False))

RMSE : 139.61221634742827



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.105e+08, tolerance: 2.129e+05



In [None]:
# Lasso
lasso = Lasso(alpha=0.1)
lasso.fit(x_train, y_train)
lasso_predict = lasso.predict(x_test)
print('RMSE :', mean_squared_error(y_test, lasso_predict, squared = False))

RMSE : 138.22977221831445



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.892e+08, tolerance: 2.129e+05



In [None]:
# Ridge
ridge = Ridge(alpha=0.1)
ridge.fit(x_train, y_train)
ridge_predict = ridge.predict(x_test)
print('RMSE :', mean_squared_error(y_test, ridge_predict, squared = False))

RMSE : 138.2265611210079


In [None]:
# knn
from sklearn.neighbors import KNeighborsRegressor
knn = KNeighborsRegressor(n_neighbors=3, weights = "distance")
knn.fit(x_train, y_train)
knn_predict = knn.predict(x_test)
print('RMSE :', mean_squared_error(y_test, knn_predict, squared = False))

RMSE : 172.81476446064835


## GridSearch

In [None]:
from sklearn.model_selection import GridSearchCV
param_grid = {
    'n_estimators': [10, 50, 150],
    'max_depth': [None, 2, 3, 4, 5],
    'min_samples_split': [0.01, 0.05, 0.1],
    'max_features': ['auto', 'sqrt'],
}

In [None]:
from sklearn.model_selection import KFold

kf = KFold(random_state=30,
           n_splits=5,
           shuffle=True,
          )

In [None]:
estimator = RandomForestRegressor()
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

Fitting 5 folds for each of 90 candidates, totalling 450 fits
최적 하이퍼 파라미터: 
 {'max_depth': None, 'max_features': 'auto', 'min_samples_split': 0.01, 'n_estimators': 150}


In [None]:
rf = RandomForestRegressor(max_depth = None, n_estimators = 150)
rf.fit(x_train, y_train)
rf_pred = rf.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, rf_pred))))

RMSE : 122.11839380550339


In [None]:
estimator = XGBRegressor()
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

Fitting 5 folds for each of 90 candidates, totalling 450 fits




90 fits failed out of a total of 450.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
90 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.8/dist-packages/xgboost/sklearn.py", line 391, in fit
    self._Booster = train(params, trainDmatrix,
  File "/usr/local/lib/python3.8/dist-packages/xgboost/training.py", line 212, in train
    return _train_internal(params, dtrain,
  File "/usr/local/lib/python3.8/dist-packages/xgboost/training.py", line 74, in _train_internal
    bst.update(dtrain, i, obj)
  File "/usr/local/lib/py

최적 하이퍼 파라미터: 
 {'max_depth': 5, 'max_features': 'auto', 'min_samples_split': 0.01, 'n_estimators': 150}


In [None]:
xgb = XGBRegressor(n_estimators = 150, max_depth = 5, min_samples_split = 0.01)
xgb.fit(x_train, y_train)
xgb_pred = xgb.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, xgb_pred))))

RMSE : 122.11753059844155


In [None]:
estimator = ElasticNet()
param_grid = {
    'alpha': [0.1, 0.5, 1.0],
    'copy_X' : [True],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7],
    'max_iter': [10, 50, 100, 500],
    'random_state' : [None],
    'tol' : [0.0001]
}
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
최적 하이퍼 파라미터: 
 {'alpha': 0.1, 'copy_X': True, 'l1_ratio': 0.7, 'max_iter': 10, 'random_state': None, 'tol': 0.0001}



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.074e+08, tolerance: 2.129e+05



In [None]:
elastic = ElasticNet(alpha=0.1, copy_X =True, l1_ratio = 0.7, max_iter = 10, random_state = None)
elastic.fit(x_train, y_train)
elastic_pred = elastic.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, elastic_pred))))

RMSE : 139.1404861635486



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.074e+08, tolerance: 2.129e+05



* 각 단일 모델별 RMSE 비교 및 GridSearch 이후 RMSE 값을 확인한 결과, ONE-HOT 모델의 XG Boost 사용한 모델에서 가장 좋은 성능을 보임. (RMSE = 122.1)
해당 모델의 파라미터 보정을 좀 더 정밀화한 후 최종 Submission모델로 선정.

In [None]:
param_grid = {
    'n_estimators': [10, 50, 150, 200],
    'max_depth': [None, 2, 3, 4, 5, 6],
    'min_samples_split': [0.01, 0.05, 0.1],
    'max_features': ['auto', 'sqrt'],
}

In [None]:
estimator = XGBRegressor()
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )


# fit with (x_train, y_train)
grid_search.fit(x_train, y_train)

print('최적 하이퍼 파라미터: \n', grid_search.best_params_)

Fitting 5 folds for each of 144 candidates, totalling 720 fits




120 fits failed out of a total of 720.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
120 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.8/dist-packages/xgboost/sklearn.py", line 391, in fit
    self._Booster = train(params, trainDmatrix,
  File "/usr/local/lib/python3.8/dist-packages/xgboost/training.py", line 212, in train
    return _train_internal(params, dtrain,
  File "/usr/local/lib/python3.8/dist-packages/xgboost/training.py", line 74, in _train_internal
    bst.update(dtrain, i, obj)
  File "/usr/local/lib/

최적 하이퍼 파라미터: 
 {'max_depth': 6, 'max_features': 'auto', 'min_samples_split': 0.01, 'n_estimators': 200}


In [None]:
xgb_final = XGBRegressor(n_estimators = 200, max_depth = 6, min_samples_split = 0.01)
sub = xgb_final.fit(x_train, y_train)
xgb_final_pred = sub.predict(x_test)

print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, xgb_final_pred))))
print(x_test)

RMSE : 120.95863199372585
       Unnamed: 0  game_id  score  first  winner  max_overtime_minutes  \
2472         2472     3585  407.0    1.0     1.0                   1.0   
29608       29608    42730  405.0    0.0     1.0                   1.0   
11320       11320    16253  469.0    1.0     1.0                   1.0   
44566       44566    64339  354.0    1.0     0.0                   1.0   
11884       11884    17090  524.0    0.0     0.0                   1.0   
...           ...      ...    ...    ...     ...                   ...   
29203       29203    42139  416.0    1.0     1.0                   1.0   
12090       12090    17405  473.0    1.0     1.0                   1.0   
48872       48872    70488  439.0    1.0     1.0                   1.0   
20142       20142    29061  383.0    0.0     0.0                   1.0   
45065       45065    65050  382.0    0.0     1.0                  10.0   

       opp_rating  opp_score  bingo  turn_type_Six-Zero Rule_sum  points_mean  \
2472

최종 모델의 RMSE : 121.1로 최솟값 산출

# Submission

In [None]:
test_label = pd.read_csv('/content/drive/MyDrive/test_label.csv의 사본')
test_label

In [None]:
test_label.drop(['rating', 'nickname'],axis=1, inplace=True)
test = pd.read_csv('/content/drive/MyDrive/test.csv의 사본')

In [None]:
test = test.fillna(-1)

In [None]:
test

In [None]:
a = test['rating']==-1
test[a]

In [None]:
final_test = test[a]

In [None]:
preds = []
preds.append(sub.predict(test_label))
preds

[array([1987.4417, 1937.7216, 1585.4785, ..., 1557.651 , 2066.2122,
        1973.1161], dtype=float32)]

In [None]:
len(final_test['rating'])

22363

In [None]:
final_test

Unnamed: 0,game_id,nickname,score,rating
0,2,Super,488,-1.0
3,7,Priya1,379,-1.0
5,11,TileRunner,462,-1.0
7,14,Anfield223,359,-1.0
8,27,friesbasil,456,-1.0
...,...,...,...,...
44717,72760,thams,494,-1.0
44718,72761,hulkbuster,232,-1.0
44721,72762,kyjo55555,367,-1.0
44723,72768,Maximilian,357,-1.0


In [None]:
print(preds)

[array([1987.4417, 1937.7216, 1585.4785, ..., 1557.651 , 2066.2122,
       1973.1161], dtype=float32)]


In [None]:
final_test['rating'] = preds[0]

In [None]:
final_test['rating']

0        1987.441650
3        1937.721558
5        1585.478516
7        1914.133789
8        2051.453125
            ...     
44717    1977.883545
44718    1676.563232
44721    1557.651001
44723    2066.212158
44725    1973.116089
Name: rating, Length: 22363, dtype: float32

In [None]:
final_test

Unnamed: 0,game_id,nickname,score,rating
0,2,Super,488,1987.441650
3,7,Priya1,379,1937.721558
5,11,TileRunner,462,1585.478516
7,14,Anfield223,359,1914.133789
8,27,friesbasil,456,2051.453125
...,...,...,...,...
44717,72760,thams,494,1977.883545
44718,72761,hulkbuster,232,1676.563232
44721,72762,kyjo55555,367,1557.651001
44723,72768,Maximilian,357,2066.212158


In [None]:
submission2 = final_test[['game_id','rating']]
submission2

Unnamed: 0,game_id,rating
0,2,1987.441650
3,7,1937.721558
5,11,1585.478516
7,14,1914.133789
8,27,2051.453125
...,...,...
44717,72760,1977.883545
44718,72761,1676.563232
44721,72762,1557.651001
44723,72768,2066.212158


In [None]:
submission2.to_csv("submission2.csv")