In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Location of the source CSV (a Colab-style absolute path); kept in one named
# constant so it is easy to spot and change when running elsewhere.
DATA_PATH = '/content/sample_data/KBO increased data_over 30 AB.csv'

# Load the raw table; later cells read its "R", "RBI" and "WAR" columns.
df = pd.read_csv(DATA_PATH)

In [3]:
# Pull each column out as an (n, 1) column vector. Selecting with a list of one
# label keeps the result two-dimensional, so no explicit reshape is needed.
R = df[["R"]].to_numpy()
RBI = df[["RBI"]].to_numpy()
WAR = df[["WAR"]].to_numpy()

# Modelling matrix: columns 0-1 are the features (R, RBI), column 2 is the
# target (WAR). It gets its own name instead of rebinding `df`: overwriting the
# DataFrame with an ndarray made this cell fail on a second run (df["R"] is not
# valid on an ndarray) and discarded the loaded table.
model_data = np.concatenate((R, RBI, WAR), axis=1)

# Split train set and test set (80% / 20%; the fixed seed makes the split
# reproducible across runs).
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(model_data, test_size=0.2, random_state=42)

In [4]:
# Both splits share the same column layout: the first two columns are the
# features (R, RBI) and the third column is the target (WAR).
X_train, y_train = train_set[:, :2], train_set[:, 2]
X_test, y_test = test_set[:, :2], test_set[:, 2]

In [13]:
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# The three penalised linear models all use the same (very small) penalty.
small_alpha = 0.0001

# Settings for the polynomial-kernel SVR.
# NOTE(review): the long decimals look like the result of a hyperparameter
# search; the search itself is not part of this notebook - confirm the source.
svr_settings = dict(
    kernel='poly',
    C=0.012888951526111539,
    degree=3,
    epsilon=0.07498719388057506,
)

# (name, estimator) pairs combined by the voting ensemble below.
models = [
    ('linreg', LinearRegression()),
    ('ridge', Ridge(alpha=small_alpha)),
    ('lasso', Lasso(alpha=small_alpha)),
    ('elasticnet', ElasticNet(alpha=small_alpha, l1_ratio=0.0001)),
    ('SVR', SVR(**svr_settings)),
    (
        'RandomForest',
        RandomForestRegressor(max_depth=6, n_estimators=500, random_state=42),
    ),
]

# Ensemble over all base learners (no weights are passed).
voting_reg = VotingRegressor(estimators=models)

# The scaler is the first pipeline step, so it is fitted on the training
# features only and then re-applied automatically at predict time.
pipeline = make_pipeline(StandardScaler(), voting_reg)
pipeline.fit(X_train, y_train)

In [14]:
from sklearn.metrics import root_mean_squared_error

# In-sample error: RMSE of the fitted pipeline on the same rows it was
# trained on.
y_pred = pipeline.predict(X_train)
rmse = root_mean_squared_error(y_true=y_train, y_pred=y_pred)
rmse

0.8003500715355797

In [15]:
# Held-out error: RMSE of the fitted pipeline on the test split.
y_pred_test = pipeline.predict(X_test)
rmse_test = root_mean_squared_error(y_true=y_test, y_pred=y_pred_test)
rmse_test

0.6216597086898991

In [16]:
# Hwang WAR(-0.30) Prediction
# One input row in the training feature order [R, RBI]; the WAR in the heading
# is presumably the player's actual value, for comparison with the output.
hwang_stats = [[60, 58]]
hwang = pipeline.predict(hwang_stats)
hwang

array([1.64157623])

In [17]:
# Rojas WAR(6.50) Prediction
# One input row in the training feature order [R, RBI]; the WAR in the heading
# is presumably the player's actual value, for comparison with the output.
rojas_stats = [[108, 112]]
Rojas = pipeline.predict(rojas_stats)
Rojas

array([5.02383852])

In [18]:
# Kim Do Yeong WAR(8.32) Prediction
# One input row in the training feature order [R, RBI]; the WAR in the heading
# is presumably the player's actual value, for comparison with the output.
do_yeong_stats = [[143, 109]]
DO_YEONG = pipeline.predict(do_yeong_stats)
DO_YEONG

array([7.12680487])

In [19]:
# Choi Jung WAR(4.55) Prediction
# One input row in the training feature order [R, RBI]; the WAR in the heading
# is presumably the player's actual value, for comparison with the output.
choi_jung_stats = [[93, 107]]
Choi_Jung = pipeline.predict(choi_jung_stats)
Choi_Jung

array([4.04615664])

In [20]:
# Kang Baek Ho WAR(2.12) Prediction
# One input row in the training feature order [R, RBI]; the WAR in the heading
# is presumably the player's actual value, for comparison with the output.
baek_ho_stats = [[92, 96]]
BaekHo = pipeline.predict(baek_ho_stats)
BaekHo

array([3.4943622])