In [1]:
from feature.make_dataset import make_data
from feature.after_dataset import accumulate
from feature.moving_average import moving_average
import pandas as pd
from glob import glob
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import glob
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings('ignore')


In [2]:
# Glob patterns for the per-case CSV files, grouped as (input, target) pairs
# for the training split and the test split.
train_input_list, train_target_list = (
    './data/train_input/*.csv',
    './data/train_target/*.csv',
)
test_input_list, test_target_list = (
    './data/test_input/*.csv',
    './data/test_target/*.csv',
)

In [3]:
# Build the base train/test frames from the four glob patterns.
# make_data returns four objects; only `train` and `test` are used further down
# in the visible part of this notebook.
train, test, p_train, p_test = make_data(
    train_input_list,
    train_target_list,
    test_input_list,
    test_target_list,
)

# Moving-average features are computed from the input files only
# (train first, then test).
train_rol, test_rol = (
    moving_average(input_pattern)
    for input_pattern in (train_input_list, test_input_list)
)

In [4]:
# Give all four frames a fresh default index so they can be concatenated
# side by side later. reset_index() is called without drop=True, so the previous
# index is kept as a column (presumably named 'index', which is dropped in a
# later cell -- confirm the frames' index is unnamed).
train, test, train_rol, test_rol = (
    frame.reset_index()
    for frame in (train, test, train_rol, test_rol)
)

In [5]:
# Run accumulate() once per cumulative feature name, in the same order as before.
# The return value is not used, so accumulate presumably updates `train` and
# `test` in place -- confirm against feature.after_dataset.
for cumulative_col in (
    '누적분무량',
    '누적백색광량',
    '누적청색광량',
    '누적적색광량',
    '누적총광량',
):
    accumulate(train, test, cumulative_col)

In [6]:
# Append the moving-average columns to the right of the base frames.
# Concatenation is column-wise and aligns rows on the index.
train = pd.concat(objs=[train, train_rol], axis='columns')
test = pd.concat(objs=[test, test_rol], axis='columns')

In [7]:
# Columns removed from both frames before modelling: the five '시간당...' columns
# and 'index'.
drop_feature = [
    '시간당분무량',
    '시간당백색광량',
    '시간당적색광량',
    '시간당청색광량',
    '시간당총광량',
    'index',
]
train = train.drop(columns=drop_feature)
test = test.drop(columns=drop_feature)


In [8]:
# Base learners for the voting ensemble.
# XGBoost and CatBoost are configured to train on the GPU.
xgb = XGBRegressor(tree_method='gpu_hist', gpu_id=0, random_state=404)
# NOTE(review): early_stopping_rounds presumably has no effect here because no
# eval_set is passed when the ensemble is fitted -- confirm against CatBoost docs.
cat = CatBoostRegressor(verbose=500, early_stopping_rounds=50, task_type="GPU", random_seed=113)
# Seeded so that fold scores and predictions are reproducible across runs; the
# forest was previously the only unseeded stochastic learner in the ensemble.
rf = RandomForestRegressor(random_state=404)
kn = KNeighborsRegressor()
lgbm = LGBMRegressor()  # constructed but not included in the voting ensemble below

# Averages the predictions of the four listed regressors.
voting = VotingRegressor([('xgb', xgb), ('cat', cat), ('rf', rf), ('kn', kn)])

In [9]:
# Cross-validated training of the voting ensemble, followed by writing the
# fold-averaged test predictions back into the test-target CSV files.
from sklearn.model_selection import StratifiedKFold, KFold

N_SPLITS = 5        # number of CV folds, and of models averaged at prediction time
ROWS_PER_CASE = 28  # each test-target CSV receives this many consecutive predictions

# Features are every column except the target, 'Case' and 'obs_time'.
X = train.drop(['predicted_weight_g', 'Case', 'obs_time'], axis=1)
y = train['predicted_weight_g']

# NOTE(review): with a shuffled KFold, rows sharing the same 'Case' can end up in
# both the training and validation part of a fold; if the fold RMSE is meant to
# reflect unseen cases, a grouped split on 'Case' may be more appropriate.
skf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=404)
folds = list(skf.split(X, y))

# Fitted ensemble per fold, keyed by the 0-based fold number.
voting_model = {}

for f, (train_idx, val_idx) in enumerate(folds):
    print(f'===================================={f+1}============================================')

    x_train, x_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # A fresh VotingRegressor per fold.
    voting = VotingRegressor([('xgb', xgb), ('cat', cat), ('rf', rf), ('kn', kn)])
    voting.fit(x_train, y_train)

    y_pred = voting.predict(x_val)
    rmse = mean_squared_error(y_val, y_pred)**0.5
    print(f"{f + 1} Fold RMSE = {rmse}")
    voting_model[f] = voting
    print(f'================================================================================\n\n')


# Work on an independent copy: accumulating into a column slice of `test` is a
# chained assignment whose warning is hidden by the global warnings filter.
submit = test[['DAT', 'predicted_weight_g']].copy()
# Start the running average from zero. The test-target files are overwritten at
# the end of this cell, so on a re-run the column may already hold earlier
# predictions that would otherwise be added to the new ones.
submit['predicted_weight_g'] = 0.0
x_test = test.drop(['predicted_weight_g', 'Case', 'obs_time'], axis=1)
for fold in range(N_SPLITS):
    submit['predicted_weight_g'] += voting_model[fold].predict(x_test) / N_SPLITS

# Write predictions back file by file. This assumes the rows of `test` follow the
# sorted file order with ROWS_PER_CASE rows per file -- confirm against make_data.
all_target_list = sorted(glob.glob('./data/test_target/*.csv'))
for idx, test_path in enumerate(all_target_list):
    submit_df = pd.read_csv(test_path)
    start = idx * ROWS_PER_CASE
    # Explicit positional slice of this file's block of predictions.
    submit_df['predicted_weight_g'] = (
        submit['predicted_weight_g'].iloc[start:start + ROWS_PER_CASE].values
    )
    submit_df.to_csv(test_path, index=False)

Learning rate set to 0.037997
0:	learn: 41.5974912	total: 10.3ms	remaining: 10.2s
500:	learn: 3.9113519	total: 4.86s	remaining: 4.84s
999:	learn: 3.0885759	total: 9.66s	remaining: 0us
1 Fold RMSE = 8.924650964169036


Learning rate set to 0.037997
0:	learn: 39.9853791	total: 10.5ms	remaining: 10.5s
500:	learn: 5.0531466	total: 4.93s	remaining: 4.91s
999:	learn: 4.2513023	total: 9.78s	remaining: 0us
2 Fold RMSE = 9.462742354908134


Learning rate set to 0.037997
0:	learn: 41.1704116	total: 10.6ms	remaining: 10.5s
500:	learn: 4.6822179	total: 4.97s	remaining: 4.95s
999:	learn: 3.8146348	total: 9.65s	remaining: 0us
3 Fold RMSE = 7.0669150296425975


Learning rate set to 0.037997
0:	learn: 39.9624949	total: 10.4ms	remaining: 10.4s
500:	learn: 4.8263535	total: 4.78s	remaining: 4.76s
999:	learn: 3.6533570	total: 9.56s	remaining: 0us
4 Fold RMSE = 8.185900491774184


Learning rate set to 0.038005
0:	learn: 39.2371234	total: 10.2ms	remaining: 10.2s
500:	learn: 4.1048988	total: 4.79s	remaining: