In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import os
from sklearn.metrics import mutual_info_score

import warnings
warnings.filterwarnings("ignore")

import xgboost as xgb
import lightgbm as lgb

def load_dataset(data_dir='../data', n_sets=4):
    """Load the numbered train/test CSV files.

    Reads ``train_<i>.csv`` and ``test_<i>.csv`` for ``i = 1..n_sets`` from
    ``data_dir``, parsing the "时间" (time) column as datetimes.

    Parameters
    ----------
    data_dir : str, default '../data'
        Directory that contains the CSV files.
    n_sets : int, default 4
        Number of numbered train/test file pairs to read.

    Returns
    -------
    tuple of (list of DataFrame, list of DataFrame)
        The training frames, each with exact duplicate rows dropped and the
        index reset, followed by the test frames exactly as read.
    """
    def _read(prefix, i):
        # One CSV per (prefix, index) pair, e.g. <data_dir>/train_1.csv.
        path = os.path.join(data_dir, f'{prefix}_{i}.csv')
        return pd.read_csv(path, parse_dates=["时间"])

    set_ids = range(1, n_sets + 1)
    # Only the training frames are de-duplicated; test frames are left untouched.
    train = [_read('train', i).drop_duplicates().reset_index(drop=True) for i in set_ids]
    test = [_read('test', i) for i in set_ids]
    return train, test

# t1..t4 are the training frames, p1..p4 the matching test frames.
(t1, t2, t3, t4), (p1, p2, p3, p4) = load_dataset()

In [33]:
def get_x(t):
    """Build the model feature frame from one raw DataFrame.

    Parameters
    ----------
    t : pandas.DataFrame
        Must contain the columns "时间" (datetime), "辐照度", "风速", "风向",
        "压强" and "湿度".

    Returns
    -------
    pandas.DataFrame
        A new frame, indexed like ``t``, with the columns
        ``month_cat, hour_cat, irr, ws, wd_cat, pr, hm`` in that order.
        ``t`` itself is not modified.
    """
    timestamps = t["时间"]
    # Twelve 30-degree sectors covering 0..360. The upper edge must be 360
    # (not 330) and the lowest edge must be included, otherwise readings of
    # exactly 0 or above 330 fall outside every bin and become NaN.
    wd_bins = list(range(0, 361, 30))
    return pd.DataFrame({
        'month_cat': timestamps.dt.month,
        'hour_cat': timestamps.dt.hour,  # hour of day, not month
        'irr': t["辐照度"],
        'ws': t["风速"],
        # Bin the frame that was passed in, not a global one.
        'wd_cat': pd.cut(t["风向"], bins=wd_bins, include_lowest=True),
        'pr': t["压强"],
        'hm': t["湿度"],
    }, index=t.index)

# Regression target: the "实际功率" (actual power) column of the first training frame.
y = t1["实际功率"].values
# Feature matrix built from the same frame.
x = get_x(t1)

In [39]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, GroupKFold, KFold

np.random.seed(42)
# Hand GridSearchCV the splitter itself rather than the one-shot generator
# returned by .split(); a generator is exhausted after the first fit, so
# re-running the search would otherwise see no folds.
kf = KFold(n_splits=4)

# Hyper-parameter grid: 2 * 2 * 2 * 2 * 3 = 48 candidates, each fitted once per fold.
param_grid = {
    'num_leaves': [31, 127],
    'feature_fraction': [0.5, 1.0],
    'bagging_fraction': [0.75, 0.95],
    'reg_alpha': [0.1, 0.5],
    # n_estimators is the scikit-learn-API name for the number of boosting
    # rounds (num_boost_round is only an alias from the native API).
    'n_estimators': [50, 200, 400]
}

# Early stopping is intentionally not configured here: it needs a validation
# set supplied at fit time, which this plain grid search does not provide.
lgb_estimator = lgb.LGBMRegressor(boosting_type='gbdt',
                                  objective='regression',
                                  bagging_freq=5,
                                  learning_rate=0.01)

gsearch = GridSearchCV(estimator=lgb_estimator,
                       param_grid=param_grid,
                       cv=kf)

# categorical_feature is a fit()-time argument in LightGBM's scikit-learn API;
# GridSearchCV forwards extra fit keyword arguments to the estimator's fit.
# Columns are named explicitly instead of by position ([0, 1, 4]).
lgb_model = gsearch.fit(X=x, y=y,
                        categorical_feature=['month_cat', 'hour_cat', 'wd_cat'])

print(lgb_model.best_params_, lgb_model.best_score_)

KeyboardInterrupt: 