## 1. Data & Library

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import lightgbm as lgb
from bayes_opt import BayesianOptimization
from sklearn.metrics import roc_auc_score, mean_squared_error
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore') 


# Load the competition files.
# `train` uses its "id" column as the index; `test` is read with the default
# integer index, so any id-like column stays a regular column.
DATA_DIR = 'C:/Users/Kim/Downloads/AIFrenz_Season1_dataset'
train = pd.read_csv(DATA_DIR + '/train.csv', index_col="id")
test = pd.read_csv(DATA_DIR + '/test.csv')

In [2]:
# Remove the insolation sensors X14, X16, X19 (columns whose max equals their min) -- currently disabled
# def remove_sensor(df):
#     return df.drop(df.columns[df.max()==df.min()],axis=1,inplace=True)

# remove_sensor(train)
# remove_sensor(test)

In [3]:
# Group the X features of the training set by the weather quantity they
# measure (five sensor columns per group).

# Air temperature
temp = train.loc[:, ["X00", "X07", "X28", "X31", "X32"]]
# Local (station) atmospheric pressure
l_atm = train.loc[:, ["X01", "X06", "X22", "X27", "X29"]]
# Wind speed
wind_s = train.loc[:, ["X02", "X03", "X18", "X24", "X26"]]
# Daily cumulative precipitation
water = train.loc[:, ["X04", "X10", "X21", "X36", "X39"]]
# Sea-level pressure
sea_atm = train.loc[:, ["X05", "X08", "X09", "X23", "X33"]]
# Daily cumulative insolation. The original note says three sensors were
# deleted, but X14/X16/X19 are still selected here because the removal cell
# above is commented out.
sun = train.loc[:, ["X11", "X34", "X14", "X16", "X19"]]
# Humidity
humid = train.loc[:, ["X12", "X20", "X30", "X37", "X38"]]
# Wind direction
wind_d = train.loc[:, ["X13", "X15", "X17", "X25", "X35"]]

## 2. Data Cleaning & pre-processing

## 3. EDA 

In [4]:
train.shape #(4752, 59) : X00-X39 (40 features) + Y00-Y17 + Y18 (19 targets); id is the index

(4752, 59)

In [5]:
test.shape # (11520, 41) : X00-X39 (40 features) plus one non-feature column (presumably id)

(11520, 41)

In [6]:
#pd.merge(temp, wind_s, left_index=True, right_index=True).corr() 
# Sensors paired by location (the letters presumably denote the plot colour of each series)
# humidity  temperature
#X20(o)  X00(b)
#X12(b)  X31(r)
#X37(r)  X07(o)
#X30(g)  X32(p)
#X38(p)  X28(g) 

In [7]:
import random

# Seed Python's and NumPy's global random generators with the same fixed value.
random.seed(777)
np.random.seed(777)
# tf.set_random_seed(777)

In [8]:
# Fill the missing Y18 targets with the row-wise mean of Y09, Y15 and Y16.
tr = train["Y18"].isna()
null_index = tr[tr].index
_y18_fill = train.loc[null_index, ["Y09", "Y15", "Y16"]].mean(axis=1)
train.loc[null_index, "Y18"] = _y18_fill

In [9]:
# Features: every column from X00 through X39 (label slice, both ends inclusive).
x_train = train.loc[:,"X00":"X39"]
# Target: Y18 (its missing values were filled in the previous cell).
y_train = train["Y18"]

In [10]:
# Apparent Temperature features AT01..AT05.
# Formula: 13.12 + 0.6215*T - 11.37*V**0.16 + 0.3965*V**0.16*T, evaluated
# element-wise on the raw arrays, so the i-th temperature column is combined
# with the i-th wind-speed column.
# NOTE(review): this has the form of the wind-chill index, which expects wind
# speed in km/h -- confirm the sensor units and the temp/wind column pairing.
_wind_pow = (wind_s ** 0.16).values
at = pd.DataFrame(
    13.12 + 0.6215 * temp.values - 11.37 * _wind_pow + 0.3965 * _wind_pow * temp.values,
    columns=["AT01", "AT02", "AT03", "AT04", "AT05"],
    # Carry the training index explicitly: concat(axis=1) aligns on labels, so a
    # default RangeIndex would only line up if the ids happen to be 0..n-1.
    index=temp.index,
)
# Drop any AT columns left from a previous run so re-executing this cell does
# not append duplicate columns.
x_train = pd.concat([x_train.drop(columns=at.columns, errors="ignore"), at], axis=1)

In [11]:
x_train

Unnamed: 0,X00,X01,X02,X03,X04,X05,X06,X07,X08,X09,...,X35,X36,X37,X38,X39,AT01,AT02,AT03,AT04,AT05
0,9.7,988.8,1.2,0.6,0.0,1009.3,989.6,12.2,1009.9,1009.8,...,256.4,0.0,77.2,62.6,0.0,11.401883,14.682294,15.409096,9.972844,13.390804
1,9.3,988.9,1.7,1.9,0.0,1009.3,989.6,12.1,1010.0,1009.9,...,215.4,0.0,77.3,63.5,0.0,10.536659,13.356966,14.964047,18.278450,19.521450
2,9.4,989.0,1.1,2.3,0.0,1009.2,989.7,12.1,1010.1,1010.1,...,235.2,0.0,77.3,63.9,0.0,11.201755,13.130890,14.583606,18.092000,19.148550
3,9.4,988.9,1.5,0.7,0.0,1009.2,989.6,12.0,1010.0,1010.0,...,214.0,0.0,77.5,64.5,0.0,10.806935,14.332769,16.093195,17.905550,13.054338
4,9.2,988.9,0.8,1.7,0.0,1009.2,989.7,12.0,1010.1,1010.0,...,174.9,0.0,78.0,65.0,0.0,11.386442,13.380119,20.578000,17.719100,18.962100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4747,19.9,987.6,0.9,0.8,0.0,1006.9,987.7,21.7,1007.5,1007.4,...,218.6,0.0,82.3,58.6,0.5,22.066367,23.937611,26.010718,24.120550,21.081409
4748,19.9,987.6,0.5,0.7,0.0,1006.8,987.7,21.6,1007.5,1007.4,...,161.7,0.0,82.5,59.1,0.5,22.373476,23.894427,25.774800,24.120550,21.134522
4749,19.7,987.7,0.9,0.6,0.0,1006.9,987.6,21.4,1007.4,1007.5,...,254.2,0.0,83.0,58.9,0.5,21.864093,23.761610,25.932751,24.182700,22.148583
4750,19.4,987.7,0.9,0.8,0.0,1006.9,987.8,21.3,1007.6,1007.5,...,300.0,0.0,83.2,59.8,0.5,21.560681,23.535973,25.636687,24.120550,22.543034


In [12]:
# Discomfort index
def discomfort_index(tem_v,hum_v):
    """Compute the discomfort index from temperature and humidity.

    Evaluates ``(9/5)*T - 0.55*(1 - H/100)*((9/5)*T - 26) + 32`` element-wise.

    Parameters
    ----------
    tem_v : pandas.Series
        Temperature readings (presumably degrees Celsius -- confirm).
    hum_v : pandas.Series
        Humidity readings, divided by 100 in the formula (so presumably percent).

    Returns
    -------
    pandas.DataFrame
        Single-column frame holding the index, aligned on the inputs' index.
    """
    scaled_temp = (9/5)*tem_v
    dryness = 1-hum_v/100
    return pd.DataFrame(scaled_temp-0.55*dryness*(scaled_temp-26)+32)

# Add DI01..DI05: each temperature sensor is combined with the humidity sensor
# listed next to it in the pairing notes earlier in the notebook.
for _di_col, _t_col, _h_col in [
    ("DI01", "X00", "X20"),
    ("DI02", "X31", "X12"),
    ("DI03", "X07", "X37"),
    ("DI04", "X32", "X30"),
    ("DI05", "X28", "X38"),
]:
    x_train[_di_col] = discomfort_index(temp[_t_col], humid[_h_col])
x_train

Unnamed: 0,X00,X01,X02,X03,X04,X05,X06,X07,X08,X09,...,AT01,AT02,AT03,AT04,AT05,DI01,DI02,DI03,DI04,DI05
0,9.7,988.8,1.2,0.6,0.0,1009.3,989.6,12.2,1009.9,1009.8,...,11.401883,14.682294,15.409096,9.972844,13.390804,49.882730,47.848032,54.466616,52.405463,54.934002
1,9.3,988.9,1.7,1.9,0.0,1009.3,989.6,12.1,1010.0,1009.9,...,10.536659,13.356966,14.964047,18.278450,19.521450,49.152533,48.083604,54.306867,51.758591,54.627165
2,9.4,989.0,1.1,2.3,0.0,1009.2,989.7,12.1,1010.1,1010.1,...,11.201755,13.130890,14.583606,18.092000,19.148550,49.254598,47.286820,54.306867,50.798645,54.617881
3,9.4,988.9,1.5,0.7,0.0,1009.2,989.6,12.0,1010.0,1010.0,...,10.806935,14.332769,16.093195,17.905550,13.054338,49.249604,46.681271,54.144500,50.258392,54.603955
4,9.2,988.9,0.8,1.7,0.0,1009.2,989.7,12.0,1010.1,1010.0,...,11.386442,13.380119,20.578000,17.719100,18.962100,48.866328,46.094114,54.132400,50.203458,54.447000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4747,19.9,987.6,0.9,0.8,0.0,1006.9,987.7,21.7,1007.5,1007.4,...,22.066367,23.937611,26.010718,24.120550,21.081409,67.263697,63.708519,69.788609,65.914491,71.005532
4748,19.9,987.6,0.5,0.7,0.0,1006.8,987.7,21.6,1007.5,1007.4,...,22.373476,23.894427,25.774800,24.120550,21.134522,67.285301,63.718188,69.640300,66.150404,70.772824
4749,19.7,987.7,0.9,0.6,0.0,1006.9,987.6,21.4,1007.4,1007.5,...,21.864093,23.761610,25.932751,24.182700,22.148583,66.981324,63.903798,69.349380,66.098616,70.754696
4750,19.4,987.7,0.9,0.8,0.0,1006.9,987.8,21.3,1007.6,1007.5,...,21.560681,23.535973,25.636687,24.120550,22.543034,66.502990,63.727857,69.199784,66.264107,70.415666


In [13]:
# Dew point
def dewpoint(tem_v,hum_v):
    """Compute the dew point from temperature and relative humidity.

    With ``g = 17.27*T/(237.7 + T) + ln(H/100)`` the result is
    ``237.7*g / (17.27 - g)``, evaluated element-wise. The constants match the
    usual Magnus-type approximation (presumably T in degrees Celsius and H in
    percent -- confirm against the data description).

    Parameters
    ----------
    tem_v : pandas.Series
        Temperature readings.
    hum_v : pandas.Series
        Humidity readings; must be positive for the logarithm to be finite.

    Returns
    -------
    pandas.DataFrame
        Single-column frame holding the dew point, aligned on the inputs' index.
    """
    gamma = (17.27 * tem_v /(237.7 + tem_v)) + np.log(hum_v / 100.0)
    return pd.DataFrame((237.7 * gamma) / (17.27 - gamma))

# Add DP01..DP05 using the same temperature/humidity sensor pairs as the
# discomfort-index features.
for _dp_col, _t_col, _h_col in [
    ("DP01", "X00", "X20"),
    ("DP02", "X31", "X12"),
    ("DP03", "X07", "X37"),
    ("DP04", "X32", "X30"),
    ("DP05", "X28", "X38"),
]:
    x_train[_dp_col] = dewpoint(temp[_t_col], humid[_h_col])
x_train
                                   

Unnamed: 0,X00,X01,X02,X03,X04,X05,X06,X07,X08,X09,...,DI01,DI02,DI03,DI04,DI05,DP01,DP02,DP03,DP04,DP05
0,9.7,988.8,1.2,0.6,0.0,1009.3,989.6,12.2,1009.9,1009.8,...,49.882730,47.848032,54.466616,52.405463,54.934002,8.301774,5.381230,8.324408,5.265926,5.366306
1,9.3,988.9,1.7,1.9,0.0,1009.3,989.6,12.1,1010.0,1009.9,...,49.152533,48.083604,54.306867,51.758591,54.627165,8.050982,5.267802,8.246558,5.130281,5.382539
2,9.4,989.0,1.1,2.3,0.0,1009.2,989.7,12.1,1010.1,1010.1,...,49.254598,47.286820,54.306867,50.798645,54.617881,8.372784,5.818657,8.246558,4.797223,5.472960
3,9.4,988.9,1.5,0.7,0.0,1009.2,989.6,12.0,1010.0,1010.0,...,49.249604,46.681271,54.144500,50.258392,54.603955,8.388586,5.789588,8.187671,4.845261,5.607661
4,9.2,988.9,0.8,1.7,0.0,1009.2,989.7,12.0,1010.1,1010.0,...,48.866328,46.094114,54.132400,50.203458,54.447000,8.300243,5.690195,8.282423,5.059198,5.624113
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4747,19.9,987.6,0.9,0.8,0.0,1006.9,987.7,21.7,1007.5,1007.4,...,67.263697,63.708519,69.788609,65.914491,71.005532,18.154800,16.937340,18.545779,17.400888,15.190499
4748,19.9,987.6,0.5,0.7,0.0,1006.8,987.7,21.6,1007.5,1007.4,...,67.285301,63.718188,69.640300,66.150404,70.772824,18.225772,16.986994,18.486996,17.744963,15.135684
4749,19.7,987.7,0.9,0.6,0.0,1006.9,987.6,21.4,1007.4,1007.5,...,66.981324,63.903798,69.349380,66.098616,70.754696,18.151712,17.135992,18.388261,17.552434,15.082908
4750,19.4,987.7,0.9,0.8,0.0,1006.9,987.8,21.3,1007.6,1007.5,...,66.502990,63.727857,69.199784,66.264107,70.415666,17.977544,17.036511,18.328997,17.633537,15.037881


In [14]:
# Saturation vapour pressure
def vapor_pressure(tem_v):
    """Compute ``611 * exp(17.27*T / (237.7 + T)) / 1000`` element-wise.

    Presumably the saturation vapour pressure in kPa for T in degrees Celsius
    (611 Pa at 0 degrees) -- confirm the intended units.

    Parameters
    ----------
    tem_v : pandas.Series
        Temperature readings.

    Returns
    -------
    pandas.DataFrame
        Single-column frame (the column keeps the input Series' name) aligned
        on the input's index.
    """
    exponent = (17.27*tem_v)/(237.7+tem_v)
    return pd.DataFrame(611* np.exp(exponent)/1000)

# Add VP01..VP05, one per temperature sensor, in the same sensor order as the
# DI/DP features.
for _vp_col, _t_col in [
    ("VP01", "X00"),
    ("VP02", "X31"),
    ("VP03", "X07"),
    ("VP04", "X32"),
    ("VP05", "X28"),
]:
    x_train[_vp_col] = vapor_pressure(temp[_t_col])
x_train

Unnamed: 0,X00,X01,X02,X03,X04,X05,X06,X07,X08,X09,...,DP01,DP02,DP03,DP04,DP05,VP01,VP02,VP03,VP04,VP05
0,9.7,988.8,1.2,0.6,0.0,1009.3,989.6,12.2,1009.9,1009.8,...,8.301774,5.381230,8.324408,5.265926,5.366306,1.202569,1.086805,1.419713,1.285643,1.429073
1,9.3,988.9,1.7,1.9,0.0,1009.3,989.6,12.1,1010.0,1009.9,...,8.050982,5.267802,8.246558,5.130281,5.382539,1.170685,1.094206,1.410408,1.251827,1.410408
2,9.4,989.0,1.1,2.3,0.0,1009.2,989.7,12.1,1010.1,1010.1,...,8.372784,5.818657,8.246558,4.797223,5.472960,1.178585,1.072137,1.410408,1.202569,1.410408
3,9.4,988.9,1.5,0.7,0.0,1009.2,989.6,12.0,1010.0,1010.0,...,8.388586,5.789588,8.187671,4.845261,5.607661,1.178585,1.050460,1.401156,1.178585,1.410408
4,9.2,988.9,0.8,1.7,0.0,1009.2,989.7,12.0,1010.1,1010.0,...,8.300243,5.690195,8.282423,5.059198,5.624113,1.162831,1.029171,1.401156,1.178585,1.401156
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4747,19.9,987.6,0.9,0.8,0.0,1006.9,987.7,21.7,1007.5,1007.4,...,18.154800,16.937340,18.545779,17.400888,15.190499,2.319788,2.022239,2.591032,2.207436,2.942157
4748,19.9,987.6,0.5,0.7,0.0,1006.8,987.7,21.6,1007.5,1007.4,...,18.225772,16.986994,18.486996,17.744963,15.135684,2.319788,2.022239,2.575267,2.221214,2.907017
4749,19.7,987.7,0.9,0.6,0.0,1006.9,987.6,21.4,1007.4,1007.5,...,18.151712,17.135992,18.388261,17.552434,15.082908,2.291242,2.035001,2.543988,2.221214,2.907017
4750,19.4,987.7,0.9,0.8,0.0,1006.9,987.8,21.3,1007.6,1007.5,...,17.977544,17.036511,18.328997,17.633537,15.037881,2.248997,2.022239,2.528474,2.235068,2.854994


In [15]:
# Actual precipitation per step (difference of the cumulative precipitation)
def water_diff(c, frame=None):
    """Return the step-to-step change of the cumulative precipitation column ``c``.

    Parameters
    ----------
    c : str
        Name of the cumulative-precipitation column to difference.
    frame : pandas.DataFrame, optional
        Frame that holds column ``c``. Defaults to the notebook-level ``water``
        frame (training data), which keeps existing ``water_diff("X04")`` calls
        working; pass another frame (e.g. the test-set group) to reuse the same
        logic there.

    Returns
    -------
    pandas.DataFrame
        Single column labelled ``0`` on the same index as ``frame``. The first
        row holds the first raw reading (unrounded); every later row holds
        ``value - previous value`` rounded to one decimal.
    """
    source = water if frame is None else frame
    series = source[c]
    # Vectorised difference works by position, so it does not depend on the
    # index labels being exactly 0..n-1 the way per-row label slicing does.
    steps = series.diff().round(1)
    if len(series):
        # diff() leaves NaN in the first slot; seed it with the first reading.
        steps.iloc[0] = series.iloc[0]
    return pd.DataFrame(steps.to_numpy(), index=series.index)
    

# Add diff_X04..diff_X39: the per-step change of each cumulative-precipitation
# sensor in the training data.
for _rain_col in ["X04", "X10", "X21", "X36", "X39"]:
    x_train["diff_" + _rain_col] = water_diff(_rain_col)
x_train

Unnamed: 0,X00,X01,X02,X03,X04,X05,X06,X07,X08,X09,...,VP01,VP02,VP03,VP04,VP05,diff_X04,diff_X10,diff_X21,diff_X36,diff_X39
0,9.7,988.8,1.2,0.6,0.0,1009.3,989.6,12.2,1009.9,1009.8,...,1.202569,1.086805,1.419713,1.285643,1.429073,0.0,0.0,0.0,0.0,0.0
1,9.3,988.9,1.7,1.9,0.0,1009.3,989.6,12.1,1010.0,1009.9,...,1.170685,1.094206,1.410408,1.251827,1.410408,0.0,0.0,0.0,0.0,0.0
2,9.4,989.0,1.1,2.3,0.0,1009.2,989.7,12.1,1010.1,1010.1,...,1.178585,1.072137,1.410408,1.202569,1.410408,0.0,0.0,0.0,0.0,0.0
3,9.4,988.9,1.5,0.7,0.0,1009.2,989.6,12.0,1010.0,1010.0,...,1.178585,1.050460,1.401156,1.178585,1.410408,0.0,0.0,0.0,0.0,0.0
4,9.2,988.9,0.8,1.7,0.0,1009.2,989.7,12.0,1010.1,1010.0,...,1.162831,1.029171,1.401156,1.178585,1.401156,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4747,19.9,987.6,0.9,0.8,0.0,1006.9,987.7,21.7,1007.5,1007.4,...,2.319788,2.022239,2.591032,2.207436,2.942157,0.0,0.0,0.0,0.0,0.0
4748,19.9,987.6,0.5,0.7,0.0,1006.8,987.7,21.6,1007.5,1007.4,...,2.319788,2.022239,2.575267,2.221214,2.907017,0.0,0.0,0.0,0.0,0.0
4749,19.7,987.7,0.9,0.6,0.0,1006.9,987.6,21.4,1007.4,1007.5,...,2.291242,2.035001,2.543988,2.221214,2.907017,0.0,0.0,0.0,0.0,0.0
4750,19.4,987.7,0.9,0.8,0.0,1006.9,987.8,21.3,1007.6,1007.5,...,2.248997,2.022239,2.528474,2.235068,2.854994,0.0,0.0,0.0,0.0,0.0


In [None]:
#기온, 습도, 풍속 , 현지기압 diff
# x_train["temp_diff"]=temp.max(axis=1)-temp.min(axis=1)
# x_train["humid_diff"]=humid.max(axis=1)-humid.min(axis=1)
# x_train["wind_s_diff"]=wind_s.max(axis=1)-wind_s.min(axis=1)
# x_train["local_diff"]=l_atm.max(axis=1)-l_atm.min(axis=1)

In [None]:
# def bayes_parameter_opt_lgb(X, y, init_round=15, opt_round=25, n_folds=5, random_state=777, seed = 101, num_iterations = 200):
#     # prepare data
#     train_data = lgb.Dataset(data=x_train, label=y_train,free_raw_data=False)
#     # parameters

#     def lgb_eval(feature_fraction, num_leaves, max_depth , min_split_gain, min_child_weight, min_data_in_leaf, bagging_fraction, bagging_freq):
#         params = {
#             "objective" : "regression",
#             'num_iterations': num_iterations,
#             "min_child_samples": 20,
#             "reg_alpha": 1, 
#             "reg_lambda": 1,
#             'early_stopping_rounds':10,
#             "learning_rate" : 0.01, "subsample" : 0.8,  "metric" : 'l1'
#         } # Default parameters
#         params['feature_fraction'] = max(min(feature_fraction, 1), 0)
#         params['max_depth'] = int(round(max_depth))
#         params['num_leaves'] = int(round(num_leaves))
#         params['min_split_gain'] = min_split_gain
#         params['min_child_weight'] = min_child_weight
#         params['min_data_in_leaf'] = int(min_data_in_leaf)
#         params['bagging_fraction'] = max(min(bagging_fraction, 1), 0)
#         params['bagging_freq']= int(bagging_freq)
#         cv_result = lgb.cv(params, train_data, nfold=n_folds, seed=seed, verbose_eval =None, metrics=['l1'], stratified=False)
#         return -np.min(cv_result['l1-mean'])
    
#     #Range of hyperparameters & Surrogate model
#     lgbBO = BayesianOptimization(lgb_eval, {'feature_fraction' : (0.1, 0.9),
#                                             'max_depth': (5, 15),
#                                             'num_leaves' : (50,500),
#                                             'min_split_gain': (0.001, 0.1),
#                                             'min_child_weight': (5, 50),
#                                             'min_data_in_leaf' : (100,500),
#                                             'bagging_fraction': (0.5, 0.9),
#                                             'bagging_freq':(1,10)}, random_state=random_state)
    
#     # optimize
#     lgbBO.maximize(init_points=init_round, n_iter=opt_round)

#         # return best parameters
#     return lgbBO.max['params']
          
# opt_params = bayes_parameter_opt_lgb(x_train, y_train, init_round=5, opt_round=20, n_folds=5, random_state=777, seed = 101, num_iterations =1000)

In [None]:
# opt_params

In [None]:
# lgb_train = lgb.Dataset(x_train,label=y_train)

# lgb_param = {'num_leaves': 500,
#              'objective':'regression',
#              'max_depth': 15,
#              'learning_rate': 0.01,
#              "min_child_samples": 5,
#              "feature_fraction":  0.9,
#              "min_split_gain" : 0.001,
#              "min_child_weight" : 5.0,
#              'min_data_in_leaf': 100,
#              "bagging_freq": 1,
#              "bagging_fraction": 0.9,
#              "bagging_seed": 777, #이전 11
#              "metric": 'mae'
#              }

# print("cv start")

# cv_result= lgb.cv(lgb_param, 
#                   lgb_train,
#                   num_boost_round=10000, #이전 5000 / learninh rate=0.005: 8.6460553286
#                   nfold=5,
#                   verbose_eval=10, 
#                   stratified=False, 
#                   early_stopping_rounds=10)

# print("train start")
# lgb_model = lgb.train(lgb_param,
#                       lgb_train,
#                       num_boost_round = len(cv_result["l1-mean"]))

In [None]:
# Wrap the engineered training features and the Y18 target for LightGBM.
lgb_train = lgb.Dataset(x_train,label=y_train)

# Minimal regression setup evaluated with MAE; everything else is left at the
# library defaults.
lgb_param = {"objective":"regression",
             "metrics":"mae",
             "learning_rate":0.01}

# 5-fold cross-validation (non-stratified) with early stopping after 10 rounds
# without improvement; used only to decide how many boosting rounds to train.
# NOTE(review): the early_stopping_rounds / verbose_eval keyword arguments and
# the "l1-mean" result key match the LightGBM 3.x API -- confirm the installed
# version before re-running.
print("cv start")
cv_result = lgb.cv(lgb_param,
                   lgb_train,
                   num_boost_round=10000,
                   nfold=5,
                   early_stopping_rounds=10,
                   stratified=False,
                   verbose_eval=10,
                   seed = 777)

# Refit on the full training set for as many rounds as the CV history recorded.
print("train start")
lgb_model = lgb.train(lgb_param,
                      lgb_train,
                      num_boost_round = len(cv_result["l1-mean"]))

In [None]:
# Test feature matrix: columns X00 through X39, copied so later column
# additions do not write into `test`.
x_test=test.loc[:,"X00":"X39"].copy()

In [None]:
x_test

In [None]:
# Same sensor grouping as for the training data, taken from the test set
# (names carry a trailing underscore).

# Air temperature
temp_ = test.loc[:, ["X00", "X07", "X28", "X31", "X32"]]
# Local (station) atmospheric pressure
l_atm_ = test.loc[:, ["X01", "X06", "X22", "X27", "X29"]]
# Wind speed
wind_s_ = test.loc[:, ["X02", "X03", "X18", "X24", "X26"]]
# Daily cumulative precipitation
water_ = test.loc[:, ["X04", "X10", "X21", "X36", "X39"]]
# Sea-level pressure
sea_atm_ = test.loc[:, ["X05", "X08", "X09", "X23", "X33"]]
# Daily cumulative insolation
sun_ = test.loc[:, ["X11", "X34", "X14", "X16", "X19"]]
# Humidity
humid_ = test.loc[:, ["X12", "X20", "X30", "X37", "X38"]]
# Wind direction
wind_d_ = test.loc[:, ["X13", "X15", "X17", "X25", "X35"]]

In [None]:
# Apparent-temperature features AT01..AT05 for the test set, using the same
# formula as for the training data (i-th temperature column with the i-th
# wind-speed column).
_wind_pow_ = (wind_s_**0.16).values
at = pd.DataFrame(
    (13.12 + 0.6215*temp_.values-11.37*_wind_pow_+0.3965*_wind_pow_*temp_.values),
    columns=["AT01","AT02","AT03","AT04","AT05"],
)
# reset_index() keeps the old index as a leading "index" column; a later cell
# removes that column again.
x_test = pd.concat([x_test.reset_index(), at], axis=1)

In [None]:
# Discomfort index, dew point and vapour pressure for the test set, built with
# the same sensor pairs and in the same column order (DI, then DP, then VP) as
# the training features.
_test_pairs = [
    ("01", "X00", "X20"),
    ("02", "X31", "X12"),
    ("03", "X07", "X37"),
    ("04", "X32", "X30"),
    ("05", "X28", "X38"),
]

for _suffix, _t_col, _h_col in _test_pairs:
    x_test["DI" + _suffix] = discomfort_index(temp_[_t_col], humid_[_h_col])

for _suffix, _t_col, _h_col in _test_pairs:
    x_test["DP" + _suffix] = dewpoint(temp_[_t_col], humid_[_h_col])

for _suffix, _t_col, _h_col in _test_pairs:
    x_test["VP" + _suffix] = vapor_pressure(temp_[_t_col])

x_test

In [None]:
# Actual (per-step) precipitation for the test set.
# The training frame contains diff_X04..diff_X39, so the test frame needs the
# same five columns in the same order; otherwise lgb_model.predict receives
# fewer features than the model was trained on.
# As in the previously disabled version of this cell, the first row has no
# predecessor and is set to 0.0 (the training helper seeds it with the first
# raw reading instead); later rows are value - previous value rounded to one
# decimal.
for _rain_col in ["X04", "X10", "X21", "X36", "X39"]:
    _step = water_[_rain_col].diff().round(1)
    _step.iloc[:1] = 0.0
    # Assign by position so the result does not depend on index alignment.
    x_test["diff_" + _rain_col] = _step.to_numpy()

In [None]:
# Remove the helper "index" column that reset_index() inserted as the first
# column. Dropping by name (ignoring a missing column) keeps this cell safe to
# re-run; a positional slice would strip X00 on a second execution.
x_test = x_test.drop(columns=["index"], errors="ignore")

In [None]:
#비의 여부
# x_test["rainYN"]=np.where(water_.sum(axis=1)!=0,1,0)

#기온, 습도, 풍속 , 현지기압 diff
# x_test["temp_diff"]=temp_.max(axis=1)-temp_.min(axis=1)
# x_test["humid_diff"]=humid_.max(axis=1)-humid_.min(axis=1)
# x_test["wind_s_diff"]=wind_s_.max(axis=1)-wind_s_.min(axis=1)
# x_test["l_atm_diff"]=l_atm_.max(axis=1)-l_atm_.min(axis=1)

In [None]:
# Predict Y18 for every test row with the trained booster.
prediction = lgb_model.predict(x_test)

In [None]:
# Build the submission: load the sample file indexed by id, fill in the
# predicted Y18 values and write the result to disk.
sub=pd.read_csv('C:/Users/Kim/Downloads/AIFrenz_Season1_dataset/sample_submission.csv',index_col = "id")
sub["Y18"] = prediction
sub.to_csv("C:/Users/Kim/Downloads/baseline_result_0402_2.csv")

In [None]:
# Reload the sample submission with the default integer index (id stays a
# regular column), attach the predictions again and display the frame.
# This rebinds `sub`; the comparison plots below use this version.
sub=pd.read_csv('C:/Users/Kim/Downloads/AIFrenz_Season1_dataset/sample_submission.csv')
sub["Y18"] = prediction
sub

In [None]:
# Earlier result files that were compared at some point (disabled).
#my_best_sc=pd.read_csv("C:/Users/Kim/Downloads/lightGBM_0324.csv")
#best_sc_a=pd.read_csv("C:/Users/Kim/Downloads/tr_var_add_result.csv")
#best_sc_h=pd.read_csv("C:/Users/Kim/Downloads/tenDense_ver1.0.csv")
# Reference submission to compare the new predictions against.
best_sc_chan=pd.read_csv("C:/Users/Kim/Downloads/final_result_1.csv")

# Overlay the last column of both frames over all rows.
plt.figure(figsize=(20,10))
plt.plot(sub.iloc[:,-1:])
plt.plot(best_sc_chan.iloc[:,-1:])

In [None]:
# Same comparison, zoomed in on rows 4000-4999.
plt.figure(figsize=(20,10))
plt.plot(sub.iloc[4000:5000,-1:])
plt.plot(best_sc_chan.iloc[4000:5000,-1:])

In [None]:
# Test-set temperature sensors for rows 4000-7999.
plt.figure(figsize=(20,10))
plt.plot(temp_.iloc[4000:8000,:])

In [None]:
# Test-set cumulative precipitation sensors for rows 4000-4999.
plt.figure(figsize=(20,10))
plt.plot(water_.iloc[4000:5000,:])

In [None]:
best_sc_chan

In [None]:
#6.23963641 
