# **ルワンダにおける CO2 排出量を予測する**
[Predict CO2 Emissions in Rwanda](https://www.kaggle.com/competitions/playground-series-s3e20)

# 参考資料
**書籍**


**Kaggle**
- [Getting Started: Modeling](https://www.kaggle.com/code/inversion/getting-started-modeling)


**自分で作成したファイル**
- Home_Credit_Default_Risk
- 肝硬変の転帰の多クラス予測
- 馬の健康への影響を予測する

**その他**


## ライブラリ・データセットのインポート

In [113]:
import numpy as np
import pandas as pd

import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Display settings: floats rendered with 5 decimals, and no truncation of
# columns or rows when a frame is shown.
pd.set_option('display.float_format', '{:.5f}'.format)
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [114]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor # 特徴量の重要度評価で用いる

## 実行時間を調べるために使う

In [115]:
import datetime
import time
import math

# Wall-clock timestamp taken when this cell runs (presumably the baseline for a
# later elapsed-time report — the use site is not visible here).
start_time = time.time()

def changeHMS(s):
    """Format a duration given in seconds as a compact '<h>h<m>m<s>s' string.

    The hour part and the minute part are each left out when their value is
    not positive; the seconds part is always present (floored to an integer).
    """
    hours = math.floor(s / 3600)
    # Only strip the whole hours off when there is at least one of them.
    remaining = s - hours * 3600 if hours > 0 else s
    minutes = math.floor(remaining / 60)

    parts = []
    if hours > 0:
        parts.append(str(hours) + 'h')
    if minutes > 0:
        parts.append(str(minutes) + 'm')
    parts.append(str(math.floor(remaining % 60)) + 's')

    return ''.join(parts)

In [116]:
# Prefixes prepended to input / output file names; empty strings mean the
# paths are relative to the working directory.
FILE_PATH = ''
OUTPUT_DIR = ''

train = pd.read_csv(FILE_PATH + 'train.csv')
test = pd.read_csv(FILE_PATH + 'test.csv')

# train_id = train['id']

# test_id = test['id']

# Keep the target from the original train frame before train and test are combined.
target = train['emission']
target_col = 'emission'

# NOTE(review): this stores the str() of a one-element list holding the name
# of the LAST train column (list repr, brackets included), not a bare column
# name. The previous comment described the target as sitting at "column
# count - 2", which does not match the [-1] used here — confirm the intent.
target_name = str(train.iloc[:, [-1]].columns.tolist())

# Stack train on top of test. reset_index() without drop=True keeps the old
# row labels as a new leading 'index' column.
df = pd.concat([train, test], axis=0)
df.reset_index(inplace=True)

# Split the column names by dtype: numeric columns vs. everything else.
numerical_features = df._get_numeric_data().columns
categorical_features = df.drop(numerical_features, axis=1).columns

In [117]:
# Preview the first 10 rows of the combined train + test frame.
df.head(10)

Unnamed: 0,index,ID_LAT_LON_YEAR_WEEK,latitude,longitude,year,week_no,SulphurDioxide_SO2_column_number_density,SulphurDioxide_SO2_column_number_density_amf,SulphurDioxide_SO2_slant_column_number_density,SulphurDioxide_cloud_fraction,SulphurDioxide_sensor_azimuth_angle,SulphurDioxide_sensor_zenith_angle,SulphurDioxide_solar_azimuth_angle,SulphurDioxide_solar_zenith_angle,SulphurDioxide_SO2_column_number_density_15km,CarbonMonoxide_CO_column_number_density,CarbonMonoxide_H2O_column_number_density,CarbonMonoxide_cloud_height,CarbonMonoxide_sensor_altitude,CarbonMonoxide_sensor_azimuth_angle,CarbonMonoxide_sensor_zenith_angle,CarbonMonoxide_solar_azimuth_angle,CarbonMonoxide_solar_zenith_angle,NitrogenDioxide_NO2_column_number_density,NitrogenDioxide_tropospheric_NO2_column_number_density,NitrogenDioxide_stratospheric_NO2_column_number_density,NitrogenDioxide_NO2_slant_column_number_density,NitrogenDioxide_tropopause_pressure,NitrogenDioxide_absorbing_aerosol_index,NitrogenDioxide_cloud_fraction,NitrogenDioxide_sensor_altitude,NitrogenDioxide_sensor_azimuth_angle,NitrogenDioxide_sensor_zenith_angle,NitrogenDioxide_solar_azimuth_angle,NitrogenDioxide_solar_zenith_angle,Formaldehyde_tropospheric_HCHO_column_number_density,Formaldehyde_tropospheric_HCHO_column_number_density_amf,Formaldehyde_HCHO_slant_column_number_density,Formaldehyde_cloud_fraction,Formaldehyde_solar_zenith_angle,Formaldehyde_solar_azimuth_angle,Formaldehyde_sensor_zenith_angle,Formaldehyde_sensor_azimuth_angle,UvAerosolIndex_absorbing_aerosol_index,UvAerosolIndex_sensor_altitude,UvAerosolIndex_sensor_azimuth_angle,UvAerosolIndex_sensor_zenith_angle,UvAerosolIndex_solar_azimuth_angle,UvAerosolIndex_solar_zenith_angle,Ozone_O3_column_number_density,Ozone_O3_column_number_density_amf,Ozone_O3_slant_column_number_density,Ozone_O3_effective_temperature,Ozone_cloud_fraction,Ozone_sensor_azimuth_angle,Ozone_sensor_zenith_angle,Ozone_solar_azimuth_angle,Ozone_solar_zenith_angle,UvAerosolLayerHeight_aerosol_hei
ght,UvAerosolLayerHeight_aerosol_pressure,UvAerosolLayerHeight_aerosol_optical_depth,UvAerosolLayerHeight_sensor_zenith_angle,UvAerosolLayerHeight_sensor_azimuth_angle,UvAerosolLayerHeight_solar_azimuth_angle,UvAerosolLayerHeight_solar_zenith_angle,Cloud_cloud_fraction,Cloud_cloud_top_pressure,Cloud_cloud_top_height,Cloud_cloud_base_pressure,Cloud_cloud_base_height,Cloud_cloud_optical_depth,Cloud_surface_albedo,Cloud_sensor_azimuth_angle,Cloud_sensor_zenith_angle,Cloud_solar_azimuth_angle,Cloud_solar_zenith_angle,emission
0,0,ID_-0.510_29.290_2019_00,-0.51,29.29,2019,0,-0.00011,0.60302,-7e-05,0.25567,-98.59389,50.84356,-130.0508,35.8745,-3e-05,0.03537,1589.02454,4061.09814,829530.5,71.11198,52.77593,-149.87556,25.96521,,,,,,,,,,,,,0.00012,0.86323,4e-05,0.25567,35.8745,-130.0508,50.84356,-98.59389,-1.28076,829864.54688,-12.62898,35.63242,-138.78645,30.75213,0.11593,2.50661,0.29566,225.73114,0.59547,-12.62898,35.63242,-138.78645,30.75213,,,,,,,,0.59547,53534.73242,3664.43622,61085.80957,2615.12048,15.56853,0.27229,-12.62899,35.63242,-138.78642,30.75214,3.75099
1,1,ID_-0.510_29.290_2019_01,-0.51,29.29,2019,1,2e-05,0.72821,1e-05,0.13099,16.59286,39.13719,-140.87443,28.96513,1e-05,0.03653,1772.57441,1869.04041,829787.28713,-1.01959,38.98237,-140.15805,29.562,5e-05,2e-05,3e-05,9e-05,7311.86914,-1.93539,0.06704,829859.96037,5.47104,35.26519,-138.34391,30.05426,0.00017,1.17283,0.00014,0.20075,29.07178,-141.81483,43.05021,4.67884,-1.54812,829747.85697,16.15249,43.48533,-142.78614,28.57363,0.11678,2.6577,0.31573,226.17217,0.17517,24.46434,42.59654,-143.09787,28.21366,,,,,,,,0.21361,63790.29624,3651.19031,66969.47873,3174.57242,8.6906,0.25683,30.35938,39.55763,-145.18393,27.25178,4.02518
2,2,ID_-0.510_29.290_2019_02,-0.51,29.29,2019,2,0.00051,0.7482,0.00038,0.11002,72.79584,52.86882,-150.19176,23.20642,0.00015,0.03534,2703.2368,2809.13839,829883.82869,-54.80114,52.34438,-133.68371,31.58684,3e-05,0.0,3e-05,8e-05,7311.86914,-2.75437,0.07213,829527.125,72.79584,52.86882,-150.19176,23.20642,8e-05,1.17547,2e-05,0.27945,30.99429,-135.66716,52.81056,-41.36358,-1.03867,829892.96063,-41.55763,41.26903,-135.36463,30.2733,0.11704,2.6191,0.31083,227.46929,0.60609,-41.55763,41.26903,-135.36463,30.2733,,,,,,,,0.70354,55923.79055,4216.98649,60068.89445,3516.28267,21.10341,0.2511,15.37788,30.40182,-142.51954,26.1933,4.23138
3,3,ID_-0.510_29.290_2019_03,-0.51,29.29,2019,3,,,,,,,,,,0.03679,2076.07333,3917.70787,829657.16357,28.91654,39.67618,-142.57592,24.8107,,,,,,,,,,,,,,,,,,,,,-0.62644,829794.84821,-0.00127,34.45874,-137.4896,26.93648,0.11643,2.52582,0.29797,225.58944,0.7874,-0.00127,34.45874,-137.4896,26.93648,,,,,,,,0.78281,44569.13064,5228.50774,51064.54734,4180.97332,15.3869,0.26204,-11.2934,24.38036,-132.66583,28.82915,4.30529
4,4,ID_-0.510_29.290_2019_04,-0.51,29.29,2019,4,-8e-05,0.6763,-5e-05,0.12116,4.12127,35.51559,-137.40916,24.33197,-3e-05,0.03467,2053.60849,2667.31001,829735.09375,-12.50166,33.70307,-134.85426,24.62959,5e-05,2e-05,3e-05,9e-05,7637.26246,-1.45056,0.04939,829744.84375,-13.4318,35.07862,-136.25795,24.72903,0.00027,0.86908,0.00015,0.16009,25.97794,-134.82656,39.94907,-12.8374,-1.5849,829736.14286,-0.60432,41.79471,-136.44852,25.04579,0.11737,2.57224,0.30669,228.95584,0.21574,-0.60432,41.79471,-136.44852,25.04579,,,,,,,,0.18934,59904.31484,3980.59812,63751.12578,3355.71011,8.11469,0.23585,38.53226,37.39298,-141.50981,22.20461,4.34732
5,5,ID_-0.510_29.290_2019_05,-0.51,29.29,2019,5,0.00029,0.87171,0.00024,0.22766,-13.45369,57.09712,-136.61686,23.40887,8e-05,0.0353,2057.96924,3109.71302,829657.72917,15.00916,43.19827,-135.15841,23.12598,1e-05,-2e-05,3e-05,8e-05,8613.46884,-1.65902,0.09959,829323.8125,71.69062,63.3518,-151.40437,16.27865,0.00022,1.29148,0.00019,0.26406,23.15157,-135.07378,43.09036,16.44096,-1.28224,829750.40776,-12.69494,42.61212,-131.54667,24.38323,0.11713,2.5892,0.30785,226.40462,0.49316,-12.69494,42.61212,-131.54667,24.38323,,,,,,,,0.45018,43494.92174,5530.35455,49846.00101,4495.30136,7.55614,0.25023,30.1284,37.00702,-137.38872,21.15782,4.31082
6,6,ID_-0.510_29.290_2019_06,-0.51,29.29,2019,6,-0.00029,0.79196,-0.00023,0.1194,72.19865,58.49637,-143.72691,15.76777,-9e-05,0.03287,1381.99023,3032.11523,829465.34375,71.43174,47.93819,-138.46049,17.42694,4e-05,1e-05,3e-05,0.00011,7311.86914,-1.47539,0.07432,829375.5625,72.19865,58.49637,-143.72691,15.76777,0.00019,1.03896,9e-05,0.27728,19.23594,-134.52548,32.51637,74.70527,-0.72985,829785.28571,-24.96466,35.59495,-125.18545,23.93305,0.11884,2.49074,0.30061,228.65882,0.63264,-24.96466,35.59495,-125.18545,23.93305,,,,,,,,0.52499,53483.78398,4378.61465,58804.27656,3537.05066,10.49311,0.24083,4.58537,30.49154,-128.19634,21.7212,4.26933
7,7,ID_-0.510_29.290_2019_07,-0.51,29.29,2019,7,-2e-05,0.97631,-2e-05,0.1068,-100.95605,23.73984,-114.88765,24.02423,-1e-05,0.03654,2089.30731,2575.83502,829674.5625,-13.22689,35.09267,-120.48166,19.83178,4e-05,1e-05,3e-05,9e-05,7311.86914,-1.30238,0.04842,829881.0625,-100.95605,23.73984,-114.88765,24.02423,0.00017,0.98508,0.00011,0.1068,24.02423,-114.88765,23.73984,-100.95605,-1.02143,829698.98958,16.37118,35.88049,-122.25839,20.42453,0.12044,2.34714,0.28738,228.49413,0.75236,5.08749,32.48287,-119.68354,21.41335,,,,,,,,0.81322,57524.05825,3468.92415,63697.00727,2597.07156,20.5336,0.25017,16.37483,35.84025,-122.2507,20.43201,4.25136
8,8,ID_-0.510_29.290_2019_08,-0.51,29.29,2019,8,-0.00011,0.79694,-0.0001,0.16497,-40.17907,31.16156,-109.5956,25.04636,-5e-05,0.03254,1979.94604,2830.62272,829662.8125,6.50865,31.70761,-115.44589,19.23351,4e-05,1e-05,3e-05,9e-05,8613.45036,-1.14058,0.09385,829890.89583,-40.17907,31.16156,-109.5956,25.04636,0.00014,1.07456,0.0001,0.19957,21.80306,-113.43489,37.99534,-12.08478,-1.2041,829704.87273,0.00659,44.95882,-115.80242,20.89168,0.11846,2.6999,0.3248,226.83287,0.51851,0.00659,44.95882,-115.80242,20.89168,,,,,,,,0.53434,61137.69752,3160.26467,67222.55616,2530.85148,24.25393,0.26054,-12.02175,42.68879,-114.29637,22.3656,4.28194
9,9,ID_-0.510_29.290_2019_09,-0.51,29.29,2019,9,6e-05,0.99854,1e-05,0.15773,-100.10874,33.59202,-101.94415,24.67541,1e-05,0.03734,2215.80103,2116.4955,829798.72169,-40.95582,31.67825,-104.09601,22.18379,5e-05,2e-05,3e-05,9e-05,8613.4212,-1.28621,0.01615,829876.33369,-100.9563,23.73952,-99.82277,23.0962,2e-05,1.30469,-2e-05,0.15773,24.67543,-101.94618,33.59202,-100.1087,-1.19481,829792.37951,-50.66635,31.55996,-105.13085,21.46764,0.12119,2.33847,0.28776,228.82943,0.65079,-50.66704,31.55128,-105.13105,21.46661,,,,,,,,0.66389,54818.475,4141.07334,60298.71797,3510.29365,13.81338,0.28929,-66.25839,28.59621,-105.68718,21.08402,4.35293


## 前処理

### 欠損値の補完

In [118]:
def missing_values(df):
    """Drop the helper 'location' column (when present) and fill every NaN with 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame without a 'location' column and without missing values.
    """
    # errors='ignore' lets this run before the 'location' column has been
    # created as well as after, instead of raising KeyError.
    return df.drop(columns=['location'], errors='ignore').fillna(0)

### 外れ値の修正

In [119]:
def outlier(df):
    """Placeholder for outlier handling: hands the given frame back untouched."""
    return df

### スケーリング（標準化）

- MinMaxScaler(正規化)

In [120]:
def scaling_MinMaxScaler(df):
    """Min-max scale the columns listed in the module-level `numerical_features`.

    The scaler is fitted on the frame it transforms. The scaled values are
    written back into `df` (the argument is modified) and `df` is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in `numerical_features`.

    Returns
    -------
    pandas.DataFrame
        The same frame object with the numeric columns rescaled.
    """
    df_scale = df[numerical_features]
    sc = MinMaxScaler()
    # Column assignment from a DataFrame aligns on the row index, so the
    # scaled frame must carry the source index; a fresh 0..n-1 index would
    # turn rows into NaN whenever df is not default-indexed.
    df[numerical_features] = pd.DataFrame(
        sc.fit_transform(df_scale),
        columns=df_scale.columns,
        index=df_scale.index,
    )

    return df

## 特徴量エンジニアリング

### 特徴量の作成

In [121]:
def create_new_features(df):
    """Add a 'location' key and per-location 2-row rolling means of the features.

    For every column from position 5 onward (except 'emission' and
    'location') a '<column>_roll_mean' column is appended. It holds the mean
    of the current and the previous row of the same location, with rows
    ordered by year and week_no; a location's first row is therefore NaN.

    The returned frame keeps the rows in the order they were passed in, on a
    fresh 0..n-1 index, so positional train/test splits made by the caller
    stay valid. The argument itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'latitude', 'longitude', 'year' and 'week_no' columns; the
        feature columns are taken by position (column 5 onward).

    Returns
    -------
    pandas.DataFrame
        Input columns, then 'location', then the '*_roll_mean' columns.
    """
    # Fresh positional labels: gives a copy (caller's frame stays untouched)
    # and unique row labels to align the rolling result on.
    df = df.reset_index(drop=True)

    # Location key built from latitude and longitude.
    df['location'] = [str(x) + '_' + str(y) for x, y in zip(df.latitude, df.longitude)]

    feature_cols = [col for col in df.columns[5:] if col not in ('emission', 'location')]

    # Rolling mean over time within each location. Sorting is applied only to
    # the frame fed into the rolling computation; dropping the group level
    # leaves the original row labels as the index of the result.
    df_roll_mean = (
        df.sort_values(by=['location', 'year', 'week_no'])
        .groupby(['location'])[feature_cols]
        .rolling(window=2)
        .mean()
        .reset_index(level=0, drop=True)
    )
    df_roll_mean.columns = [col + '_roll_mean' for col in df_roll_mean.columns]

    # Label-based join attaches each rolling value to its own row without
    # reordering the frame.
    return df.join(df_roll_mean)

### カテゴリ変数のエンコーディング

- One-Hot Encoding

In [122]:
def one_hot_encoding(df, cat_cols):
    """Return `df` with the columns named in `cat_cols` expanded into dummy columns."""
    encoded = pd.get_dummies(data=df, columns=cat_cols)
    return encoded

- LabelEncoding

In [123]:
def label_encoder(df):
    """Integer-encode every object-dtype column of `df` and return `df`.

    Each such column has its missing values replaced by the empty string and
    is cast to str, then a fresh LabelEncoder is fitted on it. The columns
    are overwritten on the frame that was passed in.
    """
    for name in df.select_dtypes(include=['object']).columns:
        # Fill gaps first so the encoder only ever sees strings.
        as_text = df[name].fillna('').astype('str')
        df[name] = LabelEncoder().fit_transform(as_text)

    return df

### 特徴量の選択

#### 特徴量の重要度評価

In [124]:
def feature_importance_evaluation(df):
    """Rank the numeric features of `df` by random-forest importance.

    A RandomForestRegressor (random_state=42) is fitted with the float / int
    columns of `df` other than 'emission' as inputs and `df['emission']` as
    the target.

    Parameters
    ----------
    df : pandas.DataFrame
        Training rows, including the 'emission' target column.

    Returns
    -------
    pandas.DataFrame
        One 'importance' column indexed by feature name, sorted ascending.
    """
    # Explanatory variables: numeric columns without the target.
    X = df.select_dtypes(include=['float', 'int'])
    X = X.drop(['emission'], axis=1)
    # Take the target from the same frame so X and y always cover the same
    # rows in the same order, independent of any module-level state.
    y = df['emission']

    clf = RandomForestRegressor(
        random_state=42,
    )
    clf.fit(X, y)
    importance = clf.feature_importances_

    feature_importance = pd.DataFrame(
        data=importance, index=X.columns, columns=['importance']
    ).sort_values(ascending=True, by='importance')

    return feature_importance

#### 特徴量の削除

In [125]:
def drop_columns(df):
    """Return a new frame without the columns in the removal list (currently empty)."""
    columns_to_remove = []
    return df.drop(columns=columns_to_remove)

### データセットの更新

In [126]:
# Preprocessing
# df = missing_values(df) # this time it is applied after feature engineering instead
df = outlier(df)
# df = scaling_MinMaxScaler(df) # run only when scaling is wanted

# Feature engineering
df = create_new_features(df)
# missing_values() drops the 'location' column that create_new_features()
# adds and zero-fills the NaNs, so it is placed after that step.
df = missing_values(df) # applied here this time
df = drop_columns(df)

ohe_cols = [] # low-cardinality columns to one-hot encode
df = one_hot_encoding(df, ohe_cols)
df = label_encoder(df)

In [127]:
# Number of training rows, taken from the target saved off the original train
# frame rather than hard-coded. This positional split assumes the rows of df
# are still in concat order (train first); any upstream step that reorders
# rows would break it.
n_train = len(target)

train = df[:n_train]
test = df[n_train:]

train_x = train.drop(columns=['emission'])
train_y = target
test_x = test.drop(columns=['emission'])

# Plain NumPy arrays for the estimators.
X = train_x.values
y = train_y.values
# y = y.astype(int)

df.head()

Unnamed: 0,index,ID_LAT_LON_YEAR_WEEK,latitude,longitude,year,week_no,SulphurDioxide_SO2_column_number_density,SulphurDioxide_SO2_column_number_density_amf,SulphurDioxide_SO2_slant_column_number_density,SulphurDioxide_cloud_fraction,SulphurDioxide_sensor_azimuth_angle,SulphurDioxide_sensor_zenith_angle,SulphurDioxide_solar_azimuth_angle,SulphurDioxide_solar_zenith_angle,SulphurDioxide_SO2_column_number_density_15km,CarbonMonoxide_CO_column_number_density,CarbonMonoxide_H2O_column_number_density,CarbonMonoxide_cloud_height,CarbonMonoxide_sensor_altitude,CarbonMonoxide_sensor_azimuth_angle,CarbonMonoxide_sensor_zenith_angle,CarbonMonoxide_solar_azimuth_angle,CarbonMonoxide_solar_zenith_angle,NitrogenDioxide_NO2_column_number_density,NitrogenDioxide_tropospheric_NO2_column_number_density,NitrogenDioxide_stratospheric_NO2_column_number_density,NitrogenDioxide_NO2_slant_column_number_density,NitrogenDioxide_tropopause_pressure,NitrogenDioxide_absorbing_aerosol_index,NitrogenDioxide_cloud_fraction,NitrogenDioxide_sensor_altitude,NitrogenDioxide_sensor_azimuth_angle,NitrogenDioxide_sensor_zenith_angle,NitrogenDioxide_solar_azimuth_angle,NitrogenDioxide_solar_zenith_angle,Formaldehyde_tropospheric_HCHO_column_number_density,Formaldehyde_tropospheric_HCHO_column_number_density_amf,Formaldehyde_HCHO_slant_column_number_density,Formaldehyde_cloud_fraction,Formaldehyde_solar_zenith_angle,Formaldehyde_solar_azimuth_angle,Formaldehyde_sensor_zenith_angle,Formaldehyde_sensor_azimuth_angle,UvAerosolIndex_absorbing_aerosol_index,UvAerosolIndex_sensor_altitude,UvAerosolIndex_sensor_azimuth_angle,UvAerosolIndex_sensor_zenith_angle,UvAerosolIndex_solar_azimuth_angle,UvAerosolIndex_solar_zenith_angle,Ozone_O3_column_number_density,Ozone_O3_column_number_density_amf,Ozone_O3_slant_column_number_density,Ozone_O3_effective_temperature,Ozone_cloud_fraction,Ozone_sensor_azimuth_angle,Ozone_sensor_zenith_angle,Ozone_solar_azimuth_angle,Ozone_solar_zenith_angle,UvAerosolLayerHeight_aerosol_hei
ght,UvAerosolLayerHeight_aerosol_pressure,UvAerosolLayerHeight_aerosol_optical_depth,UvAerosolLayerHeight_sensor_zenith_angle,UvAerosolLayerHeight_sensor_azimuth_angle,UvAerosolLayerHeight_solar_azimuth_angle,UvAerosolLayerHeight_solar_zenith_angle,Cloud_cloud_fraction,Cloud_cloud_top_pressure,Cloud_cloud_top_height,Cloud_cloud_base_pressure,Cloud_cloud_base_height,Cloud_cloud_optical_depth,Cloud_surface_albedo,Cloud_sensor_azimuth_angle,Cloud_sensor_zenith_angle,Cloud_solar_azimuth_angle,Cloud_solar_zenith_angle,emission,week_no_roll_mean,SulphurDioxide_SO2_column_number_density_roll_mean,SulphurDioxide_SO2_column_number_density_amf_roll_mean,SulphurDioxide_SO2_slant_column_number_density_roll_mean,SulphurDioxide_cloud_fraction_roll_mean,SulphurDioxide_sensor_azimuth_angle_roll_mean,SulphurDioxide_sensor_zenith_angle_roll_mean,SulphurDioxide_solar_azimuth_angle_roll_mean,SulphurDioxide_solar_zenith_angle_roll_mean,SulphurDioxide_SO2_column_number_density_15km_roll_mean,CarbonMonoxide_CO_column_number_density_roll_mean,CarbonMonoxide_H2O_column_number_density_roll_mean,CarbonMonoxide_cloud_height_roll_mean,CarbonMonoxide_sensor_altitude_roll_mean,CarbonMonoxide_sensor_azimuth_angle_roll_mean,CarbonMonoxide_sensor_zenith_angle_roll_mean,CarbonMonoxide_solar_azimuth_angle_roll_mean,CarbonMonoxide_solar_zenith_angle_roll_mean,NitrogenDioxide_NO2_column_number_density_roll_mean,NitrogenDioxide_tropospheric_NO2_column_number_density_roll_mean,NitrogenDioxide_stratospheric_NO2_column_number_density_roll_mean,NitrogenDioxide_NO2_slant_column_number_density_roll_mean,NitrogenDioxide_tropopause_pressure_roll_mean,NitrogenDioxide_absorbing_aerosol_index_roll_mean,NitrogenDioxide_cloud_fraction_roll_mean,NitrogenDioxide_sensor_altitude_roll_mean,NitrogenDioxide_sensor_azimuth_angle_roll_mean,NitrogenDioxide_sensor_zenith_angle_roll_mean,NitrogenDioxide_solar_azimuth_angle_roll_mean,NitrogenDioxide_solar_zenith_angle_roll_mean,Formaldehyde_tropospheric_HCHO_column_number_densit
y_roll_mean,Formaldehyde_tropospheric_HCHO_column_number_density_amf_roll_mean,Formaldehyde_HCHO_slant_column_number_density_roll_mean,Formaldehyde_cloud_fraction_roll_mean,Formaldehyde_solar_zenith_angle_roll_mean,Formaldehyde_solar_azimuth_angle_roll_mean,Formaldehyde_sensor_zenith_angle_roll_mean,Formaldehyde_sensor_azimuth_angle_roll_mean,UvAerosolIndex_absorbing_aerosol_index_roll_mean,UvAerosolIndex_sensor_altitude_roll_mean,UvAerosolIndex_sensor_azimuth_angle_roll_mean,UvAerosolIndex_sensor_zenith_angle_roll_mean,UvAerosolIndex_solar_azimuth_angle_roll_mean,UvAerosolIndex_solar_zenith_angle_roll_mean,Ozone_O3_column_number_density_roll_mean,Ozone_O3_column_number_density_amf_roll_mean,Ozone_O3_slant_column_number_density_roll_mean,Ozone_O3_effective_temperature_roll_mean,Ozone_cloud_fraction_roll_mean,Ozone_sensor_azimuth_angle_roll_mean,Ozone_sensor_zenith_angle_roll_mean,Ozone_solar_azimuth_angle_roll_mean,Ozone_solar_zenith_angle_roll_mean,UvAerosolLayerHeight_aerosol_height_roll_mean,UvAerosolLayerHeight_aerosol_pressure_roll_mean,UvAerosolLayerHeight_aerosol_optical_depth_roll_mean,UvAerosolLayerHeight_sensor_zenith_angle_roll_mean,UvAerosolLayerHeight_sensor_azimuth_angle_roll_mean,UvAerosolLayerHeight_solar_azimuth_angle_roll_mean,UvAerosolLayerHeight_solar_zenith_angle_roll_mean,Cloud_cloud_fraction_roll_mean,Cloud_cloud_top_pressure_roll_mean,Cloud_cloud_top_height_roll_mean,Cloud_cloud_base_pressure_roll_mean,Cloud_cloud_base_height_roll_mean,Cloud_cloud_optical_depth_roll_mean,Cloud_surface_albedo_roll_mean,Cloud_sensor_azimuth_angle_roll_mean,Cloud_sensor_zenith_angle_roll_mean,Cloud_solar_azimuth_angle_roll_mean,Cloud_solar_zenith_angle_roll_mean
0,0,0,-0.51,29.29,2019,0,-0.00011,0.60302,-7e-05,0.25567,-98.59389,50.84356,-130.0508,35.8745,-3e-05,0.03537,1589.02454,4061.09814,829530.5,71.11198,52.77593,-149.87556,25.96521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00012,0.86323,4e-05,0.25567,35.8745,-130.0508,50.84356,-98.59389,-1.28076,829864.54688,-12.62898,35.63242,-138.78645,30.75213,0.11593,2.50661,0.29566,225.73114,0.59547,-12.62898,35.63242,-138.78645,30.75213,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.59547,53534.73242,3664.43622,61085.80957,2615.12048,15.56853,0.27229,-12.62899,35.63242,-138.78642,30.75214,3.75099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1,-0.51,29.29,2019,1,2e-05,0.72821,1e-05,0.13099,16.59286,39.13719,-140.87443,28.96513,1e-05,0.03653,1772.57441,1869.04041,829787.28713,-1.01959,38.98237,-140.15805,29.562,5e-05,2e-05,3e-05,9e-05,7311.86914,-1.93539,0.06704,829859.96037,5.47104,35.26519,-138.34391,30.05426,0.00017,1.17283,0.00014,0.20075,29.07178,-141.81483,43.05021,4.67884,-1.54812,829747.85697,16.15249,43.48533,-142.78614,28.57363,0.11678,2.6577,0.31573,226.17217,0.17517,24.46434,42.59654,-143.09787,28.21366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.21361,63790.29624,3651.19031,66969.47873,3174.57242,8.6906,0.25683,30.35938,39.55763,-145.18393,27.25178,4.02518,0.5,-4e-05,0.66562,-3e-05,0.19333,-41.00051,44.99038,-135.46262,32.41981,-1e-05,0.03595,1680.79947,2965.06928,829658.89357,35.04619,45.87915,-145.01681,27.76361,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00014,1.01803,9e-05,0.22821,32.47314,-135.93281,46.94689,-46.95752,-1.41444,829806.20192,1.76176,39.55887,-140.78629,29.66288,0.11635,2.58216,0.3057,225.95166,0.38532,5.91768,39.11448,-140.94216,29.48289,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.40454,58662.51433,3657.81326,64027.64415,2894.84645,12.12957,0.26456,8.86519,37.59502,-141.98518,29.00196
2,2,2,-0.51,29.29,2019,2,0.00051,0.7482,0.00038,0.11002,72.79584,52.86882,-150.19176,23.20642,0.00015,0.03534,2703.2368,2809.13839,829883.82869,-54.80114,52.34438,-133.68371,31.58684,3e-05,0.0,3e-05,8e-05,7311.86914,-2.75437,0.07213,829527.125,72.79584,52.86882,-150.19176,23.20642,8e-05,1.17547,2e-05,0.27945,30.99429,-135.66716,52.81056,-41.36358,-1.03867,829892.96063,-41.55763,41.26903,-135.36463,30.2733,0.11704,2.6191,0.31083,227.46929,0.60609,-41.55763,41.26903,-135.36463,30.2733,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.70354,55923.79055,4216.98649,60068.89445,3516.28267,21.10341,0.2511,15.37788,30.40182,-142.51954,26.1933,4.23138,1.5,0.00027,0.73821,0.0002,0.1205,44.69435,46.00301,-145.5331,26.08577,8e-05,0.03593,2237.9056,2339.0894,829835.55791,-27.91037,45.66337,-136.92088,30.57442,4e-05,1e-05,3e-05,9e-05,7311.86914,-2.34488,0.06959,829693.54268,39.13344,44.06701,-144.26783,26.63034,0.00012,1.17415,8e-05,0.2401,30.03304,-138.74099,47.93039,-18.34237,-1.2934,829820.4088,-12.70257,42.37718,-139.07538,29.42347,0.11691,2.6384,0.31328,226.82073,0.39063,-8.54665,41.93279,-139.23125,29.24348,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.45857,59857.0434,3934.0884,63519.18659,3345.42755,14.89701,0.25397,22.86863,34.97973,-143.85174,26.72254
3,3,3,-0.51,29.29,2019,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03679,2076.07333,3917.70787,829657.16357,28.91654,39.67618,-142.57592,24.8107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.62644,829794.84821,-0.00127,34.45874,-137.4896,26.93648,0.11643,2.52582,0.29797,225.58944,0.7874,-0.00127,34.45874,-137.4896,26.93648,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.78281,44569.13064,5228.50774,51064.54734,4180.97332,15.3869,0.26204,-11.2934,24.38036,-132.66583,28.82915,4.30529,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03606,2389.65507,3363.42313,829770.49613,-12.9423,46.01028,-138.12981,28.19877,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.83255,829843.90442,-20.77945,37.86389,-136.42711,28.60489,0.11674,2.57246,0.3044,226.52937,0.69674,-20.77945,37.86389,-136.42711,28.60489,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.74317,50246.46059,4722.74711,55566.72089,3848.628,18.24515,0.25657,2.04224,27.39109,-137.59269,27.51123
4,4,4,-0.51,29.29,2019,4,-8e-05,0.6763,-5e-05,0.12116,4.12127,35.51559,-137.40916,24.33197,-3e-05,0.03467,2053.60849,2667.31001,829735.09375,-12.50166,33.70307,-134.85426,24.62959,5e-05,2e-05,3e-05,9e-05,7637.26246,-1.45056,0.04939,829744.84375,-13.4318,35.07862,-136.25795,24.72903,0.00027,0.86908,0.00015,0.16009,25.97794,-134.82656,39.94907,-12.8374,-1.5849,829736.14286,-0.60432,41.79471,-136.44852,25.04579,0.11737,2.57224,0.30669,228.95584,0.21574,-0.60432,41.79471,-136.44852,25.04579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.18934,59904.31484,3980.59812,63751.12578,3355.71011,8.11469,0.23585,38.53226,37.39298,-141.50981,22.20461,4.34732,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03573,2064.84091,3292.50894,829696.12866,8.20744,36.68963,-138.71509,24.72015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.10567,829765.49554,-0.3028,38.12672,-136.96906,25.99113,0.1169,2.54903,0.30233,227.27264,0.50157,-0.3028,38.12672,-136.96906,25.99113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.48607,52236.72274,4604.55293,57407.83656,3768.34171,11.7508,0.24894,13.61943,30.88667,-137.08782,25.51688


In [128]:
# # 特徴量の重要度評価
# train_importance = train
# feature_importance = feature_importance_evaluation(train_importance)
# feature_importance

In [129]:
# feature_importance.plot(kind='barh', figsize=(10, 15), color='b')

In [130]:
# Persist the processed frame. index=False leaves the row labels out, so the
# first CSV column is the frame's first data column.
df.to_csv(OUTPUT_DIR + 'data.csv', index=False)

In [131]:
def file_to_xy(filename, n_train=79023, target_col='emission'):
    """Load a processed CSV and split it into train / test frames and X / y.

    Parameters
    ----------
    filename : str or file-like
        CSV to read; its first column is used as the row index.
    n_train : int, default 79023
        Number of leading rows that form the training set; the remaining
        rows form the test set.
    target_col : str, default 'emission'
        Name of the target column.

    Returns
    -------
    tuple
        (data, test, train, X, y): the full frame, the test rows without the
        target column, the training rows, the training features (target and
        'ID_LAT_LON_YEAR_WEEK' removed) and the training target as an array.
    """
    data = pd.read_csv(filename, index_col=0)
    print(f'読み込み完了 {filename}')

    # Positional split: the first n_train rows are train, the rest test.
    train = data[:n_train].reset_index(drop=True)
    test = data[n_train:].reset_index(drop=True).drop(target_col, axis=1)

    # Separate explanatory variables from the target.
    X = train.drop([target_col, 'ID_LAT_LON_YEAR_WEEK'], axis=1)
    y = train[target_col].values

    return data, test, train, X, y

In [132]:
# Reload the saved frame; this rebinds test, train, X and y that earlier
# cells had already defined.
filename = 'data.csv'
data,test,train,X,y = file_to_xy(OUTPUT_DIR + filename)

読み込み完了 data.csv
