# ライブラリのインポート

In [None]:
import numpy as np
import pandas as pd
from datetime import datetime as dt
from matplotlib import pyplot
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# データの前処理

### CSV読込

In [None]:
# Load the 2017 electricity / temperature training data into a DataFrame.
# NOTE(review): engine='python' is presumably requested so that the non-ASCII
# file name is handled reliably - confirm before switching parser engines.
df = pd.read_csv(
    '2017_電力and気温学習データ.csv',
    engine='python',
)

### カラムの設定

In [None]:
# Rename the four columns positionally to short ASCII identifiers; the rest of
# the script refers to the data only through these names.
df.columns = [
    'DATE',
    'TIME',
    'TEMP',
    'ELECTRIC',
]

### DATE列の文字列データを日付データに変換

In [None]:
# Convert the DATE column from text to datetime values.
# A "<whitespace>0:00" time part is stripped first so that each value matches
# the plain '%Y/%m/%d' layout expected by strptime.  The pattern is written as
# a raw string: in an ordinary literal "\s" is an invalid escape sequence,
# which newer Python versions warn about.
df['DATE'] = df['DATE'].replace(r'(\s)0:00', '', regex=True)
df['DATE'] = [dt.strptime(x, '%Y/%m/%d') for x in df['DATE']]

### DATE列から曜日を抽出し、新しいカラムを設定する

In [None]:
# Derive the day of the week for every row and store it in a new YOBI column.
# weekday() numbers Monday as 0 and Sunday as 6, so its result indexes the
# Monday-first list of Japanese weekday labels directly.
yobi = ["月","火","水","木","金","土","日"]
df_yobi = [yobi[day.weekday()] for day in df['DATE']]
df['YOBI'] = df_yobi

### 定性⇨定量にするために一時的にtmpXにTIME列とYOBI列を格納

In [None]:
# Pull out the pieces needed for modelling:
#   y    - the target, kept as a one-column DataFrame (ELECTRIC)
#   tmpX - the categorical explanatory variables (TIME, YOBI), held separately
#          so they can be converted to numeric form before being joined with
#          the other features
y = df.loc[:, ['ELECTRIC']]
tmpX = df.loc[:, ['TIME', 'YOBI']]

### 定性データを数値に変換

In [None]:
# Label-encode every categorical column of tmpX, one LabelEncoder per column.
#   le          - the fitted encoders, in column order
#   enc_columns - the original category labels of all columns, concatenated in
#                 column order (used afterwards to name the dummy variables)
# Each column is replaced by name rather than written through .iloc, so the
# integer codes get their own integer dtype instead of being stored inside the
# column's original string/object storage.
le = []
class_labels = []
for col in tmpX.columns:
    encoder = LabelEncoder()
    encoder.fit(tmpX[col])
    le.append(encoder)
    class_labels.append(encoder.classes_)
    tmpX[col] = encoder.transform(tmpX[col])
enc_columns = np.concatenate(class_labels)

### 定性データから変換した数値データをダミー変数に変換

In [None]:
# Turn the integer category codes into dummy (one-hot) variables.
# The encoder output is a sparse matrix; .toarray() converts it to a dense
# numpy array so it can be wrapped in a DataFrame whose columns carry the
# original category labels.
one_hot_encoder = OneHotEncoder()
enc_data = one_hot_encoder.fit_transform(tmpX).toarray()
enc_df = pd.DataFrame(enc_data, columns=enc_columns)

### ダミー変数のカラムを確認

In [None]:
# Bare expression: when run as a notebook cell this displays the labels that
# were assigned to the dummy-variable columns.
enc_columns

# 学習

### テストデータと学習データに分割

In [None]:
# Assemble the feature matrix: the TEMP column followed by the one-hot dummy
# columns, concatenated column-wise (rows are aligned on the index).
temp_feature = df.loc[:, ['TEMP']]
X = pd.concat([temp_feature, enc_df], axis='columns')

# Hold out 20% of the rows as test data.  No random_state is passed, so the
# split differs from run to run.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.2)

### 学習

In [None]:
# Fit a random-forest regressor using scikit-learn's default hyper-parameters.
# y_train is a one-column DataFrame; it is flattened to a 1-D array because
# scikit-learn expects a 1-D target for single-output regression and emits a
# DataConversionWarning when handed a column vector.
regr = RandomForestRegressor()
regr.fit(X_train, y_train.to_numpy().ravel())

# 評価

### 学習モデルに学習データを適用し、MSEを計算

In [None]:
# Predict on the same rows the model was trained on and compute the mean
# squared error of those predictions (the training error).
train_pred = regr.predict(X_train)
train_mse = mean_squared_error(y_train, train_pred)

### RMSEを計算

In [None]:
# RMSE is the square root of the MSE computed on the training data.
train_rmse = np.sqrt(train_mse)
print(train_rmse)

### テストデータでRMSEを計算

In [None]:
# Repeat the evaluation on the held-out test rows: predict, compute the MSE,
# then print its square root (RMSE).
test_pred = regr.predict(X_test)
test_mse = mean_squared_error(y_test, test_pred)
test_rmse = np.sqrt(test_mse)
print(test_rmse)

### グラフに表示

In [None]:
# Plot predicted against actual values for the first (at most) 100 test rows.
# The number of points is capped by the size of the test set so that the x and
# y arrays always have matching lengths.
# NOTE(review): the test rows come from train_test_split, so the x axis is the
# position within the test set; the 'date' axis label may be misleading.
n_shown = min(100, len(test_pred))
x_pos = np.arange(n_shown)
pyplot.plot(x_pos, test_pred[:n_shown], label='pred')
pyplot.plot(x_pos, y_test[:n_shown], label='actual')
pyplot.title('2017')
pyplot.xlabel('date')
pyplot.ylabel('ele_used')
# Without legend() the label= arguments above are never displayed.
pyplot.legend()
pyplot.show()