# 線形モデル

## import

In [1]:
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

## 教師データ

In [3]:
datas = pd.read_csv("train_all.csv")
drop_list = ["id", "tv","referee","home_01","home_02","home_03","home_04","home_05","home_06","home_07","home_08","home_09","home_10","home_11","away_01","away_02","away_03","away_04","away_05","away_06","away_07","away_08","away_09","away_10","away_11"]
datas = datas.drop(drop_list, axis=1)

### 欠損値の確認

In [4]:
missing_values = datas.isnull().sum()
missing_values

y              0
year           0
stage          0
match          0
gameday        0
time           0
home           0
away           0
stadium        0
home_score     0
away_score     0
weather        0
temperature    0
humidity       0
home_team      0
away_team      0
address        0
capa           0
dtype: int64

## 前処理

In [5]:
datas["MONTH"] = datas["gameday"].apply(lambda x : x[0:2])
datas["MONTH"] = datas["MONTH"].astype(int)
datas["DAY"] = datas["gameday"].apply(lambda x : x[3:5])
datas["DAY"] = datas["DAY"].astype(int)
datas["stage"] = datas["stage"].apply(lambda x : x[1])
datas = datas.drop("gameday", axis=1)

In [6]:
datas.head(1)

Unnamed: 0,y,year,stage,match,time,home,away,stadium,home_score,away_score,weather,temperature,humidity,home_team,away_team,address,capa,MONTH,DAY
0,18250,2012,１,第１節第１日,14:04,ベガルタ仙台,鹿島アントラーズ,ユアテックスタジアム仙台,1,0,雨,3.8,66%,ベガルタ仙台,鹿島アントラーズ,宮城県仙台市泉区七北田字柳78,19694,3,10


## 説明変数

In [None]:
# 説明変数を設定
select_cols = ["year", "stage", "MONTH", "DAY", "temperature", "capa"]

In [None]:
X = datas[select_cols]
y = datas["y"]

## テスト用データにのみ教師データがあるのでテスト用データを訓練データとテストデータに分ける

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## 学習とスコアの表示

### Ridge回帰

In [None]:
ridge = Ridge().fit(X_train, y_train)
print("Training set score: {:.2f}".format(ridge.score(X_train, y_train)))
print("Test set score: {:.2f}".format(ridge.score(X_test, y_test)))

In [None]:
ridge10 = Ridge(alpha=10).fit(X_train, y_train)
print("Training set score: {:.2f}".format(ridge10.score(X_train, y_train)))
print("Test set score: {:.2f}".format(ridge10.score(X_test, y_test)))

In [None]:
ridge01 = Ridge(alpha=0.1).fit(X_train, y_train)
print("Training set score: {:.2f}".format(ridge01.score(X_train, y_train)))
print("Test set score: {:.2f}".format(ridge01.score(X_test, y_test)))

### 線形回帰

In [None]:
lr = LinearRegression().fit(X_train, y_train)
print("Training set score: {:.2f}".format(lr.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lr.score(X_test, y_test)))