In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.autograd import Variable
import torch.optim as optim
import pandas as pd

# データ生成 -data generation-

In [5]:
input_path = "plays_2_to_MDN.csv"
data_df = pd.read_csv(input_path)

# Encode the play name as a numeric class id. Plays that are not listed here
# map to NaN and are dropped below.
PLAY_TYPE_CODES = {"guard": 0, "tackle": 1, "end": 2, "short": 3, "deep": 4}
data_df["play_type"] = data_df["play"].map(PLAY_TYPE_CODES).astype("float")

# Bucket the yards-to-go into a coarse distance score:
#   <= 3 yards -> 3, 4..6 yards -> 2, >= 7 yards -> 1.
# Values outside every bucket (e.g. missing yardsToGo) become NaN instead of
# an empty string, so the column is numeric from the start.
yards_to_go = data_df["yardsToGo"]
data_df["dist"] = np.select(
    [yards_to_go <= 3, (yards_to_go <= 6) & (yards_to_go >= 4), yards_to_go >= 7],
    [3.0, 2.0, 1.0],
    default=np.nan,
)

# Keep only rows with a recognised play. The explicit copy makes the result
# an independent frame, so later column assignments do not hit a slice.
data_df = data_df[data_df["play_type"].notna()].copy()
data_df.head()

# Min-max normalization helper
def min_max_norm(x, axis=None):
    """Rescale ``x`` linearly so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    x : numpy array or pandas Series/DataFrame
        Values to rescale; must support ``.min(axis=...)`` / ``.max(axis=...)``.
    axis : int or None
        Forwarded unchanged to ``x.min`` and ``x.max``.

    Returns
    -------
    ``(x - min) / (max - min)`` with the same container type as ``x``.
    Where the range is zero (constant input) the divisor is replaced by 1,
    so the result is 0 rather than NaN/inf from a division by zero.
    """
    x_min = x.min(axis=axis)
    x_max = x.max(axis=axis)
    span = x_max - x_min

    # Guard against a zero range, which would otherwise divide by zero.
    if np.ndim(span) == 0:
        if span == 0:
            span = 1
    elif isinstance(span, (pd.Series, pd.DataFrame)):
        # Keep the pandas index so the division still aligns by label.
        span = span.where(span != 0, 1)
    else:
        span = np.where(span == 0, 1, span)

    return (x - x_min) / span

## 複数の入力に対するデータ -data for multiple inputs-

In [8]:
# Dataset construction helper
def create_input_data(input_path, norm=True, sample=True, sample_num=100,
                      df=None, random_state=None):
    """Build the model input matrix and target vector from the play data.

    Parameters
    ----------
    input_path : str
        Kept for backward compatibility; it is not read by this function.
    norm : bool
        Whether to min-max normalize the ``PlayResult`` target (default: True).
        The input features are never normalized.
    sample : bool
        Whether to draw a fixed number of random rows (default: True).
    sample_num : int
        Number of rows to draw when ``sample`` is True (default: 100).
    df : pandas.DataFrame or None
        Source frame with the columns ``down``, ``yardsToGo``, ``play_type``
        and ``PlayResult``. When None, the module-level ``data_df`` is used.
    random_state : int or None
        Forwarded to ``DataFrame.sample`` so a sampled run can be reproduced.

    Returns
    -------
    x_data : numpy.ndarray
        One row per play with the columns ``down``, ``yardsToGo``,
        ``play_type``.
    y_data : numpy.ndarray
        ``PlayResult`` as a float32 column vector of shape ``(rows, 1)``.
    """
    source_df = data_df if df is None else df

    if sample:
        tmp_df = source_df.sample(sample_num, random_state=random_state)
    else:
        tmp_df = source_df.copy()

    # Only the target is normalized. With norm=False the raw values are used,
    # which previously failed because the intermediate frame was never built.
    target_df = tmp_df[["PlayResult"]]
    if norm:
        target_df = min_max_norm(target_df)

    # Input features as a plain array
    x_data = tmp_df[['down', 'yardsToGo', 'play_type']].values

    # Target as a float32 column vector
    y_data = target_df["PlayResult"].values
    y_data = y_data.reshape(-1, 1).astype(np.float32)
    return x_data, y_data

# Full dataset: every row is used, so no sample size is needed.
x_data, y_data = create_input_data(input_path, norm=True, sample=False)

In [15]:
n = y_data.size    # number of target values
d, t = 3, 1        # input size, output size (number of dimensions)
h, k = 50, 30      # hidden-layer nodes, number of normal-distribution peaks
iter_num = 1500    # number of training iterations

# Sizes derived from k and t; presumably the widths of the pi / sigma^2 / mu
# outputs of the network -- confirm against MDN_model.
d_pi = d_sigmasq = k
d_mu = t * k

In [17]:
# Project-local module providing the MDN_NN network class.
import MDN_model
# Arguments are the input size, output size, hidden-layer size and number of
# mixture peaks defined above; argument meaning presumed from those
# definitions -- confirm against MDN_model.MDN_NN.
model = MDN_model.MDN_NN(d,t,h,k)

In [18]:
opt = optim.Adam(model.parameters(), lr=0.008)

# torch.autograd.Variable is a deprecated no-op wrapper, so the tensors are
# used directly.
x = torch.from_numpy(x_data)
y = torch.from_numpy(y_data)

# Loss history as plain Python floats. Storing the loss tensors themselves
# would keep each iteration's autograd history alive for the whole run.
loss_list = []
loss_list_abs = []
for e in range(iter_num):
    opt.zero_grad()
    pi, sigmasq, mu = model(x)
    loss = model.loss(pi, sigmasq, mu, y, n)
    loss_value = loss.item()
    loss_list.append(loss_value)
    loss_list_abs.append(abs(loss_value))
    if e % 100 == 0:
        # Report progress every 100 iterations (same tensor repr as before).
        print(loss.detach())
    loss.backward()
    opt.step()

tensor(1.0981)
tensor(-1.2278)
tensor(-1.2953)
tensor(-1.4449)
tensor(-1.2865)
tensor(-1.4405)
tensor(-1.2303)
tensor(-1.4007)
tensor(-1.3936)
tensor(-1.4194)
tensor(-1.2617)
tensor(-1.3752)
tensor(-1.2878)
tensor(-1.4412)
tensor(-1.4110)


In [19]:
 # Save the model's state_dict to 'save.pth' in the working directory.
torch.save(model.state_dict(), 'save.pth')

tensor([ 1.0454, -0.5013, -0.8887, -0.3365,  0.5345, -0.0249,  0.4015, -1.2762,
         1.8783,  1.2032,  1.3076,  1.7079, -0.3452,  1.0102,  0.7578,  0.8786,
        -0.9921,  1.3441,  0.2122,  2.3509,  0.7508,  2.9876,  2.6947,  0.9233,
         2.1274,  0.4656,  1.8005, -1.0575,  0.6720,  0.2375],
       grad_fn=<SelectBackward0>)


torch.Size([9218, 30])