In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn

from sklearn.metrics import mean_squared_error

# Location of the ramen ratings CSV. The original machine-specific path is kept
# as the default, but it can be overridden with the RAMEN_CSV environment
# variable so the notebook can run where that Windows path does not exist.
RAMEN_CSV = os.environ.get('RAMEN_CSV', 'C:\\DeepLearning\\DataSets\\ramen-ratings.csv')

# Load the raw ratings table into a DataFrame.
ramen = pd.read_csv(RAMEN_CSV)

In [3]:
# Preview the first five rows of the table.
ramen.head(n=5)

# (number of rows, number of columns)
ramen.shape

(2580, 7)

In [11]:
# Ten most frequent countries in the data set.
ramen['Country'].value_counts().head(10)

# Remove the rows whose rating is the literal string 'Unrated'; such a value
# cannot be cast to a number later on.
mask = ramen.index[ramen['Stars'].eq('Unrated')]
ramen = ramen.drop(mask, axis='index')
ramen.shape

(2577, 7)

In [12]:
# Show the dtype of the rating column before the conversion.
print(ramen.dtypes['Stars'])

# Cast the ratings to floating point so they can serve as a regression target.
ramen = ramen.assign(Stars=ramen['Stars'].astype('float64'))

# ...and after the conversion.
print(ramen.dtypes['Stars'])

object
float64


In [13]:
# Columns that are used neither as model features nor as the target.
unused_columns = ['Review #', 'Top Ten', 'Variety']
ramen = ramen.drop(unused_columns, axis='columns')
ramen.head(n=5)


Unnamed: 0,Brand,Style,Country,Stars
0,New Touch,Cup,Japan,3.75
1,Just Way,Pack,Taiwan,1.0
2,Nissin,Cup,USA,2.25
3,Wei Lih,Pack,Taiwan,2.75
4,Ching's Secret,Pack,India,3.75


In [14]:
def _one_hot(column):
    """Return the dummy-variable columns for one categorical column of `ramen`.

    The column name is used as the prefix of the generated columns and the
    first category level is dropped (drop_first=True).
    """
    return pd.get_dummies(ramen[column], prefix=column, drop_first=True)


# Convert each categorical feature into dummy variables.
Country = _one_hot('Country')
Brand = _one_hot('Brand')
Style = _one_hot('Style')

# Join the encoded features side by side into one frame.
ramendf = pd.concat([Country, Brand, Style], axis='columns')

# Check the result.
ramendf.head()

Unnamed: 0,Country_Bangladesh,Country_Brazil,Country_Cambodia,Country_Canada,Country_China,Country_Colombia,Country_Dubai,Country_Estonia,Country_Fiji,Country_Finland,...,Brand_Yum-Mie,Brand_Zow Zow,Brand_iMee,Brand_iNoodle,Style_Bowl,Style_Box,Style_Can,Style_Cup,Style_Pack,Style_Tray
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [15]:
# Split into features and target, both as float32 NumPy arrays.
# X holds the dummy-encoded features; y keeps a trailing axis of length 1
# because the target is selected with a list of columns ([['Stars']]).
X = ramendf.to_numpy().astype(np.float32)
y = ramen[['Stars']].to_numpy().astype(np.float32)

In [16]:
# Fix the RNG seed so the random weight initialisation of the layer (and with
# it the whole training run) is repeatable after a kernel restart.
torch.manual_seed(0)

# Linear regression model: one weight per feature column, one output value.
# The input width is read from X rather than hard-coded, so the layer keeps
# matching the data if the number of dummy columns changes.
model = nn.Linear(X.shape[1], 1)

# Mean squared error between predictions and targets.
loss = nn.MSELoss()

# SGD optimizer over the layer's parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.6)

In [17]:
# Train the model.
n_epochs = 1000   # total number of passes over the data
log_every = 100   # print the cost once per this many epochs

# Stage 1: convert the NumPy arrays to tensors. This is done once, outside the
# loop, because X and y do not change between epochs.
inputs = torch.from_numpy(X)
targets = torch.from_numpy(y)

for epoch in range(n_epochs):
    # Stage 2: compute the predictions and the resulting error (cost).
    outputs = model(inputs)
    cost = loss(outputs, targets)

    # Stage 3: backpropagation - clear stale gradients, compute new ones,
    # then let the optimizer update the parameters.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Stage 4: report the cost every `log_every` epochs.
    if (epoch + 1) % log_every == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, n_epochs, cost.item()))

Epoch [100/1000], Loss: 0.7752
Epoch [200/1000], Loss: 0.7268
Epoch [300/1000], Loss: 0.6981
Epoch [400/1000], Loss: 0.6783
Epoch [500/1000], Loss: 0.6636
Epoch [600/1000], Loss: 0.6522
Epoch [700/1000], Loss: 0.6428
Epoch [800/1000], Loss: 0.6351
Epoch [900/1000], Loss: 0.6286
Epoch [1000/1000], Loss: 0.6230


In [18]:
# Predict with the trained model on the feature matrix X. No gradients are
# needed here, so the forward pass runs under no_grad(); the result can then
# be converted to NumPy directly, without going through the discouraged
# `.data` attribute.
with torch.no_grad():
    y_pred = model(torch.from_numpy(X)).numpy()

# Compare the first five predictions with the corresponding true ratings.
print(y_pred[0:5])
print(y[0:5])

[[4.1111665]
 [2.926288 ]
 [3.4569106]
 [3.420286 ]
 [3.7045467]]
[[3.75]
 [1.  ]
 [2.25]
 [2.75]
 [3.75]]


In [19]:
# Mean squared error of the predictions against the true ratings.
# NOTE: y_pred was computed from the same X the model was trained on, so this
# is the training error, not a measure of performance on unseen data.
mean_squared_error(y_true=y, y_pred=y_pred)

0.6229471