In [3]:
# ライブラリのインポート
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
 
# PyTorchのインポート
import torch 
import torch.nn as nn
 
# 評価指標（Scikit-learn）
from sklearn.metrics import mean_squared_error

In [4]:
df_ramen = pd.read_csv('../data/ramen-ratings.csv')

In [6]:
print(df_ramen.shape)
df_ramen.head()

(2580, 7)


Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
0,2580,New Touch,T's Restaurant Tantanmen,Cup,Japan,3.75,
1,2579,Just Way,Noodles Spicy Hot Sesame Spicy Hot Sesame Guan...,Pack,Taiwan,1.0,
2,2578,Nissin,Cup Noodles Chicken Vegetable,Cup,USA,2.25,
3,2577,Wei Lih,GGE Ramen Snack Tomato Flavor,Pack,Taiwan,2.75,
4,2576,Ching's Secret,Singapore Curry,Pack,India,3.75,


In [9]:
df_ramen.Country.value_counts().head()

Japan          352
USA            323
South Korea    309
Taiwan         224
Thailand       191
Name: Country, dtype: int64

In [11]:
df_ramen.nunique()

Review #    2580
Brand        355
Variety     2413
Style          7
Country       38
Stars         51
Top Ten       38
dtype: int64

In [14]:
df_ramen.isnull().sum()

Review #       0
Brand          0
Variety        0
Style          2
Country        0
Stars          0
Top Ten     2539
dtype: int64

In [20]:
df_ramen[df_ramen.Style.isnull()]

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
2152,428,Kamfen,E Menm Chicken,,China,3.75,
2442,138,Unif,100 Furong Shrimp,,Taiwan,3.0,


In [38]:
df_ramen_drop_unrated = df_ramen[df_ramen.Stars!='Unrated'].reset_index(drop=True)
df_ramen_drop_unrated.shape

(2577, 7)

In [39]:
df_ramen_drop_unrated.dtypes

Review #     int64
Brand       object
Variety     object
Style       object
Country     object
Stars       object
Top Ten     object
dtype: object

In [40]:
df_ramen_drop_unrated.Stars = df_ramen_drop_unrated.Stars.astype(np.float64)

In [43]:
df_ramen_drop_unrated.head()

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
0,2580,New Touch,T's Restaurant Tantanmen,Cup,Japan,3.75,
1,2579,Just Way,Noodles Spicy Hot Sesame Spicy Hot Sesame Guan...,Pack,Taiwan,1.0,
2,2578,Nissin,Cup Noodles Chicken Vegetable,Cup,USA,2.25,
3,2577,Wei Lih,GGE Ramen Snack Tomato Flavor,Pack,Taiwan,2.75,
4,2576,Ching's Secret,Singapore Curry,Pack,India,3.75,


In [62]:
X = df_ramen_drop_unrated[['Brand', 'Style', 'Country']]
y = df_ramen_drop_unrated.Stars

In [63]:
Country = pd.get_dummies(X.Country, prefix='Country', drop_first=True)
Brand = pd.get_dummies(X.Brand, prefix='Brand', drop_first=True)
Style = pd.get_dummies(X.Style, prefix='Style', drop_first=True)
X = pd.concat([Country, Brand, Style], axis=1)
X.head()

Unnamed: 0,Country_Bangladesh,Country_Brazil,Country_Cambodia,Country_Canada,Country_China,Country_Colombia,Country_Dubai,Country_Estonia,Country_Fiji,Country_Finland,...,Brand_Yum-Mie,Brand_Zow Zow,Brand_iMee,Brand_iNoodle,Style_Bowl,Style_Box,Style_Can,Style_Cup,Style_Pack,Style_Tray
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [64]:
FEATURE_SIZE = X.shape[1]
FEATURE_SIZE

397

In [93]:
model = nn.Linear(FEATURE_SIZE, 1)
loss_func = nn.MSELoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=0.6)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [94]:
n_epoch = 1000

In [95]:
for epoch in range(n_epoch):
    inputs = torch.from_numpy(X.astype('float64').values).float()
    targets = torch.from_numpy(y.values).float()
    
    outputs = model(inputs)
    loss = loss_func(outputs, targets)
    
    optimizer.zero_grad()
    # grad計算
    loss.backward()
    # weight更新
    optimizer.step()
    
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{n_epoch}], Loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 1.4945
Epoch [200/1000], Loss: 1.0351
Epoch [300/1000], Loss: 1.0324
Epoch [400/1000], Loss: 1.0319
Epoch [500/1000], Loss: 1.0318
Epoch [600/1000], Loss: 1.0318
Epoch [700/1000], Loss: 1.0317
Epoch [800/1000], Loss: 1.0317
Epoch [900/1000], Loss: 1.0317
Epoch [1000/1000], Loss: 1.0317


In [85]:
y_pred = model(torch.from_numpy(X.values).float()).data.numpy().flatten()
y_pred

array([3.656149 , 3.6550937, 3.6593351, ..., 3.6550026, 3.6550026,
       3.671972 ], dtype=float32)

In [86]:
mean_squared_error(y.values, y_pred.flatten())

1.0315641849721895

## activation使ってないやん