# Kannada MNIST: first training sample

## import ライブラリの列挙

In [1]:
import pandas as pd
from sklearn import model_selection

import torch
import torch.nn as nn
import torch.nn.functional as F

## Configの定義

In [2]:
class Config:
    """Experiment settings shared by the notebook cells."""

    # Input CSV locations for the Kannada-MNIST data under /kaggle/input.
    trainData_path = '/kaggle/input/Kannada-MNIST/train.csv'
    testData_path = '/kaggle/input/Kannada-MNIST/test.csv'

    # Cross-validation: number of folds, the fold held out for validation,
    # and the seed handed to the fold splitter.
    fold_size = 3
    fold = 0
    random_seed = 42

    # Model dimensions: input features per sample and number of classes.
    input_size = 784
    class_num = 10

    # Optimisation hyper-parameters.
    learning_rate = 1e-3
    batch_size = 32
    

In [3]:
# Settings instance used by the rest of the notebook.
cfg = Config()

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

## 入力データの読み込み

In [4]:
# Read the training CSV named in the config into a DataFrame.
data = pd.read_csv(filepath_or_buffer=cfg.trainData_path)

In [5]:
data

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## 入力データをTrainとValidationに分割

In [6]:
# Fold index per row; -1 is a placeholder that the loop below overwrites.
data['kfold'] = -1

# Split the dataset into cfg.fold_size folds (3-fold) for cross-validation and
# store each row's fold index in the 'kfold' column.
kf = model_selection.KFold(n_splits=cfg.fold_size, shuffle=True, random_state=cfg.random_seed)
for fold, (train_indices, valid_indices) in enumerate(kf.split(X=data)):
    # KFold.split yields positional row indices, so write through .iloc.
    # .loc would interpret them as index labels, which only coincides with
    # row positions when the frame has a default 0..n-1 index.
    data.iloc[valid_indices, data.columns.get_loc('kfold')] = fold

In [7]:
data

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,kfold
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
59996,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
59997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [8]:
# Rows whose fold index equals cfg.fold form the validation set.
X_valid = data.loc[data['kfold'] == cfg.fold].reset_index(drop=True)

# Every other row (fold index differs from cfg.fold) forms the training set.
X_train = data.loc[data['kfold'] != cfg.fold].reset_index(drop=True)

In [9]:
X_valid

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,kfold
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19996,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19997,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19998,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
X_train

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,kfold
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
2,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
4,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39995,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
39996,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
39997,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
39998,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


## NW構造の定義

In [11]:
class SampleNW(nn.Module):
    """Single fully connected layer followed by a log-softmax.

    Args:
        input_size: Number of input features given to the linear layer.
        output_size: Number of output units produced by the linear layer.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, output_size)

    def forward(self, x):
        """Return log-probabilities over dim 1 for the input batch ``x``."""
        logits = self.fc1(x)
        log_probs = F.log_softmax(logits, dim=1)
        return log_probs

## Train & Validation ループ

In [12]:
# Build the classifier and move its parameters to the selected device.
model = SampleNW(cfg.input_size, cfg.class_num).to(device)

# SampleNW.forward already returns log-probabilities (log_softmax), so the
# matching loss is NLLLoss. CrossEntropyLoss expects raw logits and would
# apply log-softmax a second time; the loss value is the same either way.
criterion = nn.NLLLoss()

# Plain SGD over all model parameters with the configured learning rate.
optimizer = torch.optim.SGD(model.parameters(), lr=cfg.learning_rate)