In [1]:
import numpy as np
import pandas as pd
import torch
import random

random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
train = pd.read_csv('mnist_train.csv')
test = pd.read_csv('mnist_test.csv')
submit = pd.read_csv('submission.csv')

In [3]:
train

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,775,776,777,778,779,780,781,782,783,784
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0
1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
3,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,59995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0
59996,59996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
59997,59997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0
59998,59998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0


In [4]:
train_x = train.drop(['Unnamed: 0', '784'], axis = 1) #해당 레이블은 각각 index. label 이므로 제거
test_x = test.drop(['Unnamed: 0'], axis = 1)
train_y = (train['784'])

In [5]:
train_x_tensor = torch.FloatTensor(train_x.values).to(device)
train_y_tensor = torch.LongTensor(train_y.values).to(device)
test_x_tensor = torch.FloatTensor(test_x.values).to(device)

In [6]:
train_dataset = torch.utils.data.TensorDataset(train_x_tensor, train_y_tensor)
data_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                          batch_size=2000,
                                          shuffle=True,
                                          drop_last=True)

In [7]:
linear1 = torch.nn.Linear(784,512,bias=True)
linear2 = torch.nn.Linear(512,512,bias=True)
linear3 = torch.nn.Linear(512,512,bias=True)
linear4 = torch.nn.Linear(512,512,bias=True)
linear5 = torch.nn.Linear(512,10,bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.3)

In [8]:
# Random Init => Xavier Init
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)
torch.nn.init.xavier_uniform_(linear4.weight)
torch.nn.init.xavier_uniform_(linear5.weight)

Parameter containing:
tensor([[ 0.0002, -0.0084,  0.0340,  ...,  0.0659,  0.0017,  0.0082],
        [ 0.0732, -0.0465, -0.0325,  ...,  0.1050,  0.0456, -0.0786],
        [ 0.0072, -0.0874,  0.0975,  ...,  0.0975,  0.0515,  0.0245],
        ...,
        [ 0.0794,  0.0860, -0.0077,  ...,  0.0270,  0.0412,  0.0247],
        [ 0.0821,  0.0229,  0.0508,  ..., -0.0335, -0.0429, -0.0587],
        [-0.0008,  0.0162,  0.0755,  ...,  0.1017, -0.0163,  0.0126]],
       requires_grad=True)

In [9]:
model = torch.nn.Sequential(linear1,relu,dropout,
                            linear2,relu,dropout,
                            linear3,relu,dropout,
                            linear4,relu,dropout,
                            linear5).to(device)

In [10]:
loss = torch.nn.CrossEntropyLoss().to(device) # softmax 내부적으로 계산
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 

In [11]:

total_batch = len(data_loader)
model.train() # 주의사항 drop_out = True
for epoch in range(80):
    avg_cost = 0

    for X, Y in data_loader:


        # 그래디언트 초기화
        optimizer.zero_grad()
        # Forward 계산
        hypothesis = model(X)
        # Error 계산
        cost = loss(hypothesis, Y)
        # Backparopagation
        cost.backward()
        # 가중치 갱신
        optimizer.step()

        # 평균 Error 계산
        avg_cost += cost / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 12.465585709
Epoch: 0002 cost = 0.984031379
Epoch: 0003 cost = 0.682807446
Epoch: 0004 cost = 0.552272975
Epoch: 0005 cost = 0.473981023
Epoch: 0006 cost = 0.419978589
Epoch: 0007 cost = 0.375250459
Epoch: 0008 cost = 0.338312387
Epoch: 0009 cost = 0.308492362
Epoch: 0010 cost = 0.285027623
Epoch: 0011 cost = 0.263787240
Epoch: 0012 cost = 0.246811911
Epoch: 0013 cost = 0.231308982
Epoch: 0014 cost = 0.215484187
Epoch: 0015 cost = 0.202350900
Epoch: 0016 cost = 0.190964177
Epoch: 0017 cost = 0.180360273
Epoch: 0018 cost = 0.171786368
Epoch: 0019 cost = 0.164351046
Epoch: 0020 cost = 0.153207928
Epoch: 0021 cost = 0.148563653
Epoch: 0022 cost = 0.141443074
Epoch: 0023 cost = 0.132705227
Epoch: 0024 cost = 0.127190679
Epoch: 0025 cost = 0.120553724
Epoch: 0026 cost = 0.119382806
Epoch: 0027 cost = 0.110643640
Epoch: 0028 cost = 0.107890993
Epoch: 0029 cost = 0.102779418
Epoch: 0030 cost = 0.101014197
Epoch: 0031 cost = 0.097010590
Epoch: 0032 cost = 0.091675624
Epoch: 

In [12]:
with torch.no_grad():
    model.eval()

    prediction = model(test_x_tensor)
    
    train_prediction = model(train_x_tensor)
    correct_prediction = torch.argmax(train_prediction, 1) == train_y_tensor
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

Accuracy: 0.9992499947547913


In [13]:
predict = torch.argmax(prediction, dim = 1)

In [14]:
submit.Label = predict.detach().cpu().numpy()

In [15]:
submit.to_csv('2021_10_13_5.csv', index= False) # 실습 그대로 한 결과  epoch만 20 0.95370

1차 시도 : 실습 그대로 한 결과  epoch 20, dropout 0.5  score : 0.95370  
2차 시도 : 모든 레이어 파라미터 값 2배씩 증가후 코스트 기반으로 epcoh 4로 감소 score : 0.95760  
3차 시도 : 실습과 모두 같게하기 score : 0.96840  
4차 시도 : 3차시도 중 cost가 가장 낮았던 epoch 4까지 진행 score : 0.96690  
5차 시도 : 3차시도에서 epoch 100으로 증가 score : 0.12100
6차 시도 : 배치 사이즈 1000 및 epoch 30으로 진행 : 0.97810  
7차 시도 : 배치 사이즈 2000 및 epoch 30으로 진행 : 0.97370  
train best score : 0.99924

=> 0.981333 일 경우 0.97090이 나옴 :baseline