# 데이터 다운로드

In [None]:
!wget https://raw.githubusercontent.com/TeamAIoT/deep-learning/main/Dataset/boston_description.txt
!wget https://raw.githubusercontent.com/TeamAIoT/deep-learning/main/Dataset/boston_train.csv
!wget https://raw.githubusercontent.com/TeamAIoT/deep-learning/main/Dataset/boston_test.csv
!wget https://raw.githubusercontent.com/TeamAIoT/deep-learning/main/Dataset/diabetes_description.txt
!wget https://raw.githubusercontent.com/TeamAIoT/deep-learning/main/Dataset/diabetes_train.csv
!wget https://raw.githubusercontent.com/TeamAIoT/deep-learning/main/Dataset/diabetes_test.csv

# 모듈 임포트

In [None]:
import torch
import os

import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader, random_split
from torch.cuda import is_available

# GPU 사용을 위한 Device 세팅

In [None]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
if is_available():
    device='cuda'
else:
    device='cpu'
# Bare expression so the notebook cell displays the selected device.
device

# Boston Housing Dataset

## Boston Housing Dataset 정의

In [None]:
class BostonDataset(Dataset):
    """Boston Housing rows read from CSV files in the current working directory.

    Constructing the dataset prints the contents of ``boston_description.txt``
    and loads ``boston_train.csv`` or ``boston_test.csv`` into ``self.data``.
    Each item is a ``(features, target)`` pair of float tensors: the first
    ``NUM_FEATURES`` columns of a row are the features and the last column is
    the target.

    Args:
        mode: ``'train'`` or ``'test'``; selects which CSV file is loaded.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'test'``.
        FileNotFoundError: If the CSV for ``mode`` or the description file
            does not exist.
    """

    # Number of leading CSV columns used as input features.
    NUM_FEATURES=13

    def __init__(self,mode='train'):
        # Reject a bad mode first, before any file is read or anything is printed.
        if mode not in ('train','test'):
            raise ValueError('Invalid argument at \'mode\'. expected \'train\' or \'test\'')
        csv_path='boston_{}.csv'.format(mode)
        if not os.path.exists(csv_path):
            # Name the missing file so the failure is actionable.
            raise FileNotFoundError('Dataset file not found: {}'.format(csv_path))
        with open('boston_description.txt') as f:
            print(f.read())
        self.mode=mode
        self.data=pd.read_csv(csv_path)

    def __len__(self):
        """Return the number of rows in the loaded CSV."""
        return len(self.data)

    def __getitem__(self,idx):
        """Return ``(features, target)`` float tensors for row ``idx``."""
        features=self.data.iloc[idx,:self.NUM_FEATURES].values
        # The list selector keeps the target as a length-1 array rather than a scalar.
        target=self.data.iloc[idx,[-1]].values
        return torch.FloatTensor(features),torch.FloatTensor(target)

## 학습을 위한 데이터 준비

In [None]:
batch_size=32
train_ratio=0.8

all_data=BostonDataset(mode='train')

# Derive the split sizes from train_ratio so that changing it actually changes
# the split; the validation part takes whatever the integer truncation leaves.
train_data_len=int(len(all_data)*train_ratio)
valid_data_len=len(all_data)-train_data_len

train_data,valid_data=random_split(all_data,[train_data_len,valid_data_len])

# Only the training batches are shuffled; validation order stays fixed.
train_loader=DataLoader(train_data,batch_size=batch_size,shuffle=True)
valid_loader=DataLoader(valid_data,batch_size=batch_size,shuffle=False)

## BostonModel 구현

In [None]:
class BostonModel(nn.Module):
    """Small regressor: Linear(13, 16) -> Sigmoid -> Linear(16, 1)."""

    def __init__(self):
        super().__init__()
        # Submodule names are kept because they become the state_dict keys.
        self.layer1=nn.Linear(in_features=13,out_features=16)
        self.layer2=nn.Linear(in_features=16,out_features=1)
        self.activation=nn.Sigmoid()

    def forward(self,x):
        """Apply the hidden layer with sigmoid, then the output layer."""
        hidden=self.activation(self.layer1(x))
        return self.layer2(hidden)

## 모델, 손실 함수, 최적화 알고리즘 정의

In [None]:
lr=0.001

# Build the network and the loss, each placed on the selected device.
model=BostonModel().to(device)
criterion=nn.MSELoss().to(device)

# Plain SGD over all of the model's parameters.
optimizer=optim.SGD(params=model.parameters(),lr=lr)

## 학습

In [None]:
def train(model,criterion,optimizer,train_loader,valid_loader,num_epochs=10,print_every=1,early_stop=None,model_path='boston.pth'):
    """Train ``model``, validate after every epoch and checkpoint the best weights.

    Args:
        model: Network to optimize. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        criterion: Loss called as ``criterion(pred, target)``. The epoch average
            assumes it returns a per-batch mean.
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Iterable of ``(data, target)`` training batches.
        valid_loader: Iterable of ``(data, target)`` validation batches.
        num_epochs: Maximum number of epochs to run.
        print_every: Print the losses every this many epochs.
        early_stop: Stop after this many consecutive epochs without a new best
            validation loss; ``None`` disables early stopping.
        model_path: File that receives the ``state_dict`` each time the
            validation loss reaches a new minimum.

    Returns:
        ``(train_logs, valid_logs)``: lists with one sample-weighted mean loss
        per completed epoch.
    """
    # Use the model's own device instead of a notebook-level global so the
    # function works for any model/loader pair handed to it.
    first_param=next(model.parameters(),None)
    device=first_param.device if first_param is not None else torch.device('cpu')

    train_logs=[]
    valid_logs=[]
    patience=0
    best_loss=float('inf')
    for epoch in range(num_epochs):
        train_loss=0
        valid_loss=0
        train_count=0
        valid_count=0
        # training step
        model.train()
        for data,target in train_loader:
            optimizer.zero_grad()
            data,target=data.to(device),target.to(device)
            pred=model(data)
            loss=criterion(pred,target)
            loss.backward()
            optimizer.step()
            # Weight each batch by its size so a short last batch is averaged correctly.
            train_loss+=loss.item()*data.size(0)
            train_count+=data.size(0)
        # Divide by the samples actually seen (not a global dataset length);
        # max() avoids a ZeroDivisionError for an empty loader.
        train_loss/=max(train_count,1)
        train_logs.append(train_loss)
        if (epoch+1)%print_every==0:
            print('Training   Epoch {} - Loss : {:.8f}'.format(epoch,train_loss))
        # validation step
        model.eval()
        with torch.no_grad():
            for data,target in valid_loader:
                data,target=data.to(device),target.to(device)
                pred=model(data)
                loss=criterion(pred,target)
                valid_loss+=loss.item()*data.size(0)
                valid_count+=data.size(0)
        valid_loss/=max(valid_count,1)
        valid_logs.append(valid_loss)
        if (epoch+1)%print_every==0:
            print('Validation Epoch {} - Loss : {:.8f}'.format(epoch,valid_loss))
        if valid_loss<best_loss:
            # New best: checkpoint the weights and reset the patience counter.
            best_loss=valid_loss
            torch.save(model.state_dict(),model_path)
            patience=0
        elif early_stop is not None:
            patience+=1
            if patience>=early_stop:
                print('Training finished by early stopping')
                return train_logs,valid_logs
    return train_logs,valid_logs

In [None]:
# Train for 100 epochs, reporting every 10th, without early stopping; the best
# weights are written to boston.pth.
train_logs,valid_logs=train(
    model,criterion,optimizer,train_loader,valid_loader,
    num_epochs=100,print_every=10,early_stop=None,model_path='boston.pth',
)

In [None]:
# Draw both loss curves against the 0-based epoch index, then label the axes.
plt.plot(list(range(len(train_logs))),train_logs,label='train_loss')
plt.plot(list(range(len(valid_logs))),valid_logs,label='valid_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

## 평가

In [None]:
test_data=BostonDataset(mode='test')
# batch_size=1 so every test row yields one prediction/target pair.
test_loader=DataLoader(test_data,batch_size=1,shuffle=False)

# Restore the checkpoint written during training. map_location remaps the saved
# tensors onto the current device, so a checkpoint saved on a GPU still loads
# when only the CPU is available.
model.load_state_dict(torch.load('boston.pth',map_location=device))
model=model.to(device)

In [None]:
def test(model,criterion,test_loader):
    test_loss=0
    result_table=pd.DataFrame(columns=['Prediction','Target'])
    with torch.no_grad():
        model.eval()
        for data,target in test_loader:
            data,target=data.to(device),target.to(device)
            pred=model(data)
            loss=criterion(pred,target)
            test_loss+=loss.item()*data.size(0)
            for p,t in zip(pred.view(-1),target.view(-1)):
                result_table=result_table.append({'Prediction':p.item(),'Target':t.item()},ignore_index=True)
        test_loss/=len(test_data)
    return test_loss,result_table

In [None]:
# Evaluate the restored model on the held-out test set.
test_loss,result_table=test(model,criterion,test_loader)

In [None]:
print('Test Loss : {}'.format(test_loss))

In [None]:
result_table

# Diabetes Dataset

## Diabetes Dataset 정의

In [None]:
class DiabetesDataset(Dataset):
    """Diabetes rows read from CSV files in the current working directory.

    Constructing the dataset prints the contents of ``diabetes_description.txt``
    and loads ``diabetes_train.csv`` or ``diabetes_test.csv`` into ``self.data``.
    Each item is a ``(features, target)`` pair of float tensors: the first
    ``NUM_FEATURES`` columns of a row are the features and the last column is
    the target.

    Args:
        mode: ``'train'`` or ``'test'``; selects which CSV file is loaded.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'test'``.
        FileNotFoundError: If the CSV for ``mode`` or the description file
            does not exist.
    """

    # Number of leading CSV columns used as input features.
    NUM_FEATURES=10

    def __init__(self,mode='train'):
        # Reject a bad mode first, before any file is read or anything is printed.
        if mode not in ('train','test'):
            raise ValueError('Invalid argument at \'mode\'. expected \'train\' or \'test\'')
        csv_path='diabetes_{}.csv'.format(mode)
        if not os.path.exists(csv_path):
            # Name the missing file so the failure is actionable.
            raise FileNotFoundError('Dataset file not found: {}'.format(csv_path))
        with open('diabetes_description.txt') as f:
            print(f.read())
        self.mode=mode
        self.data=pd.read_csv(csv_path)

    def __len__(self):
        """Return the number of rows in the loaded CSV."""
        return len(self.data)

    def __getitem__(self,idx):
        """Return ``(features, target)`` float tensors for row ``idx``."""
        features=self.data.iloc[idx,:self.NUM_FEATURES].values
        # The list selector keeps the target as a length-1 array rather than a scalar.
        target=self.data.iloc[idx,[-1]].values
        return torch.FloatTensor(features),torch.FloatTensor(target)

## 학습을 위한 데이터 준비

In [None]:
batch_size=32
train_ratio=0.8

all_data=DiabetesDataset(mode='train')

# Derive the split sizes from train_ratio so that changing it actually changes
# the split; the validation part takes whatever the integer truncation leaves.
train_data_len=int(len(all_data)*train_ratio)
valid_data_len=len(all_data)-train_data_len

train_data,valid_data=random_split(all_data,[train_data_len,valid_data_len])

# Only the training batches are shuffled; validation order stays fixed.
train_loader=DataLoader(train_data,batch_size=batch_size,shuffle=True)
valid_loader=DataLoader(valid_data,batch_size=batch_size,shuffle=False)

## DiabetesModel 구현

In [None]:
class DiabetesModel(nn.Module):
    """Small regressor: Linear(10, 16) -> Sigmoid -> Linear(16, 1)."""

    def __init__(self):
        super().__init__()
        # Submodule names are kept because they become the state_dict keys.
        self.layer1=nn.Linear(in_features=10,out_features=16)
        self.layer2=nn.Linear(in_features=16,out_features=1)
        self.activation=nn.Sigmoid()

    def forward(self,x):
        """Apply the hidden layer with sigmoid, then the output layer."""
        hidden=self.activation(self.layer1(x))
        return self.layer2(hidden)

## 모델, 손실 함수, 최적화 알고리즘 정의

In [None]:
lr=0.001

# Build the network and the loss, each placed on the selected device.
model=DiabetesModel().to(device)
criterion=nn.MSELoss().to(device)

# Plain SGD over all of the model's parameters.
optimizer=optim.SGD(params=model.parameters(),lr=lr)

## 학습

In [None]:
def train(model,criterion,optimizer,train_loader,valid_loader,num_epochs=10,print_every=1,early_stop=None,model_path='diabetes.pth'):
    """Train ``model``, validate after every epoch and checkpoint the best weights.

    Args:
        model: Network to optimize. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        criterion: Loss called as ``criterion(pred, target)``. The epoch average
            assumes it returns a per-batch mean.
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Iterable of ``(data, target)`` training batches.
        valid_loader: Iterable of ``(data, target)`` validation batches.
        num_epochs: Maximum number of epochs to run.
        print_every: Print the losses every this many epochs.
        early_stop: Stop after this many consecutive epochs without a new best
            validation loss; ``None`` disables early stopping.
        model_path: File that receives the ``state_dict`` each time the
            validation loss reaches a new minimum.

    Returns:
        ``(train_logs, valid_logs)``: lists with one sample-weighted mean loss
        per completed epoch.
    """
    # Use the model's own device instead of a notebook-level global so the
    # function works for any model/loader pair handed to it.
    first_param=next(model.parameters(),None)
    device=first_param.device if first_param is not None else torch.device('cpu')

    train_logs=[]
    valid_logs=[]
    patience=0
    best_loss=float('inf')
    for epoch in range(num_epochs):
        train_loss=0
        valid_loss=0
        train_count=0
        valid_count=0
        # training step
        model.train()
        for data,target in train_loader:
            optimizer.zero_grad()
            data,target=data.to(device),target.to(device)
            pred=model(data)
            loss=criterion(pred,target)
            loss.backward()
            optimizer.step()
            # Weight each batch by its size so a short last batch is averaged correctly.
            train_loss+=loss.item()*data.size(0)
            train_count+=data.size(0)
        # Divide by the samples actually seen (not a global dataset length);
        # max() avoids a ZeroDivisionError for an empty loader.
        train_loss/=max(train_count,1)
        train_logs.append(train_loss)
        if (epoch+1)%print_every==0:
            print('Training   Epoch {} - Loss : {:.8f}'.format(epoch,train_loss))
        # validation step
        model.eval()
        with torch.no_grad():
            for data,target in valid_loader:
                data,target=data.to(device),target.to(device)
                pred=model(data)
                loss=criterion(pred,target)
                valid_loss+=loss.item()*data.size(0)
                valid_count+=data.size(0)
        valid_loss/=max(valid_count,1)
        valid_logs.append(valid_loss)
        if (epoch+1)%print_every==0:
            print('Validation Epoch {} - Loss : {:.8f}'.format(epoch,valid_loss))
        if valid_loss<best_loss:
            # New best: checkpoint the weights and reset the patience counter.
            best_loss=valid_loss
            torch.save(model.state_dict(),model_path)
            patience=0
        elif early_stop is not None:
            patience+=1
            if patience>=early_stop:
                print('Training finished by early stopping')
                return train_logs,valid_logs
    return train_logs,valid_logs

In [None]:
# Train for 100 epochs, reporting every 10th, without early stopping; the best
# weights are written to diabetes.pth.
train_logs,valid_logs=train(
    model,criterion,optimizer,train_loader,valid_loader,
    num_epochs=100,print_every=10,early_stop=None,model_path='diabetes.pth',
)

In [None]:
# Draw both loss curves against the 0-based epoch index, then label the axes.
plt.plot(list(range(len(train_logs))),train_logs,label='train_loss')
plt.plot(list(range(len(valid_logs))),valid_logs,label='valid_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

## 평가

In [None]:
test_data=DiabetesDataset(mode='test')
# batch_size=1 so every test row yields one prediction/target pair.
test_loader=DataLoader(test_data,batch_size=1,shuffle=False)

# Restore the checkpoint written during training. map_location remaps the saved
# tensors onto the current device, so a checkpoint saved on a GPU still loads
# when only the CPU is available.
model.load_state_dict(torch.load('diabetes.pth',map_location=device))
model=model.to(device)

In [None]:
def test(model,criterion,test_loader):
    test_loss=0
    result_table=pd.DataFrame(columns=['Prediction','Target'])
    with torch.no_grad():
        model.eval()
        for data,target in test_loader:
            data,target=data.to(device),target.to(device)
            pred=model(data)
            loss=criterion(pred,target)
            test_loss+=loss.item()*data.size(0)
            for p,t in zip(pred.view(-1),target.view(-1)):
                result_table=result_table.append({'Prediction':p.item(),'Target':t.item()},ignore_index=True)
        test_loss/=len(test_data)
    return test_loss,result_table

In [None]:
# Evaluate the restored model on the held-out test set.
test_loss,result_table=test(model,criterion,test_loader)

In [None]:
print('Test Loss : {}'.format(test_loss))

In [None]:
result_table