In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns

import collections

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import DataLoader, Dataset, TensorDataset

import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [None]:
# Load the train/test CSV files and discard the 'Unnamed: 0' column
# (presumably the row index written out by an earlier `to_csv` -- confirm).
planets_train = (
    pd.read_csv("C:/Users/user/.jupyter/주피터 파일/data/planets_train.csv")
    .drop(columns=['Unnamed: 0'])
)

planets_test = (
    pd.read_csv("C:/Users/user/.jupyter/주피터 파일/data/planets_test.csv")
    .drop(columns=['Unnamed: 0'])
)

In [None]:
# Column 0 is used as the label; columns 1-5 are the five model features.
# The feature frames are explicit copies because a later cell assigns into
# their columns in place: writing into a bare `iloc` slice of the parent
# frame raises SettingWithCopyWarning and has ambiguous view/copy semantics.
train_x = planets_train.iloc[:, 1:6].copy()
train_y = planets_train.iloc[:, 0]

test_x = planets_test.iloc[:, 1:6].copy()
test_y = planets_test.iloc[:, 0]

In [None]:
# Keep the two label values listed below and fold every other value into a
# single 'Others' class, giving a three-class target.
kept_methods = ['Radial Velocity', 'Transit']

train_y = train_y.where(train_y.isin(kept_methods), 'Others')
test_y = test_y.where(test_y.isin(kept_methods), 'Others')

In [None]:
# Replace the listed columns with their natural logarithm, in place, on both
# feature frames.
# NOTE(review): re-running this cell applies the log a second time.
log_columns = ['orbital_period', 'distance', 'mass']

for feature_frame in (train_x, test_x):
    feature_frame[log_columns] = np.log(feature_frame[log_columns])

In [None]:
# Fill missing feature values by linear interpolation along the row order
# ('linear' is the pandas default, spelled out here for clarity).
# NOTE(review): this treats neighbouring rows as related and, with default
# settings, does not fill NaNs that precede the first valid value in a
# column -- confirm both are acceptable for this data.
train_x = train_x.interpolate(method='linear')
test_x = test_x.interpolate(method='linear')

In [None]:
from sklearn.preprocessing import RobustScaler, LabelEncoder

# Fit the scaler on the training features only and reuse the fitted
# statistics for the test features. Fitting a second scaler on the test set
# (as before) puts the two splits on different scales and leaks test-set
# statistics into preprocessing.
feature_scaler = RobustScaler().fit(train_x)

train_x = feature_scaler.transform(train_x)
test_x = feature_scaler.transform(test_x)

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the minority classes of the TRAINING split only.
x_train, y_train = SMOTE(random_state=0).fit_resample(train_x, train_y)

# The test split is left untouched: resampling it would add synthetic rows to
# the evaluation data, so the reported test accuracy would no longer describe
# real observations.
x_test, y_test = test_x, test_y

In [None]:
# Fit a single encoder on the training labels and reuse it for the test
# labels, so a given class name always maps to the same integer in both
# splits. Two independently fitted encoders only agree when both splits
# happen to contain exactly the same set of classes.
label_encoder = LabelEncoder()

y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

In [None]:
# One-hot encode the integer class labels with a fixed number of columns.
# Indexing an identity matrix keeps the width identical for both splits even
# if one of them is missing a class, and yields a plain float32 array
# (`pd.get_dummies` sizes its output per call and its dtype varies across
# pandas versions).
num_classes = int(max(y_train.max(), y_test.max())) + 1
identity = np.eye(num_classes, dtype=np.float32)

y_train_oh = identity[y_train]
y_test_oh = identity[y_test]

In [None]:
def _to_float_tensor(array):
    """Wrap a NumPy array as a torch tensor and cast it with `.float()`."""
    return torch.from_numpy(array).float()


# Features.
x_train = _to_float_tensor(x_train)
x_test = _to_float_tensor(x_test)

# One-hot targets.
y_train_oh = _to_float_tensor(y_train_oh)
y_test_oh = _to_float_tensor(y_test_oh)

# Integer class labels (stored as float tensors, as before).
# NOTE(review): this cell is not re-runnable -- the names above are rebound
# from arrays to tensors, and `torch.from_numpy` rejects tensors.
y_train = _to_float_tensor(y_train)
y_test = _to_float_tensor(y_test)

In [None]:
class BasicDataset(Dataset):
    """Pair a feature container with a target container, sample by sample.

    Attributes:
        x: the features passed to the constructor.
        y: the targets passed to the constructor.
    """

    def __init__(self, x_tensor, y_tensor):
        """Store the two containers as given; nothing is copied or validated."""
        super().__init__()
        self.x, self.y = x_tensor, y_tensor

    def __getitem__(self, index):
        """Return the `(features, target)` pair stored at `index`."""
        features = self.x[index]
        target = self.y[index]
        return features, target

    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        return len(self.x)

In [None]:
# Training batches carry one-hot targets; the validation and test datasets
# carry integer class labels so predictions can be compared with `==`.
tr_dataset = BasicDataset(x_tensor=x_train, y_tensor=y_train_oh)
val_dataset = BasicDataset(x_tensor=x_train, y_tensor=y_train)
ts_dataset = BasicDataset(x_tensor=x_test, y_tensor=y_test)

testloader = DataLoader(dataset=ts_dataset, batch_size=16, shuffle=True)

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler

# --- Split configuration ---------------------------------------------------
batch_size = 16
validation_split = .2  # fraction of the samples held out for validation
shuffle_dataset = True
random_seed = 4

# --- Build a shuffled index list and cut it into two disjoint parts ---------
dataset_size = len(tr_dataset)
indices = list(range(dataset_size))
# Truncation equals floor here because the product is never negative.
split = int(validation_split * dataset_size)

if shuffle_dataset:
    # Seed NumPy's global RNG so the same partition is produced on every run.
    np.random.seed(random_seed)
    np.random.shuffle(indices)

val_indices = indices[:split]
train_indices = indices[split:]

# --- Loaders ----------------------------------------------------------------
# `tr_dataset` and `val_dataset` wrap the same features, so one index list
# addresses the same rows in both; the samplers keep the two parts disjoint.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(tr_dataset, batch_size=batch_size, sampler=train_sampler)
validation_loader = DataLoader(val_dataset, batch_size=batch_size, sampler=valid_sampler)

In [None]:
class Net(nn.Module):
    """Fully connected classifier: 5 input features -> 64 -> 256 -> 3 classes.

    The network returns raw, unnormalised class scores (logits). No softmax
    layer is included because `nn.CrossEntropyLoss` applies log-softmax
    itself; applying softmax beforehand would squash the scores twice and
    flatten the gradients. The predicted class is unaffected (argmax of the
    logits equals argmax of the softmax). Call `torch.softmax(out, dim=1)` on
    the output if probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(5, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 3),
        )

    def forward(self, x):
        """Map a `(batch, 5)` float tensor to `(batch, 3)` class logits."""
        return self.fc(x)

In [None]:
# Model, loss and optimiser used by the training loop.
net = Net()
criterion = nn.CrossEntropyLoss()

# Adam with a 1e-3 learning rate and no weight decay.
# (Alternatives tried earlier: Adam with weight_decay=0.001, and
#  SGD with lr=0.01, momentum=0.9, weight_decay=0.001.)
optimizer = optim.Adam(params=net.parameters(), lr=0.001)

In [None]:
import pytorch_model_summary

# Print a layer-by-layer summary, traced with a dummy batch holding one
# 5-feature sample.
dummy_batch = torch.zeros(1, 5)
model_report = pytorch_model_summary.summary(net, dummy_batch, show_input=True)
print(model_report)

In [None]:
# Per-epoch history, plotted after training.
loss_list = []
accuracy_list = []
epochs = np.arange(1,31)
total_batch = len(train_loader)

for epoch in epochs:
    # ---- training pass ----
    net.train()
    cost = 0.0

    for inputs, labels in train_loader:
        optimizer.zero_grad()

        # Training targets are one-hot; CrossEntropyLoss needs class indices.
        outputs = net(inputs)
        loss = criterion(outputs, labels.argmax(dim=1))

        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float. Summing the `loss` tensor itself
        # keeps every batch's autograd graph reachable until the epoch ends.
        cost += loss.item()

    # ---- validation pass ----
    net.eval()
    total = 0
    correct = 0

    with torch.no_grad():
        for inputs, labels in validation_loader:
            outputs = net(inputs)
            predict = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predict == labels).sum().item()

    avg_cost = cost / total_batch
    accuracy = 100*correct/total

    loss_list.append(avg_cost)
    accuracy_list.append(accuracy)

    print("epoch : {} | loss : {:.6f}" .format(epoch, avg_cost))
    print("Accuracy : {:.2f}".format(accuracy))
    print("------")

In [None]:
# Evaluate the trained network on the test loader.
correct = 0
total = 0
pred_list = []   # predicted class index for every test sample
label_list = []  # true class index for every test sample

# Switch to inference mode for the evaluation and restore the previous mode
# afterwards, so this cell does not change how a later training run behaves.
was_training = net.training
net.eval()

with torch.no_grad():
    for inputs, labels in testloader:
        outputs = net(inputs)
        predicted = outputs.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # These lists were declared but never filled before; collecting the
        # values makes them usable for e.g. a confusion matrix.
        pred_list.extend(predicted.tolist())
        label_list.extend(labels.long().tolist())

net.train(was_training)

print('Accuracy of the network on test data: %d %%' % (100 * correct / total))

In [None]:
# Training history: mean loss per epoch (left) and validation accuracy per
# epoch (right).
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(10, 5))

loss_ax.plot(epochs, loss_list)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')

accuracy_ax.plot(epochs, accuracy_list)
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')

plt.show()