In [187]:
import numpy as np
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [188]:
from sklearn.model_selection import train_test_split

def load_dataset():
    df = pd.read_csv("/content/hwdataset.csv", delimiter=',')

    df.Sex=df.Sex.replace({"M":0,"F":1})
    df.BP=df.BP.replace({"NORMAL":0,"LOW":1,"HIGH":2})
    df.Cholesterol=df.Cholesterol.replace({"NORMAL":0,"HIGH":1})
    df.Drug=df.Drug.replace({"DrugY":0,"drugC":1,"drugX":2,"drugA":3,"drugB":4})
    
    X, y = df.iloc[:,:-1], df.iloc[:,-1]
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=0)
    
    return X_train, y_train, X_val, y_val, X_test, y_test

In [369]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

class FNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU() 
        self.fc2 = nn.Linear(hidden_dim, output_dim)  
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

In [370]:
input_dim = 5
hidden_dim = 15
output_dim = 5

model = FNN(input_dim, hidden_dim, output_dim)

In [371]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [372]:
batch_size = 32
n_iters = 5000
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()


train_tensor = torch.utils.data.TensorDataset(torch.tensor(X_train.values.astype(np.float32)) , torch.tensor(y_train.values.astype(np.int32)).type(torch.LongTensor)) 
train_loader = torch.utils.data.DataLoader(dataset = train_tensor, batch_size = batch_size, shuffle = True)

val_tensor = torch.utils.data.TensorDataset(torch.tensor(X_val.values.astype(np.float32)) , torch.tensor(y_val.values.astype(np.int32)).type(torch.LongTensor)) 
val_loader = torch.utils.data.DataLoader(dataset = val_tensor, batch_size = batch_size, shuffle = False)

In [373]:
num_epochs = 3000
for epoch in range(num_epochs):

    running_loss = 0.0

    for i, data in enumerate(train_loader):
        inputs, labels = data
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    running_loss/=i
    correct = 0
    total = 0
    iter = 0
    
    if (epoch)%500 == 0:
      for inputs, labels in val_loader:
          iter+=1
          outputs = model(inputs)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum()
          accuracy = 100 * correct / total

      print('Epoch: {}. Loss: {}. Accuracy: {}'.format(epoch, running_loss, accuracy))
print('Finished Training')

Epoch: 0. Loss: 2.3467841148376465. Accuracy: 8.333333015441895
Epoch: 500. Loss: 1.471220890680949. Accuracy: 77.77777862548828
Epoch: 1000. Loss: 1.378138542175293. Accuracy: 83.33333587646484
Epoch: 1500. Loss: 1.3138115406036377. Accuracy: 88.88888549804688
Epoch: 2000. Loss: 1.2732056379318237. Accuracy: 91.66666412353516
Epoch: 2500. Loss: 1.259815752506256. Accuracy: 97.22222137451172
Finished Training


In [374]:
correct = 0
total = 0
iter = 0

val_tensor = torch.utils.data.TensorDataset(torch.tensor(X_val.values.astype(np.float32)) , torch.tensor(y_val.values.astype(np.int32)).type(torch.LongTensor)) 
val_loader = torch.utils.data.DataLoader(dataset = val_tensor, batch_size = batch_size, shuffle = False)

for inputs, labels in val_loader:
    iter+=1
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
    accuracy = 100 * correct / total
    print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

Iteration: 1. Loss: 0.9089741110801697. Accuracy: 90.625
Iteration: 2. Loss: 0.9089741110801697. Accuracy: 91.66666412353516


In [375]:
correct = 0
total = 0
iter = 0
test_tensor = torch.utils.data.TensorDataset(torch.tensor(X_test.values.astype(np.float32)) , torch.tensor(y_test.values.astype(np.int32)).type(torch.LongTensor)) 
test_loader = torch.utils.data.DataLoader(dataset = test_tensor, batch_size = batch_size, shuffle = False)

for inputs, labels in test_loader:
    iter+=1
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
    accuracy = 100 * correct / total

    print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

Iteration: 1. Loss: 0.9089741110801697. Accuracy: 87.5
Iteration: 2. Loss: 0.9089741110801697. Accuracy: 86.11111450195312


In [378]:
torch.save(model.state_dict(), "/content/model")

In [387]:
def count_accuracy_on_file(root_to_the_file):
  #load model
  model.load_state_dict(torch.load("/content/model"))

  #read from file
  df = pd.read_csv(root_to_the_file, delimiter=',')
  df.Sex=df.Sex.replace({"M":0,"F":1})
  df.BP=df.BP.replace({"NORMAL":0,"LOW":1,"HIGH":2})
  df.Cholesterol=df.Cholesterol.replace({"NORMAL":0,"HIGH":1})
  df.Drug=df.Drug.replace({"DrugY":0,"drugC":1,"drugX":2,"drugA":3,"drugB":4})
  X, y = df.iloc[:,:-1], df.iloc[:,-1]
  new_test_tensor = torch.utils.data.TensorDataset(torch.tensor(X.values.astype(np.float32)) , torch.tensor(y.values.astype(np.int32)).type(torch.LongTensor)) 
  new_test_loader = torch.utils.data.DataLoader(dataset = test_tensor, batch_size = batch_size, shuffle = False)

  #count accuracy of the model on the data from the file
  correct = 0
  total = 0
  iter = 0
  tot_acc = 0
  tot_los = 0
  for inputs, labels in new_test_loader:
    iter+=1
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
    accuracy = 100 * correct / total
    tot_acc += accuracy
    tot_los += loss.item()
  
  # Print Loss
  print('Loss: {}. Accuracy: {}'.format(tot_los/iter, tot_acc/iter))

  return accuracy


In [388]:
#write root to the file below
filename = '/content/hwdataset.csv'

count_accuracy_on_file(filename)

Loss: 0.9089741110801697. Accuracy: 86.80555725097656


tensor(86.1111)