In [1]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from torch.autograd import Variable
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader

In [2]:
dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU

In [3]:
sortedFeatureCorr = pd.read_csv("dataset/corr.csv")
data = pd.read_csv("dataset/preprocessed.csv")

In [4]:
first_feature_count =           22
last_feature_count =            21

In [5]:
a = sortedFeatureCorr.iloc[:, 0].values

In [6]:
for i in range(first_feature_count , sortedFeatureCorr.shape[0] - last_feature_count):
    data = data.drop(a[i], axis=1)
data=data.drop(list(data.columns)[0], axis=1)


In [7]:
data_npWNan = data.iloc[:, :].values

data_np = data_npWNan[data_npWNan[:,-1]!= -1 ]
unique_elements, counts_elements = np.unique(data_np[:,-1], return_counts=True)
print("Frequency of unique values of the said array:")
print(np.asarray((unique_elements, counts_elements)))

Frequency of unique values of the said array:
[[     1      2      3      4      5]
 [ 78815  87257 136412 183844 275766]]


In [8]:
feature = data_np[:, :-1]
# target = data_np[:, -1].reshape(data_np.shape[0], 1)
target = data_np[:, -1]


In [9]:
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight('balanced', np.unique(target), target)
class_weights = torch.from_numpy(class_weights)
class_weights

tensor([1.9339, 1.7468, 1.1173, 0.8291, 0.5527], dtype=torch.float64)

In [10]:
target -= 1


In [11]:
X_trainAndVal, X_test, y_trainAndVal, y_test = train_test_split(feature, target, test_size = 0.2, random_state = 0)

In [12]:
X_train, X_val, y_train, y_val = train_test_split(X_trainAndVal, y_trainAndVal, test_size = 0.2, random_state = 0)

In [13]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_val = sc.transform(X_val)
X_test = sc.transform(X_test)

In [14]:
batch_size = 500
epochs = 50
input_dim = first_feature_count + last_feature_count - 1
output_dim = 5
lr_rate = 0.001

In [15]:
class datasetLoad(Dataset):
    def __init__(self, features,labels):
        self.features = features
        self.labels = labels
    def __len__(self):
        return len(self.features)
    def __getitem__(self, index):
        return self.features[index], self.labels[index]

In [16]:
X_train = datasetLoad(X_train, y_train)
X_val = datasetLoad(X_val, y_val)
X_test = datasetLoad(X_test, y_test)

In [17]:
train_loader = torch.utils.data.DataLoader(dataset = X_train, batch_size = batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset = X_val, batch_size = batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset = X_test, batch_size = batch_size, shuffle=False)

In [18]:
import torch.nn as nn
import torch.nn.functional as F
class neuralNetwork(torch.nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(neuralNetwork, self).__init__()
        
        self.hidden1 = nn.Linear(input_dim, hidden1_dim)
        self.bnhidden1 = nn.BatchNorm1d(hidden1_dim)
        
        self.hidden2 = nn.Linear(hidden1_dim, hidden2_dim)
        self.bnhidden2 = nn.BatchNorm1d(hidden2_dim)
        
        self.output = nn.Linear(hidden2_dim, output_dim)

    def forward(self, x):
        
        x = self.hidden1(x)
#         x = F.relu(x)
#         x = F.elu_(x, alpha=1.) 
        x = F.leaky_relu_(self.bnhidden1(x), negative_slope=0.01)
    
        x = self.hidden2(x)
        x = F.leaky_relu_(self.bnhidden2(x), negative_slope=0.01)
        
        
        
        outputs = self.output(x)
        return outputs

In [19]:
model = neuralNetwork(input_dim, 20,20, output_dim)

In [20]:
criterion = torch.nn.CrossEntropyLoss(weight=class_weights.float())

In [21]:
# sgd ya da adam kullanilabilir
# optimizer = torch.optim.SGD(model.parameters(), lr=lr_rate)
from torch.optim.lr_scheduler import LambdaLR
from torch.optim.lr_scheduler import MultiStepLR
optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9)

# learning rate'in 20 ve 40. epochta 10da 1'ine dusmesini sagladim
scheduler = MultiStepLR(optimizer, milestones=[5,10,20,40], gamma=0.4)

In [22]:
import datetime
a = datetime.datetime.now().replace(microsecond=0)
for epoch in range(epochs):
    for i, (features, labels) in enumerate(train_loader):
        

#         print(type(features))
#         print(type(labels))
        features = Variable(features)
        labels = Variable(labels)
#         print(type(features))
#         print(type(labels))
#         print(features.shape)
#         print(labels.shape)

        optimizer.zero_grad()
        outputs = model(features.float())
#         print("output ",outputs.shape)
#         print("label ",labels.shape)
#         print("output ",outputs[0])
#         print("label ",labels[0])
        loss = criterion(outputs.float(), labels)
        loss.backward()
        optimizer.step()

        
        if i % (len(X_train)// (4 * batch_size))  == 0:
            # calculate Accuracy
            correct = 0
            total = 0
            for features, labels in val_loader:
                features = Variable(features)
                outputs = model(features.float())
                _, predicted = torch.max(outputs.data, 1)
                total+= labels.size(0)
                # for gpu, bring the predicted and labels back to cpu fro python operations to work
                correct+= (predicted == labels).sum()
            accuracy = 100 * correct/total
            if(epoch % 5 == 0 and i == 0):
                print("Epoch: {}. Iteration: {}. Loss: {}. Accuracy: {}.".format(epoch, i, loss.item(), accuracy))

Epoch: 0. Iteration: 0. Loss: 1.6847076416015625. Accuracy: 18.
Epoch: 0. Iteration: 243. Loss: 1.3781368732452393. Accuracy: 39.
Epoch: 0. Iteration: 486. Loss: 1.370155692100525. Accuracy: 40.
Epoch: 0. Iteration: 729. Loss: 1.3735897541046143. Accuracy: 40.
Epoch: 0. Iteration: 972. Loss: 1.357505202293396. Accuracy: 40.
Epoch: 1. Iteration: 0. Loss: 1.3236922025680542. Accuracy: 41.
Epoch: 1. Iteration: 243. Loss: 1.3903530836105347. Accuracy: 41.
Epoch: 1. Iteration: 486. Loss: 1.3614193201065063. Accuracy: 40.
Epoch: 1. Iteration: 729. Loss: 1.3563145399093628. Accuracy: 40.
Epoch: 1. Iteration: 972. Loss: 1.3353302478790283. Accuracy: 40.
Epoch: 2. Iteration: 0. Loss: 1.3200013637542725. Accuracy: 39.
Epoch: 2. Iteration: 243. Loss: 1.3373721837997437. Accuracy: 41.
Epoch: 2. Iteration: 486. Loss: 1.3484879732131958. Accuracy: 40.
Epoch: 2. Iteration: 729. Loss: 1.3249375820159912. Accuracy: 40.
Epoch: 2. Iteration: 972. Loss: 1.333161473274231. Accuracy: 40.
Epoch: 3. Iteration

Epoch: 25. Iteration: 0. Loss: 1.4077187776565552. Accuracy: 41.
Epoch: 25. Iteration: 243. Loss: 1.3704936504364014. Accuracy: 41.
Epoch: 25. Iteration: 486. Loss: 1.3154747486114502. Accuracy: 41.
Epoch: 25. Iteration: 729. Loss: 1.3121947050094604. Accuracy: 41.
Epoch: 25. Iteration: 972. Loss: 1.3786165714263916. Accuracy: 39.
Epoch: 26. Iteration: 0. Loss: 1.3229299783706665. Accuracy: 40.
Epoch: 26. Iteration: 243. Loss: 1.2954442501068115. Accuracy: 41.
Epoch: 26. Iteration: 486. Loss: 1.3598051071166992. Accuracy: 41.
Epoch: 26. Iteration: 729. Loss: 1.3671915531158447. Accuracy: 40.
Epoch: 26. Iteration: 972. Loss: 1.3609373569488525. Accuracy: 41.
Epoch: 27. Iteration: 0. Loss: 1.3556997776031494. Accuracy: 40.
Epoch: 27. Iteration: 243. Loss: 1.3732348680496216. Accuracy: 40.
Epoch: 27. Iteration: 486. Loss: 1.3496559858322144. Accuracy: 41.
Epoch: 27. Iteration: 729. Loss: 1.318342924118042. Accuracy: 41.
Epoch: 27. Iteration: 972. Loss: 1.370898962020874. Accuracy: 41.
Epo

Epoch: 49. Iteration: 972. Loss: 1.3058476448059082. Accuracy: 40.


In [23]:
b = datetime.datetime.now().replace(microsecond=0)
print(b-a)



0:09:39
