In [16]:
import torch
from torch.autograd import Variable
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score

In [17]:
device  = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [18]:
from sklearn.datasets import make_moons

X,y = make_moons((9000,1000),noise=0.1,)

In [4]:
# from sklearn.datasets import make_classification
# X,y = make_classification(n_samples=10000, n_features=2, n_informative=2, 
#                     n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2,
#                           class_sep=2,
#                    flip_y=0,weights=[0.5,0.5], random_state=17)



# f, (ax1,ax2) = plt.subplots(nrows=1, ncols=2,figsize=(20,8))
# sns.scatterplot(X[:,0],X[:,1],hue=y,ax=ax1);
# ax1.set_title("No Imbalance");

# X,y = make_classification(n_samples=1000, n_features=2, n_informative=2, 
#                     n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2,
#                           class_sep=2,
#                    flip_y=0,weights=[0.99,0.01], random_state=17)
# sns.scatterplot(X[:,0],X[:,1],hue=y,ax=ax2);
# ax2.set_title("Imbalance 9:1 :: Negative:Postive");

# plt.show();

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42,stratify=y)

train_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_train),torch.Tensor(y_train).long())
test_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_test),torch.Tensor(y_test).long())

In [20]:
np.unique(y_test,return_counts=True)

(array([0, 1]), array([2970,  330]))

In [21]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=20, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

In [22]:
class LogisticRegression(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        outputs = self.linear(x)
        return outputs

In [23]:
batch_size = 100
n_iters = 3000
epochs = n_iters / (len(X) / batch_size)
input_dim = 2
output_dim = 2
lr_rate = 0.001

In [24]:
def my_loss(pred, true, weight_list, device):

    values = torch.unique(true,return_counts = True)[0].cpu().numpy()
    counts = torch.unique(true,return_counts = True)[1].cpu().numpy()
    count_dict = {values[i]:counts[i] for i in range(len(values))}
    if (0 not in count_dict.keys()):
      count_dict[0] = 0
    if (1 not in count_dict.keys()):
      count_dict[1] = 0

    n1,n2 = count_dict[0],count_dict[1]
    w1,w2 = weight_list
    loss1 = torch.nn.CrossEntropyLoss(weight=torch.tensor([w1,w2], dtype=torch.float32).to(device))
    return (loss1(pred, true) * (torch.tensor(w1) + torch.tensor(w2)))/((torch.tensor(n1)*torch.tensor(w1)) + (torch.tensor(n2)*torch.tensor(w2)))

In [25]:
torch.manual_seed(0)
import random
random.seed(0)
import numpy as np
np.random.seed(0)

In [26]:
model = LogisticRegression(input_dim, output_dim)

In [27]:
criterion = my_loss

In [28]:
optimizer = torch.optim.SGD(model.parameters(), lr=lr_rate)

In [33]:
iter = 0
for epoch in range(int(epochs)):
    for i, (images, labels) in enumerate(train_loader):
        labels = labels.long()
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels, [1,9], device)
        loss.backward()
        optimizer.step()

        iter+=1
        if iter%500==0:
            # calculate Accuracy
            correct = 0
            total = 0
            for images, labels in train_loader:
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total+= labels.size(0)
                # for gpu, bring the predicted and labels back to cpu fro python operations to work
                correct+= (predicted == labels).sum()
            tr_accuracy = 100 * correct/total
            predictions = []
            test_labels = []
            for images, labels in test_loader:
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total+= labels.size(0)
                # for gpu, bring the predicted and labels back to cpu fro python operations to work
                correct+= (predicted == labels).sum()
                predictions.extend(predicted)
                test_labels.extend(labels)
            accuracy = 100 * correct/total
            f1_score_val = f1_score(test_labels, predictions)
            print("Iteration: {:05d} | Train Loss: {:.4f} | Train Accuracy: {:.4f} | Accuracy: {:.4f} | F1_score: {:.4f}".
                  format(iter,loss.item(),tr_accuracy, accuracy, f1_score_val))

Iteration: 00500 | Train Loss: 0.1174 | Train Accuracy: 90.6269 | Accuracy: 90.5600 | F1_score: 0.6317
Iteration: 01000 | Train Loss: 0.1171 | Train Accuracy: 90.5522 | Accuracy: 90.5000 | F1_score: 0.6310
Iteration: 01500 | Train Loss: 0.0999 | Train Accuracy: 90.5224 | Accuracy: 90.4700 | F1_score: 0.6302
Iteration: 02000 | Train Loss: 0.1277 | Train Accuracy: 90.4776 | Accuracy: 90.4300 | F1_score: 0.6295
Iteration: 02500 | Train Loss: 0.1490 | Train Accuracy: 90.4478 | Accuracy: 90.4100 | F1_score: 0.6295
Iteration: 03000 | Train Loss: 0.0625 | Train Accuracy: 90.4328 | Accuracy: 90.4000 | F1_score: 0.6295
Iteration: 03500 | Train Loss: 0.1067 | Train Accuracy: 90.4328 | Accuracy: 90.3900 | F1_score: 0.6279
Iteration: 04000 | Train Loss: 0.1583 | Train Accuracy: 90.3582 | Accuracy: 90.3300 | F1_score: 0.6272
Iteration: 04500 | Train Loss: 0.1445 | Train Accuracy: 90.3582 | Accuracy: 90.3200 | F1_score: 0.6265
Iteration: 05000 | Train Loss: 0.0412 | Train Accuracy: 90.3433 | Accurac

In [55]:
torch.manual_seed(0)
import random
random.seed(0)
import numpy as np
np.random.seed(0)

In [56]:
model1 = LogisticRegression(input_dim, output_dim)

In [57]:
criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor((1,9), dtype=torch.float32).to(device))

In [58]:
optimizer = torch.optim.SGD(model1.parameters(), lr=lr_rate)

In [59]:
iter = 0
for epoch in range(int(epochs)):
    for i, (images, labels) in enumerate(train_loader):
        labels = labels.long()
        optimizer.zero_grad()
        outputs = model1(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        iter+=1
        if iter%500==0:
            # calculate Accuracy
            correct = 0
            total = 0
            for images, labels in train_loader:
                outputs = model1(images)
                _, predicted = torch.max(outputs.data, 1)
                total+= labels.size(0)
                # for gpu, bring the predicted and labels back to cpu fro python operations to work
                correct+= (predicted == labels).sum()
            tr_accuracy = 100 * correct/total
            predictions = []
            test_labels = []
            for images, labels in test_loader:
                outputs = model1(images)
                _, predicted = torch.max(outputs.data, 1)
                total+= labels.size(0)
                # for gpu, bring the predicted and labels back to cpu fro python operations to work
                correct+= (predicted == labels).sum()
                predictions.extend(predicted)
                test_labels.extend(labels)
            accuracy = 100 * correct/total
            f1_score_val = f1_score(test_labels, predictions)
            print("Iteration: {:05d} | Train Loss: {:.4f} | Train Accuracy: {:.4f} | Accuracy: {:.4f} | F1_score: {:.4f}".
                  format(iter,loss.item(),tr_accuracy, accuracy, f1_score_val))

Iteration: 00500 | Train Loss: 0.6496 | Train Accuracy: 75.2388 | Accuracy: 75.0300 | F1_score: 0.3783
Iteration: 01000 | Train Loss: 0.5271 | Train Accuracy: 80.8060 | Accuracy: 80.4400 | F1_score: 0.4463
Iteration: 01500 | Train Loss: 0.4101 | Train Accuracy: 85.1642 | Accuracy: 85.0100 | F1_score: 0.5311
Iteration: 02000 | Train Loss: 0.5463 | Train Accuracy: 87.6418 | Accuracy: 87.4700 | F1_score: 0.5771
Iteration: 02500 | Train Loss: 0.5361 | Train Accuracy: 88.6567 | Accuracy: 88.4700 | F1_score: 0.5936
Iteration: 03000 | Train Loss: 0.3250 | Train Accuracy: 88.5373 | Accuracy: 88.4500 | F1_score: 0.5956
Iteration: 03500 | Train Loss: 0.4099 | Train Accuracy: 88.2687 | Accuracy: 88.2800 | F1_score: 0.5954
Iteration: 04000 | Train Loss: 0.5511 | Train Accuracy: 87.9701 | Accuracy: 87.9800 | F1_score: 0.5884
Iteration: 04500 | Train Loss: 0.4340 | Train Accuracy: 87.9701 | Accuracy: 87.9500 | F1_score: 0.5848
Iteration: 05000 | Train Loss: 0.2449 | Train Accuracy: 87.8955 | Accurac

In [50]:
torch.manual_seed(0)
import random
random.seed(0)
import numpy as np
np.random.seed(0)

In [51]:
model2 = LogisticRegression(input_dim, output_dim)

In [52]:
criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor((1,9), dtype=torch.float32).to(device))

In [53]:
optimizer = torch.optim.Adam(model2.parameters(), lr=lr_rate)

In [54]:
iter = 0
for epoch in range(int(epochs)):
    for i, (images, labels) in enumerate(train_loader):
        labels = labels.long()
        optimizer.zero_grad()
        outputs = model2(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        iter+=1
        if iter%500==0:
            # calculate Accuracy
            correct = 0
            total = 0
            for images, labels in train_loader:
                outputs = model2(images)
                _, predicted = torch.max(outputs.data, 1)
                total+= labels.size(0)
                # for gpu, bring the predicted and labels back to cpu fro python operations to work
                correct+= (predicted == labels).sum()
            tr_accuracy = 100 * correct/total
            predictions = []
            test_labels = []
            for images, labels in test_loader:
                outputs = model2(images)
                _, predicted = torch.max(outputs.data, 1)
                total+= labels.size(0)
                # for gpu, bring the predicted and labels back to cpu fro python operations to work
                correct+= (predicted == labels).sum()
                predictions.extend(predicted)
                test_labels.extend(labels)
            accuracy = 100 * correct/total
            f1_score_val = f1_score(test_labels, predictions)
            print("Iteration: {:05d} | Train Loss: {:.4f} | Train Accuracy: {:.4f} | Accuracy: {:.4f} | F1_score: {:.4f}".
                  format(iter,loss.item(),tr_accuracy, accuracy, f1_score_val))

Iteration: 00500 | Train Loss: 0.4900 | Train Accuracy: 88.1493 | Accuracy: 88.0300 | F1_score: 0.5744
Iteration: 01000 | Train Loss: 0.2515 | Train Accuracy: 89.8955 | Accuracy: 89.8400 | F1_score: 0.6254
Iteration: 01500 | Train Loss: 0.1621 | Train Accuracy: 89.4328 | Accuracy: 89.4000 | F1_score: 0.6123
Iteration: 02000 | Train Loss: 0.5454 | Train Accuracy: 89.2388 | Accuracy: 89.2000 | F1_score: 0.6059
Iteration: 02500 | Train Loss: 0.5545 | Train Accuracy: 89.6119 | Accuracy: 89.5000 | F1_score: 0.6093
Iteration: 03000 | Train Loss: 0.1411 | Train Accuracy: 89.7164 | Accuracy: 89.5700 | F1_score: 0.6093
Iteration: 03500 | Train Loss: 0.2545 | Train Accuracy: 89.8806 | Accuracy: 89.7900 | F1_score: 0.6176
Iteration: 04000 | Train Loss: 0.6825 | Train Accuracy: 89.8806 | Accuracy: 89.8400 | F1_score: 0.6228
Iteration: 04500 | Train Loss: 0.2525 | Train Accuracy: 90.0597 | Accuracy: 90.0300 | F1_score: 0.6285
Iteration: 05000 | Train Loss: 0.0870 | Train Accuracy: 90.0448 | Accurac