<a href="https://colab.research.google.com/github/STEMlib/deeplearning/blob/main/LogRegColab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [153]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets
import matplotlib.pyplot as plt
import numpy as np


def class_weights(dataset):    
    '''
    Creates weights for each class. 
    Used to balance sampling
    '''       
    target = dataset.targets
    class_sample_count = np.unique(target, return_counts=True)[1]
    weight = 1. / class_sample_count
    samples_weight = np.array([weight[t] for t in target])
    samples_weight = torch.from_numpy(samples_weight)
    samples_weight = weight[target]
    return samples_weight


def balance_checker(dataset):
    '''
    Checks the balance of the classes
    '''
    total = 0
    counter_dict = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0, 6:0, 7:0, 8:0, 9:0}
    for data in trainset:
        Xs, ys = data
        for y in ys:
            counter_dict[int(y)] += 1
            total += 1
    for i in counter_dict:
        print(f"{i}: {counter_dict[i]/total*100.0}%")


def import_data():
    '''
    Classes defined as
    airplane : 0
    automobile : 1
    bird : 2
    cat : 3
    deer : 4
    dog : 5
    frog : 6
    horse : 7
    ship : 8
    truck : 9
    '''
    chosen_class = 3

    # import data
    transform_train = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))])
    train = torchvision.datasets.CIFAR10('./data',train=True,download=False,transform = transform_train)
    test = torchvision.datasets.CIFAR10('./data',train=False,download=False,transform = transform_train)

    train.targets = [(target == chosen_class)*1 for target in train.targets] # convert train target to [0,1] for chosen class
    test.targets = [(target == chosen_class)*1 for target in test.targets] # convert test targets to [0,1] for chosen class

    # For unbalanced dataset we create a weighted sampler   
    ## train set                    
    train_weights = class_weights(train)                                         
    train_sampler = torch.utils.data.sampler.WeightedRandomSampler(train_weights, len(train_weights))      
    trainset = torch.utils.data.DataLoader(train, batch_size = 32, sampler = train_sampler)
    print("\n ~~~ Train Set Balance ~~~ \n")
    balance_checker(trainset)
    ## test set
    test_weights = class_weights(test)                                                                                                     
    test_sampler = torch.utils.data.sampler.WeightedRandomSampler(test_weights, len(test_weights)) 
    testset = torch.utils.data.DataLoader(test, batch_size = 32, sampler = test_sampler)
    print("\n ~~~ Test Set Balance ~~~ \n")
    balance_checker(testset)

    
    return trainset, testset
    
class logreg_net(nn.Module):
    '''
    Logistic Regression using a neural net
      - One input layer
      - One hidden layer
      - One output layer
    '''

    #initialize
    def __init__(self):    
        super().__init__()
        self.layer1 = nn.Linear(3*32*32, 64)
        self.layer2 = nn.Linear(64, 64)
        self.layer3 = nn.Linear(64, 2)


    # forward
    def forward(self,x):        
        x = F.relu(self.layer1(x)) 
        x = F.relu(self.layer2(x))
        x = self.layer3(x)
        return F.log_softmax(x,dim=1)


def train_model(num_epoches):
    # train
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = logreg_net().to(device)
    optimizer = optim.Adam(model.parameters(),lr=0.0005)
    EPOCHS = num_epoches
    for epoch in range(EPOCHS):
        for data in trainset:
            X, y = data
            model.zero_grad()
            output = model(X.view(-1,img_shape).to(device))
            loss = F.nll_loss(output,y.to(device))
            loss.backward()
            optimizer.step()
        print("Loss = ", loss)


def eval_model():
    # Evaluate
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = logreg_net().to(device)
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testset:
            X, y = data
            output = model(X.view(-1,img_shape).to(device))
            #print(output)
            for idx, i in enumerate(output):
                #print(torch.argmax(i), y[idx])
                if torch.argmax(i) == y[idx]:
                    correct += 1
                total += 1
    print("Accuracy: ", round(correct/total, 3))


def main():

    print("Device Count: ",torch.cuda.device_count())
    print("Device Name: ", torch.cuda.get_device_name(0))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    trainset, testset = import_data()
    train_model(num_epoches=20)
    eval_model(model=model)
    


# Import Data (CIFAR10)

In [135]:
print("Device Count: ",torch.cuda.device_count())
print("Device Name: ", torch.cuda.get_device_name(0))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
trainset, testset = import_data()


Device Count:  1
Device Name:  Tesla T4

 ~~~ Train Set Balance ~~~ 

0: 50.086%
1: 49.913999999999994%
2: 0.0%
3: 0.0%
4: 0.0%
5: 0.0%
6: 0.0%
7: 0.0%
8: 0.0%
9: 0.0%

 ~~~ Test Set Balance ~~~ 

0: 49.826%
1: 50.174%
2: 0.0%
3: 0.0%
4: 0.0%
5: 0.0%
6: 0.0%
7: 0.0%
8: 0.0%
9: 0.0%


# Train Model

In [154]:
train_model(num_epoches=7)

Loss =  tensor(0.4088, device='cuda:0', grad_fn=<NllLossBackward>)
Loss =  tensor(0.5285, device='cuda:0', grad_fn=<NllLossBackward>)
Loss =  tensor(0.2246, device='cuda:0', grad_fn=<NllLossBackward>)
Loss =  tensor(0.2711, device='cuda:0', grad_fn=<NllLossBackward>)
Loss =  tensor(0.2632, device='cuda:0', grad_fn=<NllLossBackward>)
Loss =  tensor(0.3271, device='cuda:0', grad_fn=<NllLossBackward>)
Loss =  tensor(0.1764, device='cuda:0', grad_fn=<NllLossBackward>)


# Evaluate Model

In [155]:
eval_model()

Accuracy:  0.51
