The objective of this task is to predict whether it will rain or not based on Month, cloud cover, humidity, pressure, radiation, sunshine, and temperature (avg, min and max)

In [170]:
import numpy as np
import matplotlib.pyplot as plt
import csv

np.random.seed(0)

In [171]:
""" IMPORTING DATA """

def import_data(file_name):
    # temporarily store data in python lists
    data = []
    labels = []
    classes = 2

    with open(file_name, 'r') as csvfile:
        csvreader = csv.reader(csvfile)

        # skip first row because it has labels
        next(csvreader)
        data_indices = np.arange(1, 11)
        data_indices = np.delete(data_indices, 5)
        class_index = 6

        for row in csvreader:
            data.append([row[i] for i in data_indices])
            labels.append(float(row[class_index]))

    data_array = np.array(data)
    data_array = data_array.astype(np.float32)
    label_array = np.zeros((len(labels), classes))

    # add bias
    bias = np.ones((len(data_array), 1))
    data_array = np.hstack((bias, data_array))
    
    # set one hot encoding
    for i, label in enumerate(labels):
        if label > 0.01:
            label = 1
        else:
            label = 0
        label_array[i][label] = 1

    class_a_count = np.sum(label_array[:, 0])
    class_b_count = np.sum(label_array[:, 1])
    # baseline accuracy is the percentage of majority class, which is accuracy score we compare to indicate the model actually learned something useful
    print(f'label statistics: class A {class_a_count}, class B {class_b_count}, baseline accuracy: {np.max(np.array([class_a_count, class_b_count]))/(len(data_array))}')
    return data_array, label_array

def split_data(X, Y, test_ratio):
    num_test = int(len(X) * test_ratio)
    test_indices = np.random.choice(len(X), num_test, replace=False)
    Xtest = X[test_indices, :]
    Ytest = Y[test_indices, :]

    X = np.delete(X, test_indices, axis=0)
    Y = np.delete(Y, test_indices, axis=0)
    return X, Y, Xtest, Ytest

In [189]:
class NeuralNetwork():

    def __init__(self, layer_sizes):
        self.weights = []

        # initializing weights to small random values
        for i in range(1, len(layer_sizes)):
            self.weights.append(np.random.rand(layer_sizes[i], layer_sizes[i - 1]) * 0.1)
    
    def tanh(self, x):
        return np.tanh(x)

    def d_tanh(self, x):
        return 1 - np.power(np.tanh(x), 2)

    # returns the final classification result
    def predict(self, X):
        A_0 = X.T
        A_1 = self.weights[0] @ A_0
        A_1 = self.tanh(A_1)
        A_2 = self.weights[1] @ A_1
        A_2 = A_2.T
        return A_2
    
    # returns full history of forward so we can perform backpropagation
    def forward(self, X):
        A_0 = X
        A_1 = self.weights[0] @ A_0
        A_1 = self.tanh(A_1)
        A_2 = self.weights[1] @ A_1
        return (A_0, A_1, A_2)
    
    def backward(self, X, Y):
        A_0 = X
        A_0 = A_0.reshape((len(A_0), 1))

        # forward propagation
        A_0, A_1, A_2 = self.forward(A_0)
        #print(f'shape: a0: {A_0.shape} a1: {A_1.shape} a2: {A_2.shape} y: {Y.shape}')

        # backward: error of output layer
        Y = Y.reshape((len(Y), 1))
        dA_2 = A_2 - Y

        # backward: compute weight gradient of layers
        dW_1 = dA_2 @ A_1.T
        dA_1 = self.weights[1].T @ dA_2 * self.d_tanh(A_1)
        dW_0 = dA_1 @ A_0.T

        # final gradient
        weight_gradients = [dW_0, dW_1]
        return weight_gradients
    
    
    def train(self, X, Y, total_epoch, learning_rate=0.03, learning_rate_decay=0.8):
        print("beginning training")
        for epoch in range(total_epoch):

            # variable learning rate adjustment
            # TODO: better decay algorithm, such as checking error slope
            if epoch % (total_epoch // 10) == 0:
                learning_rate *= learning_rate_decay
                print(f"epoch {epoch} with learning rate {np.around(learning_rate, 4)}")

            print(f'hstack shape: {np.hstack((X, Y)).shape}')
            all_weight_gradients = np.apply_along_axis(lambda x: self.backward(x[:len(X[0])], x[len(X[0]):]), 1, np.hstack((X, Y)))
            print(f'all weight gradients shape: {all_weight_gradients.shape}')
            continue

            for i in range(0, len(X)):

                weight_gradients = self.backward(X[i], Y[i])
                # subtract the gradient from the weights
                for j in range(len(self.weights)):
                    self.weights[j] -= learning_rate * weight_gradients[j]

            if epoch % (total_epoch // 10) == 0:
                accuracy = self.test(X, Y)
                print(f'training accuracy: {np.around(accuracy, 5)}')
            

    def test(self, X, Y):
        target = np.argmax(Y, axis=1)
        predict = np.argmax(self.predict(X), axis=1)
        num_incorrect = np.sum(target != predict)
        accuracy = 1 - (num_incorrect/len(X))
        self.confusion(X, Y)
        return accuracy
    
    def confusion(self, X, Y):
        confusion_matrix = np.zeros((len(Y[0]), len(Y[0])))
        target = np.argmax(Y, axis=1)
        predict = np.argmax(self.predict(X), axis=1)
        for i in range(len(X)):
            confusion_matrix[target[i], predict[i]] += 1
        print(f'confusion matrix: \n{confusion_matrix}')

In [190]:
"""TESTING"""

file = '../../Data/weather/weather_prediction_dataset.csv'
X, Y = import_data(file)

def custom_nn():
    Xtrain, Ytrain, Xtest, Ytest = split_data(X, Y, 0.2)
    print(f'Shapes: X: {Xtrain.shape}, Y: {Ytrain.shape}, Xtest: {Xtest.shape}, Ytest: {Ytest.shape}')
    network1 = NeuralNetwork([len(Xtrain[0]), 50, len(Ytrain[0])])
    total_epoch = 100
    network1.train(Xtrain, Ytrain, total_epoch, 0.01, 0.6)

    accuracy = network1.test(Xtest, Ytest)
    print(f"TESTING ACCURACY: {accuracy}")

custom_nn()

label statistics: class A 2079.0, class B 1575.0, baseline accuracy: 0.5689655172413793
Shapes: X: (2924, 10), Y: (2924, 2), Xtest: (730, 10), Ytest: (730, 2)
beginning training
epoch 0 with learning rate 0.006
hstack shape: (2924, 12)


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

Below I will be implementing the same classification task but with PyTorch. I will be using a fully connected neural network with 2 hidden layers. This is done by creating an object that inherents the nn.Module object and defining a forward function. My implementation allows the neural network to be initialized with an array specifing the dimensions of each layer, with index 0 being the input and last index the output.

The following resources were used to help me write this code:
https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html (functions to call for initializing network, backpropagation, updating the weights and for inferencing)
https://medium.com/analytics-vidhya/creating-a-custom-dataset-and-dataloader-in-pytorch-76f210a1df5d (creating dataset and dataloader objects to load my dataset)
https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html (using the cross entropy loss for classification)

Additionally, the train_test_split function from sklearn is used to split the data between testing and training randomly.

In [174]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

device: cpu


In [175]:
class SimpleDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [176]:
class TorchNetwork(nn.Module):

    def __init__(self, dimensions):
        super().__init__()

        self.dimensions = dimensions
        self.layer_list = nn.ModuleList()
        for layer_num in range(len(dimensions) - 1):
            self.layer_list.append(nn.Linear(dimensions[layer_num], dimensions[layer_num + 1]))

    def forward(self, x):
        x = x.view(-1, self.dimensions[0])
        for i in range(len(self.layer_list)):
            x = F.relu(self.layer_list[i](x))
        return x

In [177]:
def test(net, loader, device):
    net.eval()
    correct = 0
    confusion = None

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)

            output = net(data)

            output_np = output.cpu().detach().numpy()
            answer = target.cpu().detach().numpy()
            correct += get_num_correct(output_np, answer)

            if confusion is None:
                confusion = get_confusion_matrix(output_np, answer)
            else:
                confusion += get_confusion_matrix(output_np, answer)
    
    accuracy = (correct / len(loader.dataset))
    print(f'confusion matrix: \n{confusion}')
    return accuracy


def get_num_correct(results, Y):
    target = np.argmax(Y, axis=1)
    predict = np.argmax(results, axis=1)
    num_incorrect = np.sum(target != predict)
    return len(results) - num_incorrect


def get_confusion_matrix(results, Y):
    confusion_matrix = np.zeros((len(Y[0]), len(Y[0])))
    target = np.argmax(Y, axis=1)
    predict = np.argmax(results, axis=1)
    for i in range(len(results)):
        confusion_matrix[target[i], predict[i]] += 1
    return confusion_matrix


def train(net, loader, optimizer, device):
    criterion = nn.CrossEntropyLoss()
    net.train()

    for data, target in loader:

        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        output = net(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    accuracy = test(net, loader, device)
    print(f'\ttraining accuracy: {accuracy}')

In [178]:
def torch_nn():
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.1, random_state=0)
    print(f'Shapes: Xtrain: {Xtrain.shape}, Ytrain: {Ytrain.shape}, Xtest: {Xtest.shape}, Ytest: {Ytest.shape}')
    
    # hyperparameters
    dimensions = [len(Xtrain[0]), 5, 5, len(Ytrain[0])]
    train_batch_size = 10
    test_batch_size = 10
    total_epoch = 10
    learning_rate = 0.01

    # PREPARING DATA

    # Convert to torch tensors
    Xtrain = torch.tensor(Xtrain, dtype=torch.float32)
    Ytrain = torch.tensor(Ytrain, dtype=torch.float32)
    Xtest = torch.tensor(Xtest, dtype=torch.float32)
    Ytest = torch.tensor(Ytest, dtype=torch.float32)

    # Create Dataset objects, then create torch dataloader
    train_dataset = SimpleDataset(Xtrain, Ytrain)
    test_dataset = SimpleDataset(Xtest, Ytest)

    train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

    # CREATING NEURAL NETWORK OBJECT
    network = TorchNetwork(dimensions)
    network = network.to(device)
    optimizer = optim.Adam(network.parameters(), lr=learning_rate)

    # TRAINING
    for epoch in range(total_epoch):
        print(f'Training epoch: {epoch}')
        train(network, train_loader, optimizer, device)

    # TESTING
    print("Testing")
    accuracy = test(network, test_loader, device)
    print(f'TEST ACCURACY: {accuracy}')
    

torch_nn()

Shapes: Xtrain: (3288, 10), Ytrain: (3288, 2), Xtest: (366, 10), Ytest: (366, 2)
Training epoch: 0


confusion matrix: 
[[1372.  501.]
 [ 352. 1063.]]
	training accuracy: 0.7405717761557178
Training epoch: 1
confusion matrix: 
[[1594.  279.]
 [ 677.  738.]]
	training accuracy: 0.7092457420924574
Training epoch: 2
confusion matrix: 
[[1301.  572.]
 [ 293. 1122.]]
	training accuracy: 0.7369221411192214
Training epoch: 3
confusion matrix: 
[[1379.  494.]
 [ 355. 1060.]]
	training accuracy: 0.7417883211678832
Training epoch: 4
confusion matrix: 
[[1242.  631.]
 [ 263. 1152.]]
	training accuracy: 0.7281021897810219
Training epoch: 5
confusion matrix: 
[[1428.  445.]
 [ 401. 1014.]]
	training accuracy: 0.7427007299270073
Training epoch: 6
confusion matrix: 
[[1293.  580.]
 [ 283. 1132.]]
	training accuracy: 0.7375304136253041
Training epoch: 7
confusion matrix: 
[[1467.  406.]
 [ 455.  960.]]
	training accuracy: 0.7381386861313869
Training epoch: 8
confusion matrix: 
[[1262.  611.]
 [ 266. 1149.]]
	training accuracy: 0.7332725060827251
Training epoch: 9
confusion matrix: 
[[1424.  449.]
 [ 