<a href="https://colab.research.google.com/github/Darshak910/NLP/blob/master/NLP_A01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Regression using a multi-layered 1D convolutional network in PyTorch**

In [0]:
# from google.colab import drive
# drive.mount('/content/drive')

In [0]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [0]:
# Read the housing table and discard every row that contains a missing value
ds = pd.read_csv('/content/housing.csv').dropna()

# Preview the first ten remaining rows
ds.head(10)

In [0]:
# Features: every column from 'longitude' through 'median_income' (label slices are inclusive)
X = ds.loc[:, 'longitude':'median_income']

# Target: the median house value column
Y = ds.loc[:, 'median_house_value']

**Splitting the data into training and testing sets**

In [643]:
# Hold out 30% of the rows for testing. random_state is fixed so that the split
# (and therefore every metric reported below) is the same after a kernel restart.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

# Plain NumPy arrays, which is what torch.from_numpy() is given later on
x_train_np = x_train.to_numpy()
y_train_np = y_train.to_numpy()
x_test_np = x_test.to_numpy()
y_test_np = y_test.to_numpy()

# NOTE: ndarray.size is the total number of elements (rows * feature columns),
# not the number of samples.
print("Training: " + str(x_train_np.size) + "\nTesting: " + str(x_test_np.size))

Training: 114424
Testing: 49040


**Creating the network**

In [0]:
import torch

from torch.nn import Conv1d
from torch.nn import MaxPool1d
from torch.nn import Flatten
from torch.nn import Linear
from torch.nn.functional import relu

# Importing DataLoader and TensorDataset libraries from PyTorch to work with our datasets
from torch.utils.data import DataLoader, TensorDataset

**Defining our Model**

In [0]:
class CNNRegression(torch.nn.Module):
  """Regression network: four kernel-size-1 Conv1d layers followed by two Linear layers.

  Every sample is reshaped to (inputs, 1), i.e. one channel per input feature and
  a sequence length of 1, so each convolution acts on the feature channels only.

  Args:
    batch_size: stored on the instance and, as in the original design, also used
      as the number of output channels of the first convolution.
    inputs: number of input features per sample (input channels of the first conv).
    outputs: number of regression targets produced per sample.
  """

  def __init__(self, batch_size, inputs, outputs):
    super(CNNRegression, self).__init__()
    self.batch_size = batch_size
    self.inputs = inputs
    self.outputs = outputs

    # (input channels, output channels, kernel size)
    # Every convolution gets its own attribute: re-using one attribute name would
    # silently replace the previous layer and leave only the last one in the model.
    self.input_layer = Conv1d(inputs, batch_size, 1)
    self.conv_layer_1 = Conv1d(batch_size, 32, 1)
    self.conv_layer_2 = Conv1d(32, 64, 1)
    self.conv_layer_3 = Conv1d(64, 128, 1)

    # maxpooling layer (kernel size). It has no parameters, so one instance is
    # shared by all stages. The sequence length is 1, so the kernel must be 1.
    self.max_pooling_layer = MaxPool1d(1)
    self.flatten_layer = Flatten()

    # (inputs, outputs)
    self.linear_layer = Linear(128, 64)

    # output layer
    self.output_layer = Linear(64, outputs)

  def feed(self, input):
    """Run a batch through the network.

    Args:
      input: tensor holding `self.inputs` values per sample; any number of samples.

    Returns:
      Tensor of shape (number of samples, outputs).
    """
    # Conv1d expects (batch, channels, length). -1 lets the batch dimension follow
    # the data, so a final partial batch or a single sample also works.
    input = input.reshape((-1, self.inputs, 1))

    output = self.max_pooling_layer(relu(self.input_layer(input)))

    # conv -> relu -> pool for each of the remaining convolution stages
    for conv in (self.conv_layer_1, self.conv_layer_2, self.conv_layer_3):
      output = self.max_pooling_layer(relu(conv(output)))

    # (batch, 128, 1) -> (batch, 128)
    output = self.flatten_layer(output)

    # Without a non-linearity here the two Linear layers collapse into one linear map
    output = relu(self.linear_layer(output))

    return self.output_layer(output)

  def forward(self, input):
    """Standard torch.nn.Module entry point; lets `model(x)` work like `model.feed(x)`."""
    return self.feed(input)

**Training the model**

In [0]:
from torch.optim import SGD
from torch.optim import Adam
from torch.optim import Adamax
from torch.nn import L1Loss

### !pip install pytorch-ignite
!pip install pytorch-ignite
### importing R^2 package
from ignite.contrib.metrics.regression.r2_score import R2Score

**Instantiating our model**

In [0]:
# Mini-batch size; CNNRegression also receives it as its first constructor argument
batch_size = 64

# One input per feature column of X, and a single regression output
model = CNNRegression(batch_size=batch_size, inputs=X.shape[1], outputs=1)

# Move the model's parameters onto the GPU
model.cuda()

**Defining the training and evaluation loop**

In [0]:
def model_loss(model, ds, train = False, optimizer = None):
  """Run one pass over `ds` and report the mean L1 loss and the R^2 score.

  Args:
    model: object exposing `feed(input)` that returns predictions for a batch.
    ds: iterable yielding `(input, output)` batches (e.g. a DataLoader).
    train: when True, back-propagate and step `optimizer` after every batch.
    optimizer: optimizer used for the updates; required when `train` is True.

  Returns:
    Tuple `(mean batch L1 loss, R^2 score)`. The R^2 score is computed once over
    every prediction seen during the pass.

  Raises:
    ValueError: if `train` is True without an optimizer, or `ds` yields no batches.
  """
  if train and optimizer is None:
    raise ValueError("model_loss: an optimizer is required when train=True")

  performance = L1Loss()
  score_metric = R2Score()

  total_loss = 0.0
  count = 0

  # Gradients are only tracked when we are actually going to back-propagate
  with torch.set_grad_enabled(train):
    for input, output in ds:
      # model's predictions for this batch
      predictions = model.feed(input)

      # get model's loss
      loss = performance(predictions, output)

      if train:
        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # compute gradients for our optimizer
        loss.backward()

        # use the optimizer to update the model's parameters based on the gradients
        optimizer.step()

      # The metric accumulates across updates, so it is only read once after the
      # loop; averaging its running value per batch would not be the R^2 of the pass.
      score_metric.update([predictions.detach(), output])

      # store the loss and update the counter
      total_loss += loss.item()
      count += 1

  if count == 0:
    raise ValueError("model_loss: ds produced no batches")

  return total_loss / count, score_metric.compute()

In [0]:
epochs = 200
optimizer = Adam(model.parameters(), lr = 1e-3)

# Training arrays become float tensors on the GPU. The targets are reshaped into a
# single column, which removes a warning PyTorch would otherwise output.
inputs = torch.from_numpy(x_train_np).cuda().float()
outputs = torch.from_numpy(y_train_np.reshape(-1, 1)).cuda().float()

# Shuffled mini-batches; a trailing batch smaller than batch_size is dropped
tensor = TensorDataset(inputs, outputs)
loader = DataLoader(tensor, batch_size=batch_size, shuffle=True, drop_last=True)

# Per-epoch history, plotted after training
epoch_history, r2_history, loss_history = [], [], []

# Start the training loop
for epoch in range(epochs):
  avg_loss, avg_r2_score = model_loss(model, loader, train=True, optimizer=optimizer)

  epoch_history.append(epoch + 1)
  r2_history.append(avg_r2_score)
  loss_history.append(avg_loss)

  print(f"Epoch {epoch + 1}:\n\tLoss = {avg_loss}\n\tR^2 Score = {avg_r2_score}")

In [0]:
# R^2 score recorded for every epoch
fig, ax = plt.subplots()
ax.plot(epoch_history, r2_history, label='Epoch to R2', color='red')
ax.legend()
plt.show()

# L1 loss recorded for every epoch
fig, ax = plt.subplots()
ax.plot(epoch_history, loss_history, label='Loss w.r.t Epochs')
ax.legend()
plt.show()

**Testing the model**

In [0]:
# Held-out arrays become float tensors on the GPU; targets become a single column
inputs = torch.from_numpy(x_test_np).cuda().float()
outputs = torch.from_numpy(y_test_np.reshape(y_test_np.shape[0], 1)).cuda().float()

tensor = TensorDataset(inputs, outputs)
# drop_last is kept so every batch holds exactly batch_size rows. Shuffling is off,
# so the dropped trailing rows are the same on every run and the reported
# metrics are repeatable.
loader = DataLoader(tensor, batch_size, shuffle=False, drop_last=True)

# No parameters are updated here, so autograd bookkeeping is switched off
with torch.no_grad():
  avg_loss, avg_r2_score = model_loss(model, loader)

print("The model's l1 loss is: " + str(avg_loss))
print("The model's R^2 score is: " + str(avg_r2_score))

In [0]:
# One stacked panel per column: (column name, extra keyword arguments for plt.plot).
# The first panel passes no colour and so uses matplotlib's default.
panel_styles = [
    ('longitude', {}),
    ('latitude', {'color': 'red'}),
    ('housing_median_age', {'color': 'green'}),
    ('total_rooms', {'color': 'yellow'}),
    ('total_bedrooms', {'color': 'orange'}),
    ('population', {'color': 'black'}),
    ('households', {'color': 'purple'}),
    ('median_income', {'color': 'brown'}),
    ('median_house_value', {'color': 'cyan'}),
]

for position, (column, line_kwargs) in enumerate(panel_styles, start=1):
    # Grid of len(panel_styles) rows and one column; panels are numbered from 1
    plt.subplot(len(panel_styles), 1, position)
    plt.plot(ds[column], **line_kwargs)
    plt.legend([column])