In [None]:
import torch
from torch import nn
import numpy as np
import pandas as pd 
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda") if cuda else torch.device("cpu")
print(device)

cuda


# **Sections A + B + C**

In [None]:
class my_dataset(Dataset):
  """Tab-separated regression dataset with min-max scaled features.

  The CSV is read as float32. The column named "Y" is the target; every
  other column is a feature that is rescaled to [0, 1] column-wise.

  NOTE: the min/max statistics are computed over every row of the file, so
  any train/test split made afterwards shares the same scaling statistics.

  Args:
    csv_path: location of the tab-separated file. Defaults to the path the
      notebook originally hard-coded.

  Attributes:
    data: the raw DataFrame as read from disk.
    x: float32 tensor of scaled features, one row per sample.
    y: float32 tensor of targets with shape (n_samples, 1).
  """

  def __init__(self, csv_path='/content/drive/MyDrive/diabetes.csv'):
    self.data = pd.read_csv(csv_path, delimiter = '\t', dtype= np.float32)

    y = self.data["Y"]
    x = self.data.drop(["Y"], axis=1)
    x = np.array(x, dtype=np.float32)

    min_val = x.min(axis=0)
    value_range = x.max(axis=0) - min_val
    # A constant column has max == min; dividing by that zero range would
    # fill the column with NaN. Using 1 instead maps the column to all zeros.
    value_range[value_range == 0] = 1

    x_new = (x - min_val) / value_range

    self.x = torch.from_numpy(x_new)
    # Targets get a trailing axis so each sample's label has shape (1,).
    self.y = torch.from_numpy(np.expand_dims(np.array(y, dtype=np.float32), axis=1))

  def __len__(self):
    """Return the number of rows read from the CSV."""
    return len(self.data)

  def __getitem__(self, idx):
    """Return the (scaled features, target) pair stored at position `idx`."""
    return self.x[idx], self.y[idx]

# Instantiate the dataset; __init__ reads the CSV and min-max scales the features.
my_data = my_dataset()

In [None]:
# Show one (features, target) sample; indexing dispatches to __getitem__.
print(my_data[2])

(tensor([0.8833, 1.0000, 0.5165, 0.4366, 0.2892, 0.2590, 0.2468, 0.2821, 0.4966,
        0.4091]), tensor([141.]))


In [None]:
# Report how many samples the dataset holds; len() dispatches to __len__.
print(len(my_data))

442


In [None]:
# Split the data 80/20 into a training set and a testing set.
# A fixed-seed generator makes the split identical on every fresh run, so the
# reported test metrics are comparable between runs.
SPLIT_SEED = 42
train_size = int(0.8 * len(my_data))
tarin_size = train_size  # original (misspelled) name, kept so existing references still work
test_size = len(my_data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    my_data,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
print(len(train_dataset), len(test_dataset), sep='\n')

353
89


# **Section D**

In [None]:
# Wrap both subsets in DataLoaders so they can be consumed in mini-batches.
# Only the training loader shuffles; the test loader keeps a fixed order.
BATCH_SIZE = 10
train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Pull the first batch out of each loader for inspection.
train_features, train_Y = next(iter(train_dataloader))
test_features, test_Y = next(iter(test_dataloader))

# **Section E (printing one batch)**

In [None]:
# Print the sampled training batch: features first, then targets, with a blank line between them.
batch_report = [f'the features:\n{train_features}', f'the Y value:\n{train_Y}']
print('\n\n'.join(batch_report))

the features:
tensor([[0.5167, 0.0000, 0.2066, 0.5493, 0.4657, 0.4173, 0.3896, 0.2821, 0.3624,
         0.3333],
        [0.5833, 0.0000, 0.3636, 0.6479, 0.3873, 0.1942, 0.5844, 0.1410, 0.5959,
         0.7273],
        [0.2500, 1.0000, 0.2769, 0.7887, 0.7696, 0.7102, 0.2208, 0.7052, 0.6244,
         0.3485],
        [0.4500, 0.0000, 0.2851, 0.7465, 0.4951, 0.4382, 0.4156, 0.2821, 0.3575,
         0.6818],
        [0.4500, 1.0000, 0.2273, 0.3521, 0.4118, 0.3645, 0.2857, 0.2821, 0.5095,
         0.6061],
        [0.2167, 1.0000, 0.1653, 0.3662, 0.1961, 0.1843, 0.3377, 0.1410, 0.2433,
         0.3030],
        [0.2833, 1.0000, 0.5909, 0.7465, 0.9265, 0.7859, 0.2208, 0.7052, 0.7772,
         0.8182],
        [0.5333, 1.0000, 0.3264, 0.1972, 0.7010, 0.6345, 0.2208, 0.5642, 0.6378,
         0.5758],
        [0.8167, 1.0000, 0.3926, 0.6901, 0.5735, 0.5249, 0.2208, 0.4231, 0.5909,
         0.5000],
        [0.8333, 1.0000, 0.2686, 0.6479, 0.7157, 0.4721, 0.2338, 0.5642, 0.8951,
         0.636

# **Section F**

In [None]:
class my_model(nn.Module):
    """Fully connected regression network.

    Each hidden layer is a Linear followed by ReLU. A Dropout layer sits
    right before the final Linear that produces the output. With the default
    arguments the layer sequence (and therefore the state-dict keys) is
    Linear(10, 320), ReLU, Linear(320, 200), ReLU, Linear(200, 140), ReLU,
    Dropout(0.5), Linear(140, 1).

    Args:
        in_features: number of input features per sample.
        hidden_sizes: width of each hidden Linear layer, in order.
        dropout: drop probability applied before the output layer.
        out_features: number of values predicted per sample.
    """

    def __init__(self, in_features=10, hidden_sizes=(320, 200, 140), dropout=0.5, out_features=1):
        super().__init__()
        layers = []
        width = in_features
        for hidden in hidden_sizes:
            layers.append(nn.Linear(width, hidden))
            layers.append(nn.ReLU())
            width = hidden
        layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(width, out_features))
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the stack and return the raw (unactivated) outputs."""
        return self.linear_relu_stack(x)

# Build the network, then move its parameters onto the selected device.
model = my_model()
model = model.to(device)

In [None]:
# Mean squared error is the training objective; Adam updates every model parameter.
LEARNING_RATE = 0.001
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
epochs = 5000
losses = []  # mean training loss of every epoch, in order
lr = 0.001  # NOTE: never read below; the optimizer's rate was fixed when it was constructed

# Dropout is only applied in training mode. Set it explicitly so re-running
# this cell after the model has been switched to eval mode still trains correctly.
model.train()
for epoch in range(epochs):
    epoch_loss = []  # per-batch losses of the current epoch
    for data, labels in train_dataloader:

        preds = model(data.to(device))
        loss = loss_function(preds, labels.to(device))

        # Clear the gradients left over from the previous step before backpropagating.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss.append(loss.item())

    losses.append(np.mean(epoch_loss))

    # Log every 100th epoch only, to keep the cell output short.
    if epoch % 100 == 0:
        print(f'Epoch number: {epoch} and the loss: {losses[-1]}')

Epoch number: 0 and the loss: 27256.069281684027
Epoch number: 100 and the loss: 3153.172702365451
Epoch number: 200 and the loss: 2797.997480604384
Epoch number: 300 and the loss: 2395.112875196669
Epoch number: 400 and the loss: 1982.0045344034831
Epoch number: 500 and the loss: 1239.1340039571126
Epoch number: 600 and the loss: 1041.6144468519424
Epoch number: 700 and the loss: 1225.8902136484783
Epoch number: 800 and the loss: 871.9232343037924
Epoch number: 900 and the loss: 1153.6411730448406
Epoch number: 1000 and the loss: 720.2321275075277
Epoch number: 1100 and the loss: 702.7676853603787
Epoch number: 1200 and the loss: 747.398819817437
Epoch number: 1300 and the loss: 606.9750679863823
Epoch number: 1400 and the loss: 674.1663470798069
Epoch number: 1500 and the loss: 603.4952701992459
Epoch number: 1600 and the loss: 636.7071064843071
Epoch number: 1700 and the loss: 722.9214312235514
Epoch number: 1800 and the loss: 471.3799828423394
Epoch number: 1900 and the loss: 497.3

In [None]:
# Collect predictions and true targets for the whole test set.
predictions = []
test_Y = []

# The network contains a Dropout layer, which keeps randomly zeroing
# activations unless the module is in eval mode. Switch to eval for inference
# and restore whatever mode was active afterwards.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        for data, labels in test_dataloader:
            y_pred = model(data.to(device))
            # astype(int) truncates toward zero; kept so code consuming these
            # lists keeps receiving integer arrays.
            predictions.extend(y_pred.cpu().numpy().astype(int))
            test_Y.extend(labels.numpy().astype(int))
finally:
    model.train(was_training)

In [None]:
# Stack the per-sample entries into one column, then squeeze away the singleton axis.
test_Y = np.squeeze(np.array(test_Y).reshape(-1, 1))
predictions = np.squeeze(np.array(predictions).reshape(-1, 1))

In [None]:
# Coefficient of determination (R^2) of the predictions against the true targets.
score = r2_score(y_true=test_Y, y_pred=predictions)
print(score)

0.021959812263826817
