<a href="https://colab.research.google.com/github/ThisIsFarhan/pytorch-codes/blob/main/07_TrainingPipeline_NN_Dataset_Dataloader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv")

In [3]:
encoder = LabelEncoder()
encoder.fit(df["species"])
df["encoded_species"] = encoder.transform(df["species"])


In [4]:
df = df[df["encoded_species"] != 2]

In [5]:
df["encoded_species"].value_counts()

Unnamed: 0_level_0,count
encoded_species,Unnamed: 1_level_1
0,50
1,50


In [6]:
df.drop(columns=["species"],inplace=True)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, :-1], df.iloc[:, -1], test_size=0.2)

In [8]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
X_train_tensor = torch.from_numpy(X_train.astype(np.float32))
X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
y_train_tensor = torch.from_numpy(y_train.to_numpy().astype(np.float32))
y_test_tensor = torch.from_numpy(y_test.to_numpy().astype(np.float32))

In [10]:
X_train_tensor.shape

torch.Size([80, 4])

In [11]:
y_train_tensor.shape

torch.Size([80])

In [12]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self, idx):
    return self.features[idx], self.labels[idx]

In [13]:
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor)

In [14]:
train_dataset[10]

(tensor([-0.1175,  1.6107, -0.7294, -0.6072]), tensor(0.))

In [15]:
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=True)

In [16]:
import torch.nn as nn

class Model(nn.Module):
  def __init__(self,num_features):
    super().__init__()
    self.linear = nn.Linear(num_features,1)
    self.sigmoid = nn.Sigmoid()

  def forward(self,features):
    out = self.linear(features)
    out = self.sigmoid(out)
    return out

In [17]:
learning_rate = 0.1
epochs = 25
loss_function = nn.BCELoss()

In [18]:
model = Model(X_train_tensor.shape[1])
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
  for batch_features, batch_labels in train_loader:
    #forward pass
    y_pred = model(batch_features)

    #loss
    loss = loss_function(y_pred.squeeze(), batch_labels)

    # clear gradients
    optimizer.zero_grad()

    #backward pass
    loss.backward()

    #params update
    optimizer.step()

  print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')

Epoch: 1, Loss: 0.2527620494365692
Epoch: 2, Loss: 0.15562912821769714
Epoch: 3, Loss: 0.10858918726444244
Epoch: 4, Loss: 0.11066722869873047
Epoch: 5, Loss: 0.05220940709114075
Epoch: 6, Loss: 0.0816882997751236
Epoch: 7, Loss: 0.062170349061489105
Epoch: 8, Loss: 0.06587722897529602
Epoch: 9, Loss: 0.0412796325981617
Epoch: 10, Loss: 0.03371352702379227
Epoch: 11, Loss: 0.0534648559987545
Epoch: 12, Loss: 0.04808369651436806
Epoch: 13, Loss: 0.023700619116425514
Epoch: 14, Loss: 0.044142045080661774
Epoch: 15, Loss: 0.057055722922086716
Epoch: 16, Loss: 0.0326772965490818
Epoch: 17, Loss: 0.025732703506946564
Epoch: 18, Loss: 0.018430661410093307
Epoch: 19, Loss: 0.0320720449090004
Epoch: 20, Loss: 0.01117154210805893
Epoch: 21, Loss: 0.009677734225988388
Epoch: 22, Loss: 0.03380732983350754
Epoch: 23, Loss: 0.023144425824284554
Epoch: 24, Loss: 0.021796274930238724
Epoch: 25, Loss: 0.008971888571977615


In [19]:
# Model evaluation using test_loader
model.eval()  # Set the model to evaluation mode
accuracy_list = []

with torch.no_grad():
    for batch_features, batch_labels in test_loader:
        # Forward pass
        y_pred = model(batch_features)
        y_pred = (y_pred > 0.8).float()  # Convert probabilities to binary predictions

        # Calculate accuracy for the current batch
        batch_accuracy = (y_pred.view(-1) == batch_labels).float().mean().item()
        accuracy_list.append(batch_accuracy)

# Calculate overall accuracy
overall_accuracy = sum(accuracy_list) / len(accuracy_list)
print(f'Accuracy: {overall_accuracy:.4f}')


Accuracy: 1.0000
