In [100]:
#tabular data classification

In [101]:
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

#pytorch cant detect cpu directly
device =  'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [102]:
dataset = pd.read_csv('/content/riceClassification.csv')

In [103]:
dataset.head()

Unnamed: 0,id,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,1,4537,92.229316,64.012769,0.719916,4677,76.004525,0.657536,273.085,0.76451,1.440796,1
1,2,2872,74.691881,51.400454,0.725553,3015,60.471018,0.713009,208.317,0.831658,1.453137,1
2,3,3048,76.293164,52.043491,0.731211,3132,62.296341,0.759153,210.012,0.868434,1.46595,1
3,4,3073,77.033628,51.928487,0.738639,3157,62.5513,0.783529,210.657,0.870203,1.483456,1
4,5,3693,85.124785,56.374021,0.749282,3802,68.571668,0.769375,230.332,0.874743,1.51,1


In [104]:
dataset.dropna(inplace=True)

In [105]:
dataset =dataset.drop(['id'], axis = 1)

In [106]:
#normalization
original = dataset.copy()

In [107]:
for column in dataset.columns:
  dataset[column] = (dataset[column]/ (dataset[column].abs().max()))

In [108]:
dataset.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,0.444368,0.503404,0.775435,0.744658,0.424873,0.66661,0.741661,0.537029,0.844997,0.368316,1.0
1,0.281293,0.407681,0.622653,0.750489,0.273892,0.53037,0.80423,0.409661,0.919215,0.371471,1.0
2,0.298531,0.416421,0.630442,0.756341,0.28452,0.54638,0.856278,0.412994,0.959862,0.374747,1.0
3,0.300979,0.420463,0.629049,0.764024,0.286791,0.548616,0.883772,0.414262,0.961818,0.379222,1.0
4,0.361704,0.464626,0.682901,0.775033,0.345385,0.601418,0.867808,0.452954,0.966836,0.386007,1.0


In [109]:
x = np.array(dataset.iloc[:,:-1]) #until last
y = np.array(dataset.iloc[:,-1]) #only last

In [110]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [111]:
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42)

In [112]:
print(x_train.shape)
print(x_test.shape)
print(x_val.shape)

(12729, 10)
(2728, 10)
(2728, 10)


In [113]:
#convert the data into py torch data

In [114]:
class dataset(Dataset):
  def __init__(self,x,y):
    self.x=torch.tensor(x,dtype = torch.float32).to(device)
    self.y=torch.tensor(y,dtype = torch.float32).to(device)
  def __len__(self):
    return len(self.x)
  def __getitem__(self,idx):
    return self.x[idx],self.y[idx]

In [115]:
training_data = dataset(x_train,y_train)
validation_data = dataset(x_val,y_val)
testing_data = dataset(x_test,y_test)

In [116]:
#data loader to loop through
train_dataloader = DataLoader(training_data,batch_size=32,shuffle=True)
val_dataloader = DataLoader(validation_data,batch_size=32,shuffle=True)
test_dataloader = DataLoader(testing_data,batch_size=32,shuffle=True)

In [117]:
#build own model
HIDDEN_NEURONS =10
class mymodel(nn.Module):
  def __init__(self):
    super(mymodel, self).__init__()

    self.input_layer = nn.Linear(x.shape[1], HIDDEN_NEURONS)
    self.linear = nn.Linear(HIDDEN_NEURONS, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self,x):
    x = self.input_layer(x)
    x = self.linear(x)
    x = self.sigmoid(x)
    return x


model = mymodel().to(device)

In [118]:
summary(model, (x.shape[1],))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 10]             110
            Linear-2                    [-1, 1]              11
           Sigmoid-3                    [-1, 1]               0
Total params: 121
Trainable params: 121
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [120]:
optimizer = Adam(model.parameters(), lr = 1e-3)

# ✅ Use BCEWithLogitsLoss for binary classification
criterion = nn.BCEWithLogitsLoss()

total_loss_train_plot = []
total_loss_val_plot = []
total_acc_train_plot = []
total_acc_val_plot = []

for epoch in range(100):
    total_loss_train = 0
    total_loss_val = 0
    total_acc_train = 0
    total_acc_val = 0

    # ---------------- TRAIN ----------------
    model.train()
    for inputs, labels in train_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # forward pass
        prediction = model(inputs)

        # match dimensions: [batch_size, 1]
        labels = labels.float().unsqueeze(1)

        batch_loss = criterion(prediction, labels)
        total_loss_train += batch_loss.item()

        # accuracy: sigmoid + round for binary classification
        predicted = torch.sigmoid(prediction).round()
        acc = (predicted == labels).sum().item()
        total_acc_train += acc

        # backward + optimize
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    # ---------------- VALIDATION ----------------
    model.eval()
    with torch.no_grad():
        for inputs, labels in val_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            prediction = model(inputs)
            labels = labels.float().unsqueeze(1)

            batch_loss = criterion(prediction, labels)
            total_loss_val += batch_loss.item()

            predicted = torch.sigmoid(prediction).round()
            acc = (predicted == labels).sum().item()
            total_acc_val += acc

    # ---------------- LOGGING ----------------
    avg_train_loss = total_loss_train / len(train_dataloader)
    avg_val_loss   = total_loss_val / len(val_dataloader)

    avg_train_acc = total_acc_train / len(training_data) * 100
    avg_val_acc   = total_acc_val / len(validation_data) * 100

    total_loss_train_plot.append(round(avg_train_loss, 4))
    total_loss_val_plot.append(round(avg_val_loss, 4))
    total_acc_train_plot.append(round(avg_train_acc, 4))
    total_acc_val_plot.append(round(avg_val_acc, 4))

    print(f'Epoch {epoch+1}: '
          f'Train Loss = {avg_train_loss:.4f}, Train Acc = {avg_train_acc:.2f}%, '
          f'Val Loss = {avg_val_loss:.4f}, Val Acc = {avg_val_acc:.2f}%')
    print("=" * 40)


Epoch 1: Train Loss = 0.4956, Train Acc = 55.02%, Val Loss = 0.4916, Val Acc = 56.30%
Epoch 2: Train Loss = 0.4948, Train Acc = 55.83%, Val Loss = 0.4914, Val Acc = 58.72%
Epoch 3: Train Loss = 0.4943, Train Acc = 57.68%, Val Loss = 0.4898, Val Acc = 60.04%
Epoch 4: Train Loss = 0.4940, Train Acc = 60.61%, Val Loss = 0.4912, Val Acc = 61.11%
Epoch 5: Train Loss = 0.4938, Train Acc = 63.33%, Val Loss = 0.4904, Val Acc = 67.30%
Epoch 6: Train Loss = 0.4936, Train Acc = 66.68%, Val Loss = 0.4892, Val Acc = 68.44%
Epoch 7: Train Loss = 0.4935, Train Acc = 69.56%, Val Loss = 0.4899, Val Acc = 70.56%
Epoch 8: Train Loss = 0.4934, Train Acc = 72.10%, Val Loss = 0.4908, Val Acc = 77.68%
Epoch 9: Train Loss = 0.4932, Train Acc = 74.56%, Val Loss = 0.4897, Val Acc = 76.54%
Epoch 10: Train Loss = 0.4932, Train Acc = 76.60%, Val Loss = 0.4903, Val Acc = 80.32%
Epoch 11: Train Loss = 0.4930, Train Acc = 78.54%, Val Loss = 0.4900, Val Acc = 79.99%
Epoch 12: Train Loss = 0.4931, Train Acc = 80.01%, V

In [122]:
with torch.no_grad():
  total_loss_test =0;
  total_acc_test =0;
  for data in test_dataloader:
    inputs, labels = data
    prediction = model(inputs).squeeze(1)

    batch_loss = criterion(prediction, labels)
    total_loss_test += batch_loss.item()

    predicted = torch.sigmoid(prediction).round()
    acc = (predicted == labels).sum().item()
    total_acc_test += acc
print("acc : ",round(total_acc_test/len(testing_data)*100.4))

acc :  97
