# Importing Libraries

In [4]:
import torch 
import torch.nn as nn 
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader 
from torchsummary import summary 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score 
import matplotlib.pyplot as plt 
import pandas as pd
import numpy as np 

In [5]:
from sklearn.preprocessing import StandardScaler

In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Data Loading and Exploration

In [7]:
df = pd.read_csv("riceClassification.csv")
df.head()

Unnamed: 0,id,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,1,4537,92.229316,64.012769,0.719916,4677,76.004525,0.657536,273.085,0.76451,1.440796,1
1,2,2872,74.691881,51.400454,0.725553,3015,60.471018,0.713009,208.317,0.831658,1.453137,1
2,3,3048,76.293164,52.043491,0.731211,3132,62.296341,0.759153,210.012,0.868434,1.46595,1
3,4,3073,77.033628,51.928487,0.738639,3157,62.5513,0.783529,210.657,0.870203,1.483456,1
4,5,3693,85.124785,56.374021,0.749282,3802,68.571668,0.769375,230.332,0.874743,1.51,1


In [8]:
df.shape

(18185, 12)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18185 entries, 0 to 18184
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               18185 non-null  int64  
 1   Area             18185 non-null  int64  
 2   MajorAxisLength  18185 non-null  float64
 3   MinorAxisLength  18185 non-null  float64
 4   Eccentricity     18185 non-null  float64
 5   ConvexArea       18185 non-null  int64  
 6   EquivDiameter    18185 non-null  float64
 7   Extent           18185 non-null  float64
 8   Perimeter        18185 non-null  float64
 9   Roundness        18185 non-null  float64
 10  AspectRation     18185 non-null  float64
 11  Class            18185 non-null  int64  
dtypes: float64(8), int64(4)
memory usage: 1.7 MB


In [10]:
df.Class.value_counts() # To check Class imbalance

Class
1    9985
0    8200
Name: count, dtype: int64

# Data Preprocessing

In [11]:
X = df.drop(columns="Class", axis=1)
y = df["Class"]

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(14548, 11)
(14548,)
(3637, 11)
(3637,)


In [13]:
scaler = StandardScaler()

scaled_train = scaler.fit_transform(X_train)
X_train = pd.DataFrame(scaled_train, columns=X_train.columns)

X_train.head()

Unnamed: 0,id,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation
0,-1.7085,-1.94261,-3.798442,-0.75267,-1.603407,-1.962882,-2.131433,-0.661549,-3.217686,1.359629,-1.37252
1,-1.639981,-1.215735,-1.768125,-0.709684,-0.048594,-1.205721,-1.246396,-0.240483,-1.785956,0.463359,-0.311069
2,-1.566894,-1.066545,-1.293598,-0.74285,0.234282,-1.075979,-1.076228,-0.596847,-1.309548,-0.085909,-0.016378
3,-0.478969,-0.374413,0.666214,-0.690304,0.928796,-0.36007,-0.327951,1.138451,0.212934,-1.065322,0.981206
4,0.089165,-0.602626,0.780478,-1.027162,1.191702,-0.602255,-0.567751,-0.272407,0.204604,-1.541874,1.523601


After this we need to change the df to tensors

`Method #1` -- quick and simple  

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)  
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)

`Method #2` -- Standard way - `TensorDataset + DataLoader`

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)   
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

`Method #3` -- By creating our own Dataset Class    

from torch.utils.data import Dataset    
class MyDataset(Dataset):  # Dataset is already a class, we are subclassing and modifying some of the functions
    def __init__(self, X, y):  
        self.X = torch.tensor(X.values, dtype=torch.float32)  
        self.y = torch.tensor(y.values, dtype=torch.float32)    
    def __len__(self):  
        return len(self.X)    
    def __getitem__(self, idx):  
        return self.X[idx], self.y[idx]  
train_dataset = MyDataset(X_train, y_train)

In [14]:
class MyDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X.values, dtype=torch.float32).to(device)
        self.y = torch.tensor(y.values, dtype=torch.float32).to(device)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [15]:
train_dataset = MyDataset(X_train, y_train)

In [16]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
for x,y in train_loader:  # sample of how data in X and y are stored in data loader
    print(x)
    print("----------")
    print(y)
    break

tensor([[ 0.5378,  1.2728,  0.4003,  1.3694, -1.0668,  1.3040,  1.2454, -0.2457,
          0.9536,  1.0088, -1.0820],
        [-0.2281, -0.8942,  0.0170, -1.0889,  1.0290, -0.8877, -0.8838,  0.1184,
         -0.5045, -1.0933,  1.1732],
        [ 0.7568,  1.5112,  0.7497,  1.4200, -0.9039,  1.5056,  1.4545,  0.9443,
          1.1641,  1.0789, -0.9810],
        [-1.6634, -0.5325, -1.0114, -0.1412, -0.2522, -0.4911, -0.4933, -0.4362,
         -0.7119,  0.1685, -0.4966],
        [ 0.4401,  0.9043, -0.0141,  1.2133, -1.1643,  1.0278,  0.9141, -0.1255,
          0.7244,  0.7047, -1.1393],
        [-0.0456, -0.7620,  0.3676, -1.0705,  1.1142, -0.7792, -0.7391, -1.5806,
         -0.2813, -1.1448,  1.3502],
        [-1.0781, -0.1448,  0.5708, -0.3795,  0.6653, -0.1385, -0.0929, -0.0086,
          0.2617, -0.6533,  0.5440],
        [ 1.5446,  0.6747,  0.3468,  0.6816, -0.3710,  0.7630,  0.7022, -0.2546,
          0.6852,  0.3192, -0.5963],
        [-1.5922, -0.7586, -0.8648, -0.4706,  0.1671, -0