In [1]:
import opendatasets as od
od.download("https://www.kaggle.com/datasets/mssmartypants/rice-type-classification")

Skipping, found downloaded files in ".\rice-type-classification" (use force=True to force download)


In [2]:
import torch    
import torch.nn as nn #nn stands for neural network
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset 
from torchsummary import summary
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [3]:
data_df = pd.read_csv(r"rice-type-classification\riceClassification.csv")
data_df.head()

Unnamed: 0,id,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,1,4537,92.229316,64.012769,0.719916,4677,76.004525,0.657536,273.085,0.76451,1.440796,1
1,2,2872,74.691881,51.400454,0.725553,3015,60.471018,0.713009,208.317,0.831658,1.453137,1
2,3,3048,76.293164,52.043491,0.731211,3132,62.296341,0.759153,210.012,0.868434,1.46595,1
3,4,3073,77.033628,51.928487,0.738639,3157,62.5513,0.783529,210.657,0.870203,1.483456,1
4,5,3693,85.124785,56.374021,0.749282,3802,68.571668,0.769375,230.332,0.874743,1.51,1


In [4]:
data_df.dropna(inplace=True)
data_df.drop(columns=['id'], axis=1, inplace=True)
print(data_df.shape)

(18185, 11)


In [5]:
data_df.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,4537,92.229316,64.012769,0.719916,4677,76.004525,0.657536,273.085,0.76451,1.440796,1
1,2872,74.691881,51.400454,0.725553,3015,60.471018,0.713009,208.317,0.831658,1.453137,1
2,3048,76.293164,52.043491,0.731211,3132,62.296341,0.759153,210.012,0.868434,1.46595,1
3,3073,77.033628,51.928487,0.738639,3157,62.5513,0.783529,210.657,0.870203,1.483456,1
4,3693,85.124785,56.374021,0.749282,3802,68.571668,0.769375,230.332,0.874743,1.51,1


In [6]:
print(data_df['Class'].unique())

[1 0]


In [7]:
print(data_df['Class'].value_counts())

Class
1    9985
0    8200
Name: count, dtype: int64


In [8]:
original_df = data_df.copy()
for col in data_df.columns:
    data_df[col] = data_df[col]/data_df[col].abs().max()

data_df.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,0.444368,0.503404,0.775435,0.744658,0.424873,0.66661,0.741661,0.537029,0.844997,0.368316,1.0
1,0.281293,0.407681,0.622653,0.750489,0.273892,0.53037,0.80423,0.409661,0.919215,0.371471,1.0
2,0.298531,0.416421,0.630442,0.756341,0.28452,0.54638,0.856278,0.412994,0.959862,0.374747,1.0
3,0.300979,0.420463,0.629049,0.764024,0.286791,0.548616,0.883772,0.414262,0.961818,0.379222,1.0
4,0.361704,0.464626,0.682901,0.775033,0.345385,0.601418,0.867808,0.452954,0.966836,0.386007,1.0


In [9]:
X = np.array(data_df.iloc[:,:-1])
Y= np.array(data_df.iloc[:,-1])

In [10]:
X_train , X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.3)

In [11]:
X_test , X_val, Y_test, Y_val = train_test_split(X_test,Y_test, test_size = 0.5)

In [12]:
X_train.shape, X_test.shape, X_val.shape

((12729, 10), (2728, 10), (2728, 10))

In [13]:
class dataset(Dataset):
    def __init__(self, X, Y):
        self.X = torch.tensor(X, dtype = torch.float32).to(device)
        self.Y = torch.tensor(Y, dtype = torch.float32).to(device)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        return self.X[index], self.Y[index]

In [14]:
training_data = dataset(X_train, Y_train)
validation_data = dataset(X_val, Y_val)
testing_data = dataset(X_test, Y_test)

In [15]:
train_dataloader = DataLoader(training_data, batch_size=8, shuffle = True)
test_dataloader = DataLoader(testing_data, batch_size=8, shuffle = False)
val_dataloader = DataLoader(validation_data, batch_size=8, shuffle = False)

In [16]:
for x,y in train_dataloader:
    print(x)
    print(y)
    break

tensor([[0.8025, 0.8158, 0.8633, 0.9092, 0.7552, 0.8958, 0.6583, 0.7078, 0.8785,
         0.5361],
        [0.5137, 0.7840, 0.5766, 0.9759, 0.4902, 0.7167, 0.5258, 0.6345, 0.6997,
         0.7715],
        [0.5422, 0.7746, 0.6182, 0.9652, 0.5166, 0.7364, 0.8109, 0.6316, 0.7453,
         0.7109],
        [0.4685, 0.7357, 0.5563, 0.9725, 0.4475, 0.6844, 0.5994, 0.5882, 0.7426,
         0.7504],
        [0.6011, 0.8321, 0.6305, 0.9722, 0.5707, 0.7753, 0.5808, 0.6653, 0.7446,
         0.7488],
        [0.5948, 0.7563, 0.6840, 0.9446, 0.5779, 0.7712, 0.6472, 0.7078, 0.6512,
         0.6274],
        [0.8833, 0.8817, 0.8677, 0.9271, 0.8411, 0.9399, 0.6430, 0.7627, 0.8327,
         0.5765],
        [0.6392, 0.8561, 0.6537, 0.9712, 0.6080, 0.7995, 0.5706, 0.6901, 0.7360,
         0.7430]], device='cuda:0')
tensor([0., 1., 1., 1., 1., 1., 0., 1.], device='cuda:0')


In [17]:
HIDDEN_NEURONS = 16
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        
        self.input_layer = nn.Linear(X.shape[1], HIDDEN_NEURONS)
        self.linear = nn.Linear(HIDDEN_NEURONS, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.input_layer(x)
        x = self.linear(x)
        x = self.sigmoid(x)
        return x

model = MyModel().to(device)

In [18]:
summary(model, (X.shape[1],))

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 16]                  176
├─Linear: 1-2                            [-1, 1]                   17
├─Sigmoid: 1-3                           [-1, 1]                   --
Total params: 193
Trainable params: 193
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 16]                  176
├─Linear: 1-2                            [-1, 1]                   17
├─Sigmoid: 1-3                           [-1, 1]                   --
Total params: 193
Trainable params: 193
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [19]:
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr = 1e-3)

In [None]:
total_loss_train_plot = []
total_loss_validation_plot = []
total_acc_train_plot=[]
total_acc_validation_plot=[]

epochs = 10
for epoch in range(epochs):
    total_acc_train = 0
    total_loss_train = 0 
    total_acc_val= 0 
    total_loss_val = 0
    
    for data in train_dataloader:
        inputs, labels = data
        prediction = model(inputs).squeeze(1)
        batch_loss = criterion(prediction, labels)
        total_loss_train += batch_loss.item()
        acc = ((prediction).round() == labels).sum().item()
        total_acc_train += acc
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    
    with torch.no_grad():
        for data in val_dataloader:
            inputs, labels = data
            prediction = model(inputs).squeeze(1)
            batch_loss = criterion(prediction, labels)
            total_loss_val += batch_loss.item()
            acc = ((prediction).round() == labels).sum().item()
            total_acc_val += acc

total_loss_train_plot.append(round(total_loss_train/1000,4))
total_loss_validation_plot.append(round(total_loss_val/1000,4))
total_acc_train_plot.append(round(total_acc_train/1000,4))
total_acc_validation_plot.append(round(total_acc_val/1000,4))
   