In [1]:
!pip install opendatasets --quiet

In [2]:
import opendatasets as od
od.download("https://www.kaggle.com/datasets/mssmartypants/rice-type-classification")

In [3]:
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


In [4]:
data_df = pd.read_csv("/content/rice-type-classification/riceClassification.csv")
data_df.head()

FileNotFoundError: [Errno 2] No such file or directory: '/content/rice-type-classification/riceClassification.csv'

In [None]:
data_df.info()

In [None]:
data_df.describe()

In [None]:
data_df.isnull().sum()

In [None]:
data_df.dropna(inplace=True)
data_df.drop(columns=['id'], axis=1, inplace=True)

In [None]:
data_df.shape

In [None]:
data_df.head()

In [None]:
data_df.Class.unique()

In [None]:
data_df.Class.value_counts()

In [None]:
original_df = data_df.copy()

In [None]:
for column in data_df.columns:
  data_df[column] = data_df[column] / data_df[column].abs().max()

data_df.head()

In [None]:
X = np.array(data_df.iloc[:, :-1])
y = np.array(data_df.iloc[:, -1])

In [None]:
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.3, random_state=42)

In [None]:
X_test, X_val, y_test, y_val = tts(X_test, y_test, test_size=0.5, random_state=42)

In [None]:
print(X_train.shape)
print(X_test.shape)
print(X_val.shape)

In [None]:
X_train

In [None]:
X_test

In [None]:
X_val

In [None]:
class dataset(Dataset):
  def __init__(self, X, y):
    self.X = torch.tensor(X, dtype=torch.float32).to(device)
    self.y = torch.tensor(y, dtype=torch.float32).to(device)

  def __len__(self):
    return len(self.X)

  def __getitem__(self, index):
    return self.X[index], self.y[index]

In [None]:
training_data = dataset(X_train, y_train)
validation_data = dataset(X_val, y_val)
testing_data = dataset(X_test, y_test)

In [None]:
training_data

In [None]:
train_dataloader = DataLoader(training_data, batch_size=32, shuffle=True)
val_dataloader = DataLoader(validation_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(testing_data, batch_size=32, shuffle=True)

In [None]:
for X, y in train_dataloader:
  print(X)
  print("==================")
  print(y)
  break

In [None]:
HIDDEN_NEURONS = 10

class MyModel(nn.Module):
  def __init__(self):
    super(MyModel, self).__init__()

    self.input_layer = nn.Linear(X.shape[1], HIDDEN_NEURONS)
    self.linear = nn.Linear(HIDDEN_NEURONS, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, X):
    x = self.input_layer(X)
    x = self.linear(x)
    x = self.sigmoid(x)
    return x

In [None]:
model = MyModel().to(device)

In [None]:
summary(model, (X.shape[1], ))

In [None]:
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=1e-3)

In [None]:
total_loss_train_plot = []
total_loss_val_plot = []
total_acc_train_plot = []
total_acc_val_plot = []

epochs = 10

for epoch in range(epochs):
  total_acc_train = 0
  total_loss_train = 0
  total_acc_val = 0
  total_loss_val = 0

  for data in train_dataloader:
    inputs, labels = data

    prediction = model(inputs).squeeze(1)

    batch_loss = criterion(prediction, labels)
    total_loss_train += batch_loss.item()

    acc = ((prediction).round() == labels).sum().item()
    total_acc_train += acc

    batch_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
  with torch.no_grad():
    for data in val_dataloader:
      inputs, labels = data

      prediction = model(inputs).squeeze(1)
      batch_loss = criterion(prediction, labels)
      total_loss_val += batch_loss.item()

      acc = ((prediction).round() == labels).sum().item()
      total_acc_val += acc
    total_loss_train_plot.append(round(total_loss_train/1000, 4))
    total_loss_val_plot.append(round(total_loss_val/1000, 4))
    total_acc_train_plot.append(round(total_acc_train/training_data.__len__() * 100, 4))
    total_acc_val_plot.append(round(total_acc_val/validation_data.__len__() * 100, 4))

    print(f"""Epoch no {epoch + 1}
          Train Loss:      {round(total_loss_train/1000, 4)}\t\t Train Accuracy:    {round(total_acc_train/training_data.__len__() * 100, 4)}
          Validation Loss: {round(total_loss_val/1000, 4)}\t\t Validation Accuracy: {round(total_acc_val/validation_data.__len__() * 100, 4)}""")
    print("="*80)

In [None]:
with torch.no_grad():
  total_loss_test = 0
  total_acc_test = 0
  for data in test_dataloader:
    inputs, labels = data

    prediction = model(inputs).squeeze(1)

    batch_loss = criterion(prediction, labels)
    total_loss_test += batch_loss.item()

    acc = ((prediction).round() == labels).sum().item()
    total_acc_test += acc

print(f"Accuracy: {round(total_acc_test / testing_data.__len__() * 100, 4)}")

In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

axs[0].plot(total_loss_train_plot, label = "Training Loss")
axs[0].plot(total_loss_val_plot, label = "Validation Loss")
axs[0].set_title("Training and Validation loss over epochs")
axs[0].set_xlabel("Epochs")
axs[0].set_ylabel("Loss")
axs[0].set_ylim([0, 0.1])
axs[0].legend()

axs[1].plot(total_acc_train_plot, label = "Training Accuracy")
axs[1].plot(total_acc_val_plot, label = "Validation Accuracy")
axs[1].set_title("Training and Validation Accuracy over epochs")
axs[1].set_xlabel("Epochs")
axs[1].set_ylabel("Accuracy")
axs[1].set_ylim([97, 100])
axs[1].legend()

plt.show()

In [None]:
original_df.head()

In [None]:
area = 6284 / original_df['Area'].abs().max()
MajorAxisLength =  81 / original_df['MajorAxisLength'].abs().max()
MinorAxisLength =  42 / original_df['MinorAxisLength'].abs().max()
Eccentricity =  0.98 / original_df['Eccentricity'].abs().max()
ConvexArea =  1200 / original_df['ConvexArea'].abs().max()
EquivDiameter =  33 / original_df['EquivDiameter'].abs().max()
Extent =  0.7 / original_df['Extent'].abs().max()
Perimeter =  927 / original_df['Perimeter'].abs().max()
Roundness =  0.9 / original_df['Roundness'].abs().max()
AspectRation =  1.45 / original_df['AspectRation'].abs().max()


In [None]:
pred = model(torch.tensor([area, MajorAxisLength, MinorAxisLength, Eccentricity, ConvexArea, EquivDiameter, Extent, Perimeter, Roundness, AspectRation], dtype=torch.float32).to(device))

In [None]:
pred

In [None]:
pred.item()

In [None]:
round(pred.item())

In [None]:
from google.colab import drive
drive.mount('/content/drive')