# 1. Imports


In [None]:
import torch   # main library
import torch.nn as nn # Used for getting the NN Layers
from torch.optim import Adam  # Adam optimizer
from torch.utils.data import Dataset, DataLoader # Dataset class and DataLoader for creatning the objects
from torchsummary import summary # Visualize the model layers and number of parameters

#sklearn
from sklearn.model_selection import train_test_split # Split the dataset (train, validation, test)
from sklearn.metrics import accuracy_score # Calculate the testing Accuracy
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt # Plotting the training progress at the end
import pandas as pd # Data reading and preprocessing
import numpy as np # Mathematical operations

# Pick the compute device: use the GPU when CUDA is available, otherwise the CPU.
# (On a Mac, swap 'cuda' for 'mps' by hand if you want to use the Apple GPU.)
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

# 2. Dataset

In [None]:
 df=pd.read_csv(r'/kaggle/input/rice-type-classification/riceClassification.csv')
df.head()

In [None]:
# Clean the frame in place: remove rows with missing values, then the "id" column.
df.dropna(inplace=True)
df.drop(columns=["id"], inplace=True)

# Report the distinct target labels and the resulting table size.
print("Output possibilities: ", df["Class"].unique())
print("Data Shape (rows, cols): ", df.shape)

# Preview the first five rows of the cleaned data.
df.head()
     

In [None]:
# Number of rows per target label (shows how balanced the classes are).
df["Class"].value_counts()

In [None]:
# Keep an independent (deep) copy of the cleaned frame so the un-normalized
# values remain available later, e.g. for normalizing inference inputs.
original_df = df.copy(deep=True)


# 3. Splitting

`Training Size 70%
Validation Size 15%
Testing Size 15% `

In [None]:
# Features are every column except the target; the target is the "Class" column.
X = np.array(df.drop(columns='Class'))
y = np.array(df['Class'])

# First cut: 70% training, 30% held out.
X_train, X_rest, y_train, y_rest = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Second cut: split the held-out 30% evenly into testing and validation.
X_test, X_val, y_test, y_val = train_test_split(
    X_rest, y_rest, test_size=0.5, random_state=42
)

# Report the size of each split and its share of the full dataset.
for split_name, split in (("Training", X_train), ("Validation", X_val), ("Testing", X_test)):
    share = round(split.shape[0] / df.shape[0], 4) * 100
    print(f"{split_name} set is: ", split.shape[0], " rows which is ", share, "%")
     

# 4. Normalization

In [None]:
# Fit the scaler on the training split only, so validation/test statistics
# never influence the normalization parameters.
scaler = StandardScaler().fit(X_train)

# Apply the same training-set mean/variance to all three splits.
X_train, X_val, X_test = (scaler.transform(part) for part in (X_train, X_val, X_test))

# 5. Dataset Object

In [None]:
class dataset(Dataset):
    """In-memory dataset pairing a feature matrix with its labels.

    Both arrays are converted to float32 tensors and moved to the module-level
    ``device`` once, at construction time.
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.float32)
        self.X = features.to(device)
        self.y = targets.to(device)

    def __len__(self):
        # Number of samples = size of the first dimension of the features.
        return self.X.shape[0]

    def __getitem__(self, index):
        # Return the (features, label) pair stored at ``index``.
        sample = self.X[index]
        target = self.y[index]
        return sample, target

# Wrap each (features, labels) split in a dataset object.
training_dataset, validation_dataset, testing_dataset = (
    dataset(features, targets)
    for features, targets in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# 6. Hyper Parameters

In [None]:
EPOCHS = 10          # number of full passes over the training data
BATCH_SIZE = 32      # samples per batch handed out by the data loaders
HIDDEN_NEURONS = 15  # width of the model's hidden layer
LR = 0.001           # learning rate passed to the optimizer

# 7. Data Loaders

In [None]:
# Batch iterators over the three splits. Training and validation batches are
# reshuffled on every pass; the test loader keeps the original sample order.
train_dataloader = DataLoader(training_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(testing_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 8. Build Model

In [None]:
class MyModel(nn.Module):
    """Small binary classifier: Linear -> Linear -> Sigmoid.

    Args:
        in_features: Number of input features per sample. When omitted, the
            column count of the module-level feature matrix ``X`` is used.
        hidden_neurons: Width of the hidden layer. When omitted, the
            module-level ``HIDDEN_NEURONS`` constant is used.

    The forward pass returns a tensor of shape ``(batch, 1)`` with values in
    ``[0, 1]`` (the sigmoid output), suitable for ``nn.BCELoss``.
    """

    def __init__(self, in_features=None, hidden_neurons=None):
        super().__init__()
        # Fall back to the notebook-level defaults only when the caller did not
        # give explicit sizes, so ``MyModel()`` keeps working unchanged.
        if in_features is None:
            in_features = X.shape[1]
        if hidden_neurons is None:
            hidden_neurons = HIDDEN_NEURONS

        self.input_layer = nn.Linear(in_features, hidden_neurons)
        self.fc1 = nn.Linear(hidden_neurons, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # NOTE: there is no activation between the two linear layers, so their
        # composition is itself an affine map followed by a sigmoid.
        hidden = self.input_layer(x)
        logits = self.fc1(hidden)
        return self.sigmoid(logits)


# 9. Create Model

In [None]:
# Instantiate the network and move its parameters to the selected device.
model = MyModel()
model = model.to(device)

# Print a per-layer overview (output shapes and parameter counts) for a
# single sample with one value per feature column.
n_features = X.shape[1]
summary(model, (n_features,))

# 10. Loss and Optimizer

In [None]:
# Binary cross-entropy on the model's sigmoid outputs.
criterion = nn.BCELoss()
# Adam updates every model parameter with learning rate LR.
optimizer = Adam(params=model.parameters(), lr=LR)

# 11. Training 

In [None]:
# Per-epoch history, consumed by the plotting cell.
total_loss_train_plot = []
total_loss_validation_plot = []
total_acc_train_plot = []
total_acc_validation_plot = []


def _run_epoch(dataloader, train):
    """Run one full pass over ``dataloader`` with the module-level model.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        train: When True, gradients are computed and the optimizer is stepped
            after every batch; when False, the pass is evaluation-only.

    Returns:
        Tuple ``(loss_sum, correct, seen)``: the loss summed over samples, the
        number of correctly classified samples, and the number of samples seen.
    """
    model.train(train)  # explicit train/eval mode for the pass
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.set_grad_enabled(train):
        for inputs, labels in dataloader:
            prediction = model(inputs).squeeze(1)
            batch_loss = criterion(prediction, labels)

            if train:
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # The criterion returns the batch mean (nn.BCELoss default), so
            # weight it by the batch size; a smaller final batch then does not
            # skew the epoch average.
            loss_sum += batch_loss.item() * batch_size
            # Outputs are probabilities; rounding thresholds them at 0.5.
            correct += (prediction.round() == labels).sum().item()
            seen += batch_size
    return loss_sum, correct, seen


for epoch in range(EPOCHS):
    total_loss_train, total_acc_train, seen_train = _run_epoch(train_dataloader, train=True)
    total_loss_val, total_acc_val, seen_val = _run_epoch(valid_dataloader, train=False)

    # Report per-sample means so training and validation losses share a scale
    # (previously both sums were divided by a fixed 1000 regardless of how much
    # data each split contained). ``max(..., 1)`` guards against an empty loader.
    train_loss = total_loss_train / max(seen_train, 1)
    val_loss = total_loss_val / max(seen_val, 1)
    train_acc = total_acc_train / max(seen_train, 1) * 100
    val_acc = total_acc_val / max(seen_val, 1) * 100

    total_loss_train_plot.append(round(train_loss, 4))
    total_loss_validation_plot.append(round(val_loss, 4))
    total_acc_train_plot.append(round(train_acc, 4))
    total_acc_validation_plot.append(round(val_acc, 4))

    print(f'''Epoch no. {epoch + 1} Train Loss: {train_loss:.4f} Train Accuracy: {train_acc:.4f} Validation Loss: {val_loss:.4f} Validation Accuracy: {val_acc:.4f}''')
    print("="*50)

# 12. Testing

In [None]:
# Evaluate on the held-out test split; no gradients are needed here.
total_loss_test = 0
total_acc_test = 0
with torch.no_grad():
    for inputs, labels in test_dataloader:
        prediction = model(inputs).squeeze(1)

        # Accumulate the batch loss and the number of correct predictions
        # (sigmoid outputs rounded to 0/1 and compared with the labels).
        total_loss_test += criterion(prediction, labels).item()
        total_acc_test += (prediction.round() == labels).sum().item()

print(f"Accuracy Score is: {round((total_acc_test/X_test.shape[0])*100, 2)}%")

# 13. Plotting and Visualizations


In [None]:
# Two side-by-side panels: loss on the left, accuracy on the right.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

# (axis, metric name, training curve, validation curve, fixed y-range)
panels = (
    (axs[0], 'Loss', total_loss_train_plot, total_loss_validation_plot, [0, 2]),
    (axs[1], 'Accuracy', total_acc_train_plot, total_acc_validation_plot, [0, 100]),
)
for ax, metric, train_curve, val_curve, y_range in panels:
    ax.plot(train_curve, label=f'Training {metric}')
    ax.plot(val_curve, label=f'Validation {metric}')
    ax.set_title(f'Training and Validation {metric} over Epochs')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(metric)
    ax.set_ylim(y_range)
    ax.legend()

plt.tight_layout()

plt.show()
     