In [16]:
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Run on the GPU when CUDA reports itself usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [17]:
# Location of the CSV, relative to the notebook's working directory.
file_path = "../data/riceClassification.csv"
# Parse the whole file into a DataFrame using pandas' default CSV settings.
data_df = pd.read_csv(filepath_or_buffer=file_path)

In [18]:
# Preview the first five rows of the freshly loaded frame.
data_df.head(n=5)

Unnamed: 0,id,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,1,4537,92.229316,64.012769,0.719916,4677,76.004525,0.657536,273.085,0.76451,1.440796,1
1,2,2872,74.691881,51.400454,0.725553,3015,60.471018,0.713009,208.317,0.831658,1.453137,1
2,3,3048,76.293164,52.043491,0.731211,3132,62.296341,0.759153,210.012,0.868434,1.46595,1
3,4,3073,77.033628,51.928487,0.738639,3157,62.5513,0.783529,210.657,0.870203,1.483456,1
4,5,3693,85.124785,56.374021,0.749282,3802,68.571668,0.769375,230.332,0.874743,1.51,1


In [19]:
# Discard rows containing missing values and remove the 'id' column.
# The result is rebound to `data_df` instead of mutating it in place, and
# errors='ignore' tolerates an already-removed 'id' column, so this cell can
# be executed more than once without raising KeyError.
data_df = data_df.dropna().drop(columns=['id'], errors='ignore')
print(data_df.shape)

(18185, 11)


In [20]:
# Preview the first five rows again to confirm the 'id' column is gone.
data_df.head(n=5)

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,4537,92.229316,64.012769,0.719916,4677,76.004525,0.657536,273.085,0.76451,1.440796,1
1,2872,74.691881,51.400454,0.725553,3015,60.471018,0.713009,208.317,0.831658,1.453137,1
2,3048,76.293164,52.043491,0.731211,3132,62.296341,0.759153,210.012,0.868434,1.46595,1
3,3073,77.033628,51.928487,0.738639,3157,62.5513,0.783529,210.657,0.870203,1.483456,1
4,3693,85.124785,56.374021,0.749282,3802,68.571668,0.769375,230.332,0.874743,1.51,1


In [21]:
# List the distinct label values, in order of first appearance.
print(pd.unique(data_df["Class"]))

[1 0]


In [22]:
# Count how many rows carry each label to gauge class balance.
print(data_df.loc[:, "Class"].value_counts())

Class
1    9985
0    8200
Name: count, dtype: int64


In [23]:
# Keep an independent (deep) snapshot of the frame as it stands at this point,
# so later changes to `data_df` do not affect it.
original = data_df.copy(deep=True)

In [25]:
# Scale each feature column by its largest absolute value so every feature
# lies in [-1, 1]. The "Class" label is deliberately excluded: it is a target,
# not a feature, and rescaling it only worked by accident for 0/1 labels.
# NOTE(review): the scale is computed over the full data set, i.e. before the
# train/validation/test split further down; consider fitting it on the
# training rows only to avoid leaking hold-out statistics.
feature_columns = data_df.columns.drop("Class")
column_scale = data_df[feature_columns].abs().max()
# An all-zero column would turn into NaN (0 / 0); divide it by 1 instead.
column_scale = column_scale.where(column_scale != 0, 1)
data_df[feature_columns] = data_df[feature_columns] / column_scale

data_df.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,0.444368,0.503404,0.775435,0.744658,0.424873,0.66661,0.741661,0.537029,0.844997,0.368316,1.0
1,0.281293,0.407681,0.622653,0.750489,0.273892,0.53037,0.80423,0.409661,0.919215,0.371471,1.0
2,0.298531,0.416421,0.630442,0.756341,0.28452,0.54638,0.856278,0.412994,0.959862,0.374747,1.0
3,0.300979,0.420463,0.629049,0.764024,0.286791,0.548616,0.883772,0.414262,0.961818,0.379222,1.0
4,0.361704,0.464626,0.682901,0.775033,0.345385,0.601418,0.867808,0.452954,0.966836,0.386007,1.0


In [26]:
# Every column except the last one is a feature; the last column is the label.
# copy=True guarantees standalone arrays that do not alias the DataFrame.
X = data_df.iloc[:, :-1].to_numpy(copy=True)
Y = data_df.iloc[:, -1].to_numpy(copy=True)


In [29]:
# Fixed seed so the membership of every split is identical on each run
# (without it, Restart & Run All produces a different partition every time).
SPLIT_SEED = 42

# 70 % of the rows go to training; the remaining 30 % are held out.
X_train, X_holdout, Y_train, Y_holdout = train_test_split(
    X, Y, test_size=0.3, random_state=SPLIT_SEED
)
# Halve the hold-out rows: one half for testing, one half for validation.
X_test, X_val, Y_test, Y_val = train_test_split(
    X_holdout, Y_holdout, test_size=0.5, random_state=SPLIT_SEED
)


In [30]:
# Report the shape of each split, one per line, in a fixed order.
for split_array in (X_train, X_test, Y_train, Y_test, X_val, Y_val):
    print(split_array.shape)

(12729, 10)
(2728, 10)
(12729,)
(2728,)
(2728, 10)
(2728,)


In [31]:
class dataset(Dataset):
    """In-memory dataset pairing each feature row with one label.

    Both inputs are converted to ``float32`` tensors and placed on the
    module-level ``device`` once, at construction time, so indexing does no
    further conversion or transfer.

    Args:
        X: Array-like of features; the first dimension indexes samples.
        Y: Array-like of labels; the first dimension indexes samples.

    Raises:
        ValueError: If ``X`` and ``Y`` hold a different number of samples.
    """

    def __init__(self, X, Y):
        self.X = torch.tensor(X, dtype=torch.float32, device=device)
        self.Y = torch.tensor(Y, dtype=torch.float32, device=device)
        # Fail fast: a mismatch would otherwise either ignore trailing labels
        # or surface later as an IndexError in the middle of iteration.
        if len(self.X) != len(self.Y):
            raise ValueError(
                f"X and Y must contain the same number of samples, "
                f"got {len(self.X)} and {len(self.Y)}"
            )

    def __len__(self):
        """Return the number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X[index], self.Y[index]

In [32]:
# Wrap each (features, labels) pair in a `dataset`, in the order
# train -> validation -> test.
training_data, validation_data, testing_data = (
    dataset(features, labels)
    for features, labels in ((X_train, Y_train), (X_val, Y_val), (X_test, Y_test))
)

In [33]:
# Mini-batch size shared by all three loaders.
BATCH_SIZE = 8

# Each loader wraps its OWN split: pointing the validation or test loader at
# the training set would make every evaluation metric a training metric.
# Only the training loader shuffles; evaluation order does not affect the
# metrics, and a fixed order keeps evaluation runs comparable.
train_dataloader = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(validation_data, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(testing_data, batch_size=BATCH_SIZE, shuffle=False)

In [35]:
# Show only the first mini-batch: features, a separator line, then labels.
for x, y in train_dataloader:
    print(x, "+++++", y, sep="\n")
    break

tensor([[0.6059, 0.8829, 0.5983, 0.9850, 0.5780, 0.7784, 0.5680, 0.7067, 0.6653,
         0.8373],
        [0.9203, 0.8698, 0.9196, 0.9095, 0.8755, 0.9593, 0.9122, 0.7586, 0.8770,
         0.5366],
        [0.4895, 0.7229, 0.5901, 0.9619, 0.4651, 0.6997, 0.7335, 0.5901, 0.7709,
         0.6950],
        [0.5924, 0.8431, 0.6243, 0.9751, 0.5705, 0.7696, 0.5076, 0.6863, 0.6898,
         0.7662],
        [0.8297, 0.8850, 0.8151, 0.9411, 0.7809, 0.9109, 0.6702, 0.7393, 0.8324,
         0.6160],
        [0.6850, 0.7855, 0.7525, 0.9330, 0.6460, 0.8277, 0.6818, 0.6600, 0.8623,
         0.5922],
        [0.6553, 0.9129, 0.6259, 0.9838, 0.6258, 0.8095, 0.8032, 0.7304, 0.6736,
         0.8275],
        [0.6463, 0.8521, 0.6595, 0.9694, 0.6116, 0.8039, 0.6294, 0.6860, 0.7532,
         0.7330]])
+++++
tensor([1., 0., 1., 1., 0., 0., 1., 1.])
