In [1]:
import torch
from torch import nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.decomposition import PCA

In [2]:
df = pd.read_csv('./Raisin_Dataset.csv')

In [3]:
df.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
0,87524,442.246011,253.291155,0.819738,90546,0.758651,1184.04,Kecimen
1,75166,406.690687,243.032436,0.801805,78789,0.68413,1121.786,Kecimen
2,90856,442.267048,266.328318,0.798354,93717,0.637613,1208.575,Kecimen
3,45928,286.540559,208.760042,0.684989,47336,0.699599,844.162,Kecimen
4,79408,352.19077,290.827533,0.564011,81463,0.792772,1073.251,Kecimen


In [4]:
# converting df class outputs to labels instead of string
# to be done for all samples

df.iloc[:450, 7] = 0
df.iloc[450:, 7] = 1

In [5]:
n_components = 2  # You can change this to the desired number of components
pca = PCA(n_components=n_components)

data_np = df.to_numpy()

# Fit PCA to the data
pca.fit(data_np)

# Transform the data using the learned PCA transformation
data_pca = pca.transform(data_np)

# Convert the transformed data back to a tensor
data_pca_tensor = torch.tensor(data_pca)

# Print the shape of the transformed data
print("Shape of the transformed data:", data_pca_tensor.shape)

Shape of the transformed data: torch.Size([900, 2])


In [6]:
# shuffle first 
X = df.iloc[:, :1].to_numpy().astype(np.float32)
Y = df['Class'].iloc[:].to_numpy().astype(np.float32) # change this based on output


# shuffling data

combined = list(zip(X, Y))

# Shuffle the combined list
random.shuffle(combined)

# Unzip the shuffled list to get back the original order
X_np_shuffled, Y_np_shuffled = zip(*combined)

X = np.array(X_np_shuffled)
Y = np.array(Y_np_shuffled)

In [7]:
train_split, test_split = 0.7, 0.3
assert train_split + test_split == 1;

split_index = int(train_split * df.shape[0])
X_train = X[:split_index]
Y_train = Y[:split_index]


In [8]:
X_train_tensor = torch.from_numpy(np.array(X_train))
Y_train_tensor = torch.from_numpy(np.array(Y_train))

In [9]:
X_train_tensor.shape

torch.Size([630, 1])

In [10]:
#normalizing the tensor
X_train_tensor_normalized = []
for i in range(X_train_tensor.shape[1]):
    # print(i + 1)
    X_train_tensor_normalized.append((X_train_tensor[:, i] - torch.mean(X_train_tensor[:, i])) / torch.std(X_train_tensor[:, i]))


X_train_tensor_normalized = torch.from_numpy(np.array(X_train_tensor_normalized)).reshape(split_index, -1)
X_train_tensor_normalized.shape


torch.Size([630, 1])

In [11]:
class RaisinNetwork(nn.Module):

    def __init__(self):
        super().__init__()
        self.linear_container_I = nn.Sequential(
            nn.Linear(1, 32),
            nn.LeakyReLU(),
            nn.Linear(32, 64),
            nn.LeakyReLU()
        )
        self.linear_container_II = nn.Sequential(
            nn.Linear(64, 32),
            nn.LeakyReLU(),
            nn.Linear(32, 8),
            nn.LeakyReLU(),
            nn.Linear(8, 1),
        )
        self.bn_layer_I = nn.BatchNorm1d(num_features=64)
        self.bn_layer_II = nn.BatchNorm1d(num_features=1)
        self.sigmoid = nn.Sigmoid()
        self.leaky_relu = nn.LeakyReLU()

    def forward(self, x):
        x = self.linear_container_I(x)
        x = self.bn_layer_I(x)
        x = self.leaky_relu(x)
        x = self.linear_container_II(x)
        x = self.bn_layer_II(x)
        x = self.sigmoid(x)
        
        return x
        

In [12]:
rn = RaisinNetwork()

In [13]:
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(rn.parameters(), lr=3e-2)

In [14]:
epochs = 500
for i in range(epochs):
    loss = loss_fn(rn(X_train_tensor_normalized), Y_train_tensor.reshape(-1, 1))
    print(f'EPOCH {i + 1} | LOSS {loss}')
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

EPOCH 1 | LOSS 0.5673609375953674
EPOCH 2 | LOSS 0.48017990589141846
EPOCH 3 | LOSS 0.46476519107818604
EPOCH 4 | LOSS 0.4547451436519623
EPOCH 5 | LOSS 0.4472959339618683
EPOCH 6 | LOSS 0.4451373517513275
EPOCH 7 | LOSS 0.4419794976711273
EPOCH 8 | LOSS 0.4375048875808716
EPOCH 9 | LOSS 0.4346925914287567
EPOCH 10 | LOSS 0.43264204263687134
EPOCH 11 | LOSS 0.42996031045913696
EPOCH 12 | LOSS 0.4270193874835968
EPOCH 13 | LOSS 0.4247560501098633
EPOCH 14 | LOSS 0.42290112376213074
EPOCH 15 | LOSS 0.420757919549942
EPOCH 16 | LOSS 0.4185243546962738
EPOCH 17 | LOSS 0.41671258211135864
EPOCH 18 | LOSS 0.41511380672454834
EPOCH 19 | LOSS 0.41345393657684326
EPOCH 20 | LOSS 0.41174936294555664
EPOCH 21 | LOSS 0.4102824926376343
EPOCH 22 | LOSS 0.4090252220630646
EPOCH 23 | LOSS 0.4078402519226074
EPOCH 24 | LOSS 0.40663325786590576
EPOCH 25 | LOSS 0.40547290444374084
EPOCH 26 | LOSS 0.40451499819755554
EPOCH 27 | LOSS 0.403644323348999
EPOCH 28 | LOSS 0.40275076031684875
EPOCH 29 | LOSS 0.

In [15]:
X_test_tensor = torch.from_numpy(X[split_index:])
Y_test_tensor = torch.from_numpy(Y[split_index:])

In [16]:
X_test_tensor.shape

torch.Size([270, 1])

In [17]:
Y_test_tensor.shape

torch.Size([270])

In [18]:
#normalizing the tensor
X_test_tensor_normalized = []
for i in range(X_test_tensor.shape[1]):
    X_test_tensor_normalized.append((X_test_tensor[:, i] - torch.mean(X_test_tensor[:, i])) / torch.std(X_test_tensor[:, i]))

X_test_tensor_normalized = torch.from_numpy(np.array(X_test_tensor_normalized)).reshape(X.shape[0] - split_index, -1)
X_test_tensor_normalized.shape


torch.Size([270, 1])

In [19]:

rn(X_test_tensor_normalized).shape

torch.Size([270, 1])

In [20]:
def accuracy(output, target):
    """
    Computes the accuracy of the model predictions given the output and target tensors.
    
    Args:
        output (torch.Tensor): Predicted output tensor of shape [batch_size, 1].
        target (torch.Tensor): Target tensor of shape [batch_size].
    
    Returns:
        float: Accuracy of the predictions.
    """
    # Convert output to binary predictions (0 or 1)
    predictions = (output >= 0.5).squeeze().long()
    
    # Compare predictions with target labels
    correct = (predictions == target).sum().item()
    
    # Calculate accuracy
    accuracy = correct / target.size(0)
    return accuracy

In [21]:
accuracy(rn(X_train_tensor_normalized), Y_train_tensor)

0.8158730158730159

In [22]:
accuracy(rn(X_test_tensor_normalized), Y_test_tensor)

0.8185185185185185

In [23]:
X_threshold_tensor = torch.from_numpy(df.iloc[:, :1].to_numpy().astype(np.float32))
Y_threshold_tensor = torch.from_numpy(df['Class'].iloc[:].to_numpy().astype(np.float32))

In [24]:
#normalizing the tensor
X_threshold_tensor_normalized = []
for i in range(X_threshold_tensor.shape[1]):
    # print(i + 1)
    X_threshold_tensor_normalized.append((X_threshold_tensor[:, i] - torch.mean(X_threshold_tensor[:, i])) / torch.std(X_threshold_tensor[:, i]))


X_threshold_tensor_normalized = torch.from_numpy(np.array(X_threshold_tensor_normalized)).reshape(900, -1)
X_threshold_tensor_normalized.shape


torch.Size([900, 1])

In [25]:
accuracy(rn(X_threshold_tensor_normalized), Y_threshold_tensor)

0.8166666666666667