# Dry Bean Variety Prediction by Multiclass Classification using ANN (testing)

## Load dataset for the training process

In [1]:
import os
import numpy as np
import pandas as pd

### Open the CSV

In [2]:
df_test = pd.read_csv(os.path.join("DryBeanDataset", "dry_bean_test.csv"))
print("Num test:", len(df_test))

Num test: 2722


### Create dataset loader for testing process

In [3]:
import torch
from torch.utils.data import TensorDataset, DataLoader, Dataset

### Features to be used and target

In [4]:
x_names = [
    "Area", "Perimeter", "MajorAxisLength", "MinorAxisLength", "Eccentricity", "ConvexArea",
    "EquivDiameter", "Extent", "Solidity", "ShapeFactor1", "ShapeFactor2", "ShapeFactor3", "ShapeFactor4"]
y_name = "Class"
y_classes = ["SEKER", "BARBUNYA", "BOMBAY", "CALI", "DERMASON", "HOROZ", "SIRA"]

### Convert Pandas dataframe to PyTorch dataset

In [5]:
def df_to_dataset(df: pd.DataFrame) -> Dataset:
    features = df[x_names].to_numpy(dtype=np.float32)
    # preprocess data
    df["Area"] /= 500_000
    df["Perimeter"] /= 5000
    df["MajorAxisLength"] /= 2000
    df["MinorAxisLength"] /= 2000
    df["ConvexArea"] /= 500_000
    df["EquivDiameter"] /= 2000
    # preprocess labels
    labels = df[[y_name]].applymap(lambda x: y_classes.index(x)) # map Class labels to int
    labels = labels.to_numpy(dtype=np.int64).squeeze(axis=1)
    # create the dataset
    features = torch.from_numpy(features)
    labels = torch.from_numpy(labels)
    my_dataset = TensorDataset(features, labels)
    return my_dataset

In [6]:
ds_test = df_to_dataset(df_test)

### Create PyTorch data loader

In [7]:
BATCH_SIZE = 64
loader_test = DataLoader(ds_test, batch_size=BATCH_SIZE)

## Model testing

In [8]:
import models
from tqdm.notebook import tqdm
from sklearn.metrics import f1_score, accuracy_score

### Which device we will use for testing process (CPU/GPU)

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


### Create the model

In [10]:
model = models.MLP4Layers(n_features=len(x_names), n_classes=len(y_classes))
# Move the model from CPU to the device
# Actually, only required if the device is not CPU and has no effect if it is CPU
model = model.to(device)

### Load the trained weights of the model parameters

In [11]:
WEIGHT_PATH = os.path.join("runs_clf", "train_20220904_081814", "best.pt")
model.load_state_dict(torch.load(WEIGHT_PATH))

<All keys matched successfully>

### The testing process

In [12]:
# We don't need gradients for the model testing process
model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for i, vdata in tqdm(enumerate(loader_test)):
        vinputs, vlabels = vdata
        y_true.extend(vlabels.numpy().tolist())
        voutputs = model(vinputs.to(device))
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(voutputs.data, 1)
        y_pred.extend(predicted.cpu().numpy().tolist())

# Calculate our classification metrics
acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average="weighted")
print(f'Accuracy: {acc}, Weighted F1: {f1}')


0it [00:00, ?it/s]

Accuracy: 0.75165319617928, Weighted F1: 0.7490729516571731
