In [1]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,accuracy_score,classification_report
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import pandas

In [2]:
# Read the training labels and features from their .npy files.
ytr = np.load("train_labels.npy")
Xtr = np.load("train_features.npy")

# Class count is derived as (largest label value + 1).
# NOTE(review): this presumes labels are 0-based integers -- confirm.
num_classes = ytr.max() + 1

# Standardise the features. The scaler is fitted on the training features
# only; the evaluation cells reuse this fitted object via scaler.transform().
scaler = StandardScaler()
Xtr = scaler.fit(Xtr).transform(Xtr)

In [None]:
class MLP(nn.Module):
    """Feed-forward classifier with four 64-unit hidden layers.

    Every hidden layer is followed by ReLU and then dropout. The final
    linear layer returns raw logits (no softmax is applied here).

    Args:
        input_size: Number of features per input sample.
        output_size: Number of output logits. Defaults to the module-level
            ``num_classes`` as evaluated when this class is defined.
        dropout_prob: Drop probability used by the single shared dropout
            module that is applied after each hidden layer.
    """

    def __init__(self, input_size, output_size=num_classes, dropout_prob=0.5):
        super().__init__()

        hidden_units = 64

        # Layers are created in the same order as before so parameter
        # registration (and therefore state_dict layout) is unchanged.
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, hidden_units)
        self.fc4 = nn.Linear(hidden_units, hidden_units)
        self.out = nn.Linear(hidden_units, output_size)

        self.dropout = nn.Dropout(p=dropout_prob)

    def forward(self, x):
        """Return the logits produced for the input batch ``x``."""
        # Same pipeline for each hidden layer: linear -> ReLU -> dropout.
        for hidden_layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = self.dropout(F.relu(hidden_layer(x)))

        return self.out(x)

In [None]:
class MLP_Simple(nn.Module):
    """Single-hidden-layer classifier: linear -> ReLU -> dropout -> linear.

    Args:
        input_size: Number of features per input sample.
        output_size: Number of output logits (one per class).
        hidden_size: Width of the hidden layer.
        dropout_prob: Drop probability applied to the hidden activations.

    The forward pass returns raw logits; no softmax is applied.
    """

    def __init__(self, input_size, output_size, hidden_size=128, dropout_prob=0.1):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        # Honour the caller-supplied probability. Previously the value was
        # hard-coded to 0.1, so the dropout_prob argument had no effect.
        self.dropout = nn.Dropout(p=dropout_prob)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the logits produced for the input batch ``x``."""
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.out(x)
        return x


In [3]:
# Already run; the weights were saved to mlp_garbage_classifier_1.pth, but that file has since been lost.

class MLP_Improved(nn.Module):
    """Wider classifier: a projection layer followed by two hidden layers.

    Data flow: ``fc_reduce`` -> ReLU -> ``fc1`` -> ReLU -> dropout ->
    ``fc2`` -> ReLU -> dropout -> ``out``. No dropout is applied directly
    after ``fc_reduce``. The output is raw logits.

    Args:
        input_size: Number of features per input sample.
        reduced_size: Output width of the first (projection) layer.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        output_size: Number of output logits. Defaults to the module-level
            ``num_classes`` as evaluated when this class is defined.
        dropout_prob: Drop probability of the shared dropout module.
    """

    def __init__(self, input_size, reduced_size=2048, hidden1=1024, hidden2=512, output_size=num_classes, dropout_prob=0.2):
        super().__init__()
        self.fc_reduce = nn.Linear(input_size, reduced_size)
        self.fc1 = nn.Linear(reduced_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.out = nn.Linear(hidden2, output_size)
        self.dropout = nn.Dropout(p=dropout_prob)

    def forward(self, x):
        """Return the logits produced for the input batch ``x``."""
        projected = F.relu(self.fc_reduce(x))
        hidden_a = self.dropout(F.relu(self.fc1(projected)))
        hidden_b = self.dropout(F.relu(self.fc2(hidden_a)))
        return self.out(hidden_b)

In [None]:
class MLP_Improved_2(nn.Module):
    """Three-hidden-layer classifier with widths 1024 -> 512 -> 256.

    Each hidden layer is followed by ReLU and then dropout; the final
    linear layer returns raw logits.

    Args:
        input_size: Number of features per input sample.
        output_size: Number of output logits. Defaults to the module-level
            ``num_classes`` as evaluated when this class is defined.
        dropout_prob: Drop probability of the shared dropout module.
    """

    def __init__(self, input_size, output_size=num_classes, dropout_prob=0.3):
        super().__init__()
        widths = (1024, 512, 256)
        self.fc1 = nn.Linear(input_size, widths[0])
        self.fc2 = nn.Linear(widths[0], widths[1])
        self.fc3 = nn.Linear(widths[1], widths[2])
        self.dropout = nn.Dropout(p=dropout_prob)
        self.out = nn.Linear(widths[2], output_size)

    def forward(self, x):
        """Return the logits produced for the input batch ``x``."""
        # linear -> ReLU -> dropout, repeated for every hidden layer.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = self.dropout(F.relu(hidden_layer(x)))
        return self.out(x)


In [4]:
# Training hyper-parameters shared by the cells below.
num_epochs = 100   # full passes over the training DataLoader
lr = 0.0005        # learning rate handed to the Adam optimizer
dropout = 0.2      # drop probability passed to MLP(dropout_prob=...)
batch_size = 32    # samples per mini-batch in the training DataLoader

# Fix the random seeds so that weight initialisation, dropout masks and
# DataLoader shuffling are repeatable across "Restart & Run All".
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

In [5]:
# Convert the training arrays to tensors: float32 features, int64 (long)
# labels -- the target dtype used with nn.CrossEntropyLoss below.
ytr = torch.tensor(ytr, dtype=torch.long)
Xtr = torch.tensor(Xtr, dtype=torch.float32)

# Pair every feature row with its label ...
train_dataset = TensorDataset(Xtr, ytr)

# ... and serve them as mini-batches, reshuffled at the start of each epoch.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [6]:
# Model, Loss, Optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MLP(input_size=Xtr.shape[1], dropout_prob=dropout).to(device)
criterion = nn.CrossEntropyLoss()  # for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=lr)

NameError: name 'MLP' is not defined

In [None]:
# Model, Loss, Optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MLP_Simple(input_size=Xtr.shape[1], output_size=num_classes, hidden_size=128, dropout_prob=0.1).to(device)
criterion = nn.CrossEntropyLoss()  # for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=lr)

In [7]:
# Model, Loss, Optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MLP_Improved(input_size=Xtr.shape[1]).to(device)
criterion = nn.CrossEntropyLoss()  # for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=lr)

In [8]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    for batch_x, batch_y in train_dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        logits = model(batch_x)
        loss = criterion(logits, batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/100], Loss: 1.5795
Epoch [2/100], Loss: 1.3272
Epoch [3/100], Loss: 1.2157
Epoch [4/100], Loss: 1.1357
Epoch [5/100], Loss: 1.0716
Epoch [6/100], Loss: 1.0097
Epoch [7/100], Loss: 0.9493
Epoch [8/100], Loss: 0.8944
Epoch [9/100], Loss: 0.8521
Epoch [10/100], Loss: 0.8051
Epoch [11/100], Loss: 0.7576
Epoch [12/100], Loss: 0.7324
Epoch [13/100], Loss: 0.6924
Epoch [14/100], Loss: 0.6698
Epoch [15/100], Loss: 0.6452
Epoch [16/100], Loss: 0.6390
Epoch [17/100], Loss: 0.5970
Epoch [18/100], Loss: 0.5771
Epoch [19/100], Loss: 0.5675
Epoch [20/100], Loss: 0.5620
Epoch [21/100], Loss: 0.5376
Epoch [22/100], Loss: 0.5330
Epoch [23/100], Loss: 0.5073
Epoch [24/100], Loss: 0.4998
Epoch [25/100], Loss: 0.4787
Epoch [26/100], Loss: 0.5160
Epoch [27/100], Loss: 0.4830
Epoch [28/100], Loss: 0.4523
Epoch [29/100], Loss: 0.4556
Epoch [30/100], Loss: 0.4499
Epoch [31/100], Loss: 0.4568
Epoch [32/100], Loss: 0.4535
Epoch [33/100], Loss: 0.4372
Epoch [34/100], Loss: 0.4234
Epoch [35/100], Loss: 0

In [9]:
# Evaluate the trained model on the training set itself.
# These names are already imported in the first cell; the import is repeated
# here so the cell also runs on its own.
from sklearn.metrics import accuracy_score, classification_report

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Put the model in inference mode (disables dropout) on the chosen device.
model = model.to(device)
model.eval()

Xtr = Xtr.to(device)
ytr = ytr.to(device)

# One forward pass over all training rows, without tracking gradients.
with torch.no_grad():
    logits = model(Xtr)
    preds = logits.argmax(dim=1)  # predicted class index per row

# scikit-learn metrics take NumPy arrays on the CPU.
y_true = ytr.cpu().numpy()
y_pred = preds.cpu().numpy()

# Overall accuracy on the training data.
accuracy = accuracy_score(y_true, y_pred)
print("Accuracy:", accuracy)

# Per-class precision, recall and F1-score.
report = classification_report(y_true, y_pred)
print(report)

Accuracy: 0.9347083386055928
              precision    recall  f1-score   support

           0       0.89      0.90      0.90       755
           1       0.97      0.91      0.94       797
           2       1.00      0.93      0.96      1460
           3       1.00      0.97      0.99      4261
           4       0.96      0.85      0.90      2448
           5       0.96      0.91      0.93       816
           6       0.97      0.95      0.96      1344
           7       0.93      0.94      0.93      1587
           8       0.75      0.98      0.85      1581
           9       0.85      0.92      0.89       757

    accuracy                           0.93     15806
   macro avg       0.93      0.93      0.93     15806
weighted avg       0.94      0.93      0.94     15806



In [10]:
# Evaluate the trained model on the "studio" test set.
# These names are already imported in the first cell; the import is repeated
# here so the cell also runs on its own.
from sklearn.metrics import accuracy_score, classification_report

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load labels and features, scale the features with the scaler fitted on the
# training data (transform only, no re-fit), then convert to tensors.
ytest_studio = np.load("test_studio_labels.npy")
Xtest_studio = np.load("test_studio_features.npy")
Xtest_studio = scaler.transform(Xtest_studio)

Xtest_studio = torch.tensor(Xtest_studio, dtype=torch.float32).to(device)
ytest_studio = torch.tensor(ytest_studio, dtype=torch.long).to(device)

# Put the model in inference mode (disables dropout) on the chosen device.
model = model.to(device)
model.eval()

# One forward pass over the whole test set, without tracking gradients.
with torch.no_grad():
    logits = model(Xtest_studio)
    preds = logits.argmax(dim=1)  # predicted class index per row

# scikit-learn metrics take NumPy arrays on the CPU.
y_true = ytest_studio.cpu().numpy()
y_pred = preds.cpu().numpy()

# Overall accuracy.
accuracy = accuracy_score(y_true, y_pred)
print("Accuracy:", accuracy)

# Per-class precision/recall/F1 breakdown is disabled for this set:
#report = classification_report(y_true, y_pred)
#print(report)

Accuracy: 0.687516221126395


In [11]:
# Evaluate the trained model on the "real-world" test set.
# These names are already imported in the first cell; the import is repeated
# here so the cell also runs on its own.
from sklearn.metrics import accuracy_score, classification_report

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load labels and features, scale the features with the scaler fitted on the
# training data (transform only, no re-fit), then convert to tensors.
ytest_realworld = np.load("test_realworld_labels.npy")
Xtest_realworld = np.load("test_realworld_features.npy")
Xtest_realworld = scaler.transform(Xtest_realworld)

Xtest_realworld = torch.tensor(Xtest_realworld, dtype=torch.float32).to(device)
ytest_realworld = torch.tensor(ytest_realworld, dtype=torch.long).to(device)

# Put the model in inference mode (disables dropout) on the chosen device.
model = model.to(device)
model.eval()

# One forward pass over the whole test set, without tracking gradients.
with torch.no_grad():
    logits = model(Xtest_realworld)
    preds = logits.argmax(dim=1)  # predicted class index per row

# scikit-learn metrics take NumPy arrays on the CPU.
y_true = ytest_realworld.cpu().numpy()
y_pred = preds.cpu().numpy()

# Overall accuracy.
accuracy = accuracy_score(y_true, y_pred)
print("Accuracy:", accuracy)

# Per-class precision/recall/F1 breakdown is disabled for this set:
#report = classification_report(y_true, y_pred)
#print(report)


Accuracy: 0.39805825242718446


In [None]:
# Persist only the learned parameters (the state_dict), not the whole module
# object; loading them later requires constructing the same model class first.
torch.save(
    obj=model.state_dict(),
    f="mlp_garbage_classifier_1.pth",
)
