In [1]:
# pip install ucimlrepo - primeiro instala-se a lib do repositório

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from ucimlrepo import fetch_ucirepo 

In [3]:
# fetch dataset 
wine_quality = fetch_ucirepo(id=186) 

In [4]:
# Convertendo os dados para um DataFrame
df = pd.concat([wine_quality.data.features, wine_quality.data.targets], axis=1)

In [7]:
df['quality'].value_counts()

quality
6    2836
5    2138
7    1079
4     216
8     193
3      30
9       5
Name: count, dtype: int64

In [8]:
# 2. Preparação dos dados
X = df.drop('quality', axis=1).values  # Todas as colunas exceto 'quality' são características
y = df['quality'].values  # A coluna 'quality' é a variável alvo

In [11]:
X

array([[ 7.4 ,  0.7 ,  0.  , ...,  3.51,  0.56,  9.4 ],
       [ 7.8 ,  0.88,  0.  , ...,  3.2 ,  0.68,  9.8 ],
       [ 7.8 ,  0.76,  0.04, ...,  3.26,  0.65,  9.8 ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  2.99,  0.46,  9.4 ],
       [ 5.5 ,  0.29,  0.3 , ...,  3.34,  0.38, 12.8 ],
       [ 6.  ,  0.21,  0.38, ...,  3.26,  0.32, 11.8 ]])

In [9]:
# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [10]:
# Normalizando os dados
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
# Convertendo para tensores PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [13]:
# 3. Construir a rede neural
class WineQualityNN(nn.Module):
    def __init__(self):
        super(WineQualityNN, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [14]:
# Instanciar o modelo
model = WineQualityNN()

In [15]:
# Definir a função de perda e o otimizador
criterion = nn.MSELoss()  # Usando MSE porque estamos tratando a qualidade como uma variável contínua
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [16]:
# 4. Treinamento do modelo
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/50], Loss: 33.1677
Epoch [2/50], Loss: 32.4250
Epoch [3/50], Loss: 31.7007
Epoch [4/50], Loss: 30.9907
Epoch [5/50], Loss: 30.2900
Epoch [6/50], Loss: 29.5937
Epoch [7/50], Loss: 28.8958
Epoch [8/50], Loss: 28.1919
Epoch [9/50], Loss: 27.4777
Epoch [10/50], Loss: 26.7501
Epoch [11/50], Loss: 26.0076
Epoch [12/50], Loss: 25.2484
Epoch [13/50], Loss: 24.4710
Epoch [14/50], Loss: 23.6747
Epoch [15/50], Loss: 22.8590
Epoch [16/50], Loss: 22.0241
Epoch [17/50], Loss: 21.1705
Epoch [18/50], Loss: 20.2998
Epoch [19/50], Loss: 19.4140
Epoch [20/50], Loss: 18.5157
Epoch [21/50], Loss: 17.6075
Epoch [22/50], Loss: 16.6923
Epoch [23/50], Loss: 15.7734
Epoch [24/50], Loss: 14.8544
Epoch [25/50], Loss: 13.9392
Epoch [26/50], Loss: 13.0320
Epoch [27/50], Loss: 12.1373
Epoch [28/50], Loss: 11.2599
Epoch [29/50], Loss: 10.4045
Epoch [30/50], Loss: 9.5763
Epoch [31/50], Loss: 8.7802
Epoch [32/50], Loss: 8.0214
Epoch [33/50], Loss: 7.3049
Epoch [34/50], Loss: 6.6353
Epoch [35/50], Loss: 6.0171


In [17]:
# 5. Avaliação do modelo
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    mse_loss = criterion(predictions, y_test_tensor)
    mae_loss = torch.mean(torch.abs(predictions - y_test_tensor))
    print(f'Mean Squared Error on Test Set: {mse_loss.item():.4f}')
    print(f'Mean Absolute Error on Test Set: {mae_loss.item():.4f}')

Mean Squared Error on Test Set: 3.5642
Mean Absolute Error on Test Set: 1.3204


Implementar técnica de data augmentation SMOTE
================================================

In [18]:
from imblearn.over_sampling import SMOTE

In [19]:
# Criar uma máscara para identificar as classes de interesse (qualidade acima de 7 e abaixo de 5)
mask = (y > 7) | (y < 5)

In [20]:
# Aplicar SMOTE para as classes minoritárias (qualidade > 7 e < 5)
smote = SMOTE(k_neighbors=3)  # Ajustando n_neighbors para um valor menor
X_smote, y_smote = smote.fit_resample(X[mask], y[mask])

In [21]:
# Combinar as amostras SMOTE com as instâncias que têm qualidade entre 5 e 7
mask_middle = (y >= 5) & (y <= 7)
X_final = np.vstack([X_smote, X[mask_middle]])
y_final = np.hstack([y_smote, y[mask_middle]])

In [22]:
X_final

array([[ 7.4 ,  0.59,  0.08, ...,  3.38,  0.5 ,  9.  ],
       [ 5.7 ,  1.13,  0.09, ...,  3.5 ,  0.48,  9.8 ],
       [ 8.8 ,  0.61,  0.3 , ...,  3.26,  0.51,  9.3 ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  2.99,  0.46,  9.4 ],
       [ 5.5 ,  0.29,  0.3 , ...,  3.34,  0.38, 12.8 ],
       [ 6.  ,  0.21,  0.38, ...,  3.26,  0.32, 11.8 ]])

In [23]:
# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.2, random_state=42)

In [24]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [25]:
# Convertendo para tensores PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [26]:
# 4. Treinamento do modelo
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/50], Loss: 3.7834
Epoch [2/50], Loss: 3.7507
Epoch [3/50], Loss: 3.7065
Epoch [4/50], Loss: 3.6509
Epoch [5/50], Loss: 3.5852
Epoch [6/50], Loss: 3.5115
Epoch [7/50], Loss: 3.4324
Epoch [8/50], Loss: 3.3504
Epoch [9/50], Loss: 3.2682
Epoch [10/50], Loss: 3.1881
Epoch [11/50], Loss: 3.1119
Epoch [12/50], Loss: 3.0412
Epoch [13/50], Loss: 2.9768
Epoch [14/50], Loss: 2.9193
Epoch [15/50], Loss: 2.8686
Epoch [16/50], Loss: 2.8243
Epoch [17/50], Loss: 2.7859
Epoch [18/50], Loss: 2.7526
Epoch [19/50], Loss: 2.7236
Epoch [20/50], Loss: 2.6979
Epoch [21/50], Loss: 2.6745
Epoch [22/50], Loss: 2.6527
Epoch [23/50], Loss: 2.6318
Epoch [24/50], Loss: 2.6111
Epoch [25/50], Loss: 2.5903
Epoch [26/50], Loss: 2.5692
Epoch [27/50], Loss: 2.5475
Epoch [28/50], Loss: 2.5252
Epoch [29/50], Loss: 2.5025
Epoch [30/50], Loss: 2.4793
Epoch [31/50], Loss: 2.4561
Epoch [32/50], Loss: 2.4329
Epoch [33/50], Loss: 2.4101
Epoch [34/50], Loss: 2.3878
Epoch [35/50], Loss: 2.3663
Epoch [36/50], Loss: 2.3457
E

In [27]:
# 5. Avaliação do modelo
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    mse_loss = criterion(predictions, y_test_tensor)
    mae_loss = torch.mean(torch.abs(predictions - y_test_tensor))
    print(f'Mean Squared Error on Test Set: {mse_loss.item():.4f}')
    print(f'Mean Absolute Error on Test Set: {mae_loss.item():.4f}')

Mean Squared Error on Test Set: 2.2366
Mean Absolute Error on Test Set: 1.1270


Inserir Mix-Up
===============

In [31]:
# Função MixUp para gerar mais instâncias
def mixup(X, y, alpha=0.2, n_samples=2):
    X_mix = []
    y_mix = []
    
    for _ in range(n_samples):
        indices = np.arange(len(X))
        np.random.shuffle(indices)
        X_shuffled = X[indices]
        y_shuffled = y[indices]
        
        lam = np.random.beta(alpha, alpha)
        
        X_mixed = lam * X + (1 - lam) * X_shuffled
        y_mixed = lam * y + (1 - lam) * y_shuffled
        
        X_mix.append(X_mixed)
        y_mix.append(y_mixed)
    
    return np.vstack(X_mix), np.hstack(y_mix)

In [32]:
# Aplicar MixUp dentro de cada grupo de qualidade
X_mix = []
y_mix = []

for quality in np.unique(y_final):
    X_group = X_final[y_final == quality]
    y_group = y_final[y_final == quality]
    
    X_mixed, y_mixed = mixup(X_group, y_group)
    
    X_mix.append(X_mixed)
    y_mix.append(y_mixed)

# Combinar as amostras mixadas de volta em um único conjunto de dados
X_combined = np.vstack(X_mix)
y_combined = np.hstack(y_mix)

In [33]:
X_combined

array([[10.3606451 ,  0.4495194 ,  0.47895105, ...,  3.15383408,
         0.53884877,  9.35214647],
       [ 9.69683263,  0.56804031,  0.47274674, ...,  3.13703189,
         0.59668866,  9.06057724],
       [ 7.38473261,  0.86763757,  0.1406929 , ...,  3.43100533,
         0.51296999, 10.7904761 ],
       ...,
       [ 8.15961349,  0.25340494,  0.40021483, ...,  3.24425348,
         0.47106337, 11.56165392],
       [ 6.66617821,  0.34676463,  0.31647096, ...,  3.40470526,
         0.58485166, 12.46617821],
       [ 7.01509994,  0.30246679,  0.42629684, ...,  3.33178226,
         0.3945329 , 12.81499347]])

In [34]:
# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2, random_state=42)

# Normalizando os dados
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convertendo para tensores PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [35]:
# 4. Treinamento do modelo
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/50], Loss: 1.9539
Epoch [2/50], Loss: 1.9432
Epoch [3/50], Loss: 1.9326
Epoch [4/50], Loss: 1.9220
Epoch [5/50], Loss: 1.9115
Epoch [6/50], Loss: 1.9011
Epoch [7/50], Loss: 1.8909
Epoch [8/50], Loss: 1.8808
Epoch [9/50], Loss: 1.8710
Epoch [10/50], Loss: 1.8613
Epoch [11/50], Loss: 1.8519
Epoch [12/50], Loss: 1.8427
Epoch [13/50], Loss: 1.8337
Epoch [14/50], Loss: 1.8249
Epoch [15/50], Loss: 1.8163
Epoch [16/50], Loss: 1.8078
Epoch [17/50], Loss: 1.7994
Epoch [18/50], Loss: 1.7911
Epoch [19/50], Loss: 1.7829
Epoch [20/50], Loss: 1.7749
Epoch [21/50], Loss: 1.7668
Epoch [22/50], Loss: 1.7589
Epoch [23/50], Loss: 1.7511
Epoch [24/50], Loss: 1.7433
Epoch [25/50], Loss: 1.7356
Epoch [26/50], Loss: 1.7280
Epoch [27/50], Loss: 1.7205
Epoch [28/50], Loss: 1.7130
Epoch [29/50], Loss: 1.7057
Epoch [30/50], Loss: 1.6984
Epoch [31/50], Loss: 1.6912
Epoch [32/50], Loss: 1.6841
Epoch [33/50], Loss: 1.6771
Epoch [34/50], Loss: 1.6702
Epoch [35/50], Loss: 1.6633
Epoch [36/50], Loss: 1.6565
E

In [36]:
# 5. Avaliação do modelo
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    mse_loss = criterion(predictions, y_test_tensor)
    mae_loss = torch.mean(torch.abs(predictions - y_test_tensor))
    print(f'Mean Squared Error on Test Set: {mse_loss.item():.4f}')
    print(f'Mean Absolute Error on Test Set: {mae_loss.item():.4f}')

Mean Squared Error on Test Set: 1.4755
Mean Absolute Error on Test Set: 0.9388


Inserir ruído gaussiano
=======================

In [38]:
# Aplicar ruído gaussiano segmentado pela classe quality
def add_gaussian_noise(X, mean=0, stddev=0.1):
    noise = np.random.normal(mean, stddev, X.shape)
    return X + noise

In [39]:
X_noisy = []
y_noisy = []

for quality in np.unique(y_combined):
    X_group = X_combined[y_combined == quality]
    y_group = y_combined[y_combined == quality]
    
    X_group_noisy = add_gaussian_noise(X_group)
    
    X_noisy.append(X_group_noisy)
    y_noisy.append(y_group)

# Combinar as amostras com ruído de volta em um único conjunto de dados
X_final_noisy = np.vstack([X_combined] + X_noisy)
y_final_noisy = np.hstack([y_combined] + y_noisy)

In [40]:
X_final_noisy

array([[10.3606451 ,  0.4495194 ,  0.47895105, ...,  3.15383408,
         0.53884877,  9.35214647],
       [ 9.69683263,  0.56804031,  0.47274674, ...,  3.13703189,
         0.59668866,  9.06057724],
       [ 7.38473261,  0.86763757,  0.1406929 , ...,  3.43100533,
         0.51296999, 10.7904761 ],
       ...,
       [ 8.10427668,  0.14396187,  0.59156379, ...,  3.16690746,
         0.33553317, 11.72847718],
       [ 6.58089145,  0.25502186,  0.27525059, ...,  3.45401118,
         0.51736464, 12.48341482],
       [ 6.88709876,  0.41315748,  0.41099086, ...,  3.31969781,
         0.43249432, 12.84239108]])

In [41]:
# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X_final_noisy, y_final_noisy, test_size=0.2, random_state=42)

# Normalizando os dados
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convertendo para tensores PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [42]:
# 4. Treinamento do modelo
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/50], Loss: 2.9309
Epoch [2/50], Loss: 2.9132
Epoch [3/50], Loss: 2.8846
Epoch [4/50], Loss: 2.8472
Epoch [5/50], Loss: 2.8033
Epoch [6/50], Loss: 2.7546
Epoch [7/50], Loss: 2.7027
Epoch [8/50], Loss: 2.6490
Epoch [9/50], Loss: 2.5944
Epoch [10/50], Loss: 2.5399
Epoch [11/50], Loss: 2.4861
Epoch [12/50], Loss: 2.4335
Epoch [13/50], Loss: 2.3823
Epoch [14/50], Loss: 2.3330
Epoch [15/50], Loss: 2.2856
Epoch [16/50], Loss: 2.2402
Epoch [17/50], Loss: 2.1971
Epoch [18/50], Loss: 2.1561
Epoch [19/50], Loss: 2.1174
Epoch [20/50], Loss: 2.0811
Epoch [21/50], Loss: 2.0473
Epoch [22/50], Loss: 2.0159
Epoch [23/50], Loss: 1.9868
Epoch [24/50], Loss: 1.9600
Epoch [25/50], Loss: 1.9352
Epoch [26/50], Loss: 1.9123
Epoch [27/50], Loss: 1.8910
Epoch [28/50], Loss: 1.8712
Epoch [29/50], Loss: 1.8528
Epoch [30/50], Loss: 1.8355
Epoch [31/50], Loss: 1.8193
Epoch [32/50], Loss: 1.8040
Epoch [33/50], Loss: 1.7895
Epoch [34/50], Loss: 1.7758
Epoch [35/50], Loss: 1.7628
Epoch [36/50], Loss: 1.7504
E

In [43]:
# 5. Avaliação do modelo
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    mse_loss = criterion(predictions, y_test_tensor)
    mae_loss = torch.mean(torch.abs(predictions - y_test_tensor))
    print(f'Mean Squared Error on Test Set: {mse_loss.item():.4f}')
    print(f'Mean Absolute Error on Test Set: {mae_loss.item():.4f}')

Mean Squared Error on Test Set: 1.5883
Mean Absolute Error on Test Set: 0.9762
