In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# data preparation 
train = pd.read_csv('data/new_train.csv', index_col='id')
test = pd.read_csv('data/test.csv', index_col='id')

y_columns = ['Pastry', 'Z_Scratch', 'K_Scatch', 'Stains', 'Dirtiness', 'Bumps', 'Other_Faults']
y = train[y_columns]
X = train.drop(y_columns, axis=1)

scaler = StandardScaler()
X = scaler.fit_transform(X)
X_test = scaler.transform(test)
y_test = np.zeros((X_test.shape[0], len(y_columns)))
# y_test = pd.DataFrame(y_test, columns=y_columns)


X = torch.tensor(X, dtype=torch.float32).to(device)
y = torch.tensor(y.values, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).to(device)


X = X.view(-1, 1, 27)  # Reshape X for training data
X_test = X_test.view(-1, 1, 27)  # Reshape X_test for testing data
# X = X.view(-1, 27)  # Reshape X for training data
# X_test = X_test.view(-1, 27)  # Reshape X_test for testing data

# Create TensorDatasets and DataLoaders for training and testing
train_dataset = TensorDataset(X, y)
test_dataset = TensorDataset(X_test, y_test)  # Assuming you have or will create y_test similarly

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# No need to shuffle the test loader
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


class DeeperCNN(nn.Module):
    def __init__(self, input_channels=1, num_classes=7):
        super(DeeperCNN, self).__init__()
        # Convolutional layers with increased dropout
        self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(64)
        self.dropout1 = nn.Dropout(0.5)  # Increased dropout
        self.conv2 = nn.Conv1d(64, 256, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout2 = nn.Dropout(0.5)  # Increased dropout
        self.conv3 = nn.Conv1d(256, 1024, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm1d(1024)
        self.dropout3 = nn.Dropout(0.5)  # Increased dropout
        self.conv4 = nn.Conv1d(1024, 512, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm1d(512)
        self.dropout4 = nn.Dropout(0.5)  # Apply dropout also here

        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        
        self._to_linear = None
        self._dummy_x = torch.zeros(1, input_channels, 27)
        self._forward_features(self._dummy_x)

        # Fully connected layers with dropout
        self.fc1 = nn.Linear(self._to_linear, 1024)
        self.dropout_fc1 = nn.Dropout(0.5)  # Apply dropout before final layer
        self.fc2 = nn.Linear(1024, num_classes)

    def _forward_features(self, x):
        x = self.dropout1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(x)
        x = self.dropout2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(x)
        x = self.dropout3(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(x)
        x = self.dropout4(F.relu(self.bn4(self.conv4(x))))
        x = self.pool(x)
        if self._to_linear is None:
            self._to_linear = int(x.numel() / x.size(0))
        return x

    def forward(self, x):
        x = self._forward_features(x)
        x = x.view(-1, self._to_linear)
        x = self.dropout_fc1(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return torch.sigmoid(x)

# class ResidualBlock1D(nn.Module):
#     def __init__(self, in_channels, out_channels, stride=1, downsample=None):
#         super(ResidualBlock1D, self).__init__()
#         self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
#         self.bn1 = nn.BatchNorm1d(out_channels)
#         self.relu = nn.ReLU(inplace=True)
#         self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
#         self.bn2 = nn.BatchNorm1d(out_channels)
#         self.downsample = downsample
#         self.dropout = nn.Dropout(0.5)  # Add dropout

#     def forward(self, x):
#         identity = x
#         if self.downsample is not None:
#             identity = self.downsample(x)
        
#         out = self.conv1(x)
#         out = self.bn1(out)
#         out = self.relu(out)
#         out = self.conv2(out)
#         out = self.bn2(out)
#         out = self.dropout(out)  # Apply dropout
#         out += identity
#         out = self.relu(out)
#         return out



# class ResNet1D(nn.Module):
#     def __init__(self, input_channels=1, num_blocks=[2, 2, 2], num_classes=7):
#         super(ResNet1D, self).__init__()
#         self.in_channels = 64
#         self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
#         self.bn1 = nn.BatchNorm1d(64)
#         self.relu = nn.ReLU(inplace=True)
#         self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
        
#         self.layer1 = self._make_layer(64, num_blocks[0])
#         self.layer2 = self._make_layer(128, num_blocks[1], stride=2)
#         self.layer3 = self._make_layer(256, num_blocks[2], stride=2)
        
#         self.avgpool = nn.AdaptiveAvgPool1d(1)
#         self.fc = nn.Linear(256, num_classes)
    
#     def _make_layer(self, out_channels, blocks, stride=1):
#         downsample = None
#         if stride != 1 or self.in_channels != out_channels:
#             downsample = nn.Sequential(
#                 nn.Conv1d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
#                 nn.BatchNorm1d(out_channels),
#             )
        
#         layers = []
#         layers.append(ResidualBlock1D(self.in_channels, out_channels, stride, downsample))
#         self.in_channels = out_channels  # Update in_channels to match out_channels for the next block
#         for _ in range(1, blocks):
#             layers.append(ResidualBlock1D(out_channels, out_channels))
        
#         return nn.Sequential(*layers)


#     def forward(self, x):
#         x = self.conv1(x)
#         x = self.bn1(x)
#         x = self.relu(x)
#         x = self.maxpool(x)
        
#         x = self.layer1(x)
#         x = self.layer2(x)
#         x = self.layer3(x)
        
#         x = self.avgpool(x)
#         x = torch.flatten(x, 1)
#         x = self.fc(x)
#         return torch.sigmoid(x)

# class LargerFCNN(nn.Module):
#     def __init__(self, input_dim=27, output_dim=7, dropout_rate=0.5):
#         super(LargerFCNN, self).__init__()
#         self.fc1 = nn.Linear(input_dim, 256)  # First layer
#         self.dropout1 = nn.Dropout(dropout_rate)
#         self.fc2 = nn.Linear(256, 1024)  # Second layer
#         self.dropout2 = nn.Dropout(dropout_rate)
#         self.fc3 = nn.Linear(1024, 128)  # Third layer
#         self.dropout3 = nn.Dropout(dropout_rate)
#         self.fc4 = nn.Linear(128, 512)  # Fourth layer
#         self.dropout4 = nn.Dropout(dropout_rate)
#         self.fc5 = nn.Linear(512, output_dim)  # Output layer

#     def forward(self, x):
#         x = F.relu(self.fc1(x))
#         x = self.dropout1(x)
#         x = F.relu(self.fc2(x))
#         x = self.dropout2(x)
#         x = F.relu(self.fc3(x))
#         x = self.dropout3(x)
#         x = F.relu(self.fc4(x))
#         x = self.dropout4(x)
#         x = torch.sigmoid(self.fc5(x))  # Sigmoid activation for binary output
#         return x

# Ensure model is compatible with CUDA
model = DeeperCNN().to(device)

# Random seed for reproducibility
torch.manual_seed(42)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)


epochs = 200
for epoch in range(epochs):
    for inputs, labels in train_loader:
        inputs, labels = inputs, labels
        optimizer.zero_grad()
        outputs = model(inputs)
        
        # outputs = outputs.detach().cpu().numpy()
        # all_indexes = np.argmax(outputs, axis=1)
        # outputs = np.zeros(outputs.shape)
        # outputs[np.arange(all_indexes.size), all_indexes] = 1
        # outputs = torch.tensor(outputs, dtype=torch.float32).to(device)
        
        if epoch == 1: 
            print(outputs[0:1,:])
        
        # indexes = torch.argmax(outputs, dim=1)
        # outputs = torch.zeros(outputs.shape, dtype=torch.float32).to(device)
        # outputs[torch.arange(indexes.size(0)), indexes] = 1
        
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')


cuda
Epoch 1/200, Loss: 0.37571215629577637
tensor([[0.0638, 0.1869, 0.1027, 0.0514, 0.0429, 0.1650, 0.4417]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[0.1491, 0.1001, 0.0060, 0.0211, 0.0179, 0.2647, 0.4920]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[0.2990, 0.0285, 0.0176, 0.0081, 0.0125, 0.2680, 0.4544]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[0.0161, 0.0384, 0.8632, 0.0192, 0.0092, 0.0076, 0.1328]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[0.0128, 0.0215, 0.9484, 0.0240, 0.0043, 0.0110, 0.1624]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[0.1084, 0.1468, 0.0349, 0.0777, 0.0668, 0.2749, 0.3787]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[0.0179, 0.0088, 0.8681, 0.0252, 0.0068, 0.0156, 0.2061]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[0.1201, 0.1319, 0.0832, 0.1162, 0.1338, 0.3101, 0.2361]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[0.0411, 0.1

KeyboardInterrupt: 

In [4]:
# Store for predictions and actual labels
all_predictions = []
all_labels = []

with torch.no_grad():
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        all_predictions.extend(outputs.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

all_predictions = np.array(all_predictions)
all_labels = np.array(all_labels)

# find the index of largest value in each row
all_indexes = np.argmax(all_predictions, axis=1)
all_predictions = np.zeros(all_predictions.shape)
all_predictions[np.arange(all_indexes.size), all_indexes] = 1


# Compute AUC-ROC for each dimension
auc_scores = []
for i in range(7):  # Assuming 7 output dimensions
    auc_score = roc_auc_score(all_labels[:, i], all_predictions[:, i])
    auc_scores.append(auc_score)

mean_auc_score = np.mean(auc_scores)

print("AUC-ROC Scores for each output dimension:", auc_scores)
print("Mean AUC-ROC Score:", mean_auc_score)



AUC-ROC Scores for each output dimension: [0.6221756864332302, 0.7873114076237089, 0.9526771312397259, 0.9347323188514213, 0.6372189939357128, 0.7062049242231597, 0.6685828034555914]
Mean AUC-ROC Score: 0.7584147522517928


In [5]:
# Store for predictions and actual labels
all_predictions = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        all_predictions.extend(outputs.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

all_predictions = np.array(all_predictions)
all_labels = np.array(all_labels)

all_indexes = np.argmax(all_predictions, axis=1)
all_predictions = np.zeros(all_predictions.shape)
all_predictions[np.arange(all_indexes.size), all_indexes] = 1

# save to a file for submission
# id starts at 19219
submission = pd.DataFrame(all_predictions, columns=y_columns)
submission.index += 19219
submission.index.name = 'id'
submission.to_csv('submissions/cnn7.csv', index=True)