In [137]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [138]:
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [139]:
with open('/content/drive/MyDrive/train_1500.pkl', 'rb') as f:
    train_data = pickle.load(f)
train_features = train_data['X']
train_labels = train_data['y']

In [140]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)

In [141]:
# to tensor
train_features_tensor = torch.tensor(train_features_scaled, dtype=torch.float32).unsqueeze(1)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)

print(train_features_tensor.size())
print(train_labels_tensor.size())

torch.Size([28102, 1, 1500])
torch.Size([28102])


In [142]:
class CNN_LSTM_Model(nn.Module):
    def __init__(self, num_classes=4):
        super(CNN_LSTM_Model, self).__init__()

        # Layer 1
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=7, stride=1, padding=3)
        self.bn1 = nn.BatchNorm1d(64)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)


        # Layer 2
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm1d(128)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)

        # Layer 3
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=164, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm1d(164)
        self.pool3 = nn.MaxPool1d(kernel_size=2, stride=2)

        # Layer 4
        self.conv4 = nn.Conv1d(in_channels=164, out_channels=200, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm1d(200)
        self.pool4 = nn.MaxPool1d(kernel_size=2, stride=2)

        # Layer 5
        self.conv5 = nn.Conv1d(in_channels=200, out_channels=200, kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm1d(200)
        self.pool5 = nn.MaxPool1d(kernel_size=2, stride=2)

        # LSTM layer
        self.lstm = nn.LSTM(input_size=200, hidden_size=200, batch_first=True)

        # Fully connected layer
        self.fc = nn.Linear(200, num_classes)

        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        x = self.pool1(torch.relu(self.bn1(self.conv1(x))))
        x = self.dropout(x)

        x = self.pool2(torch.relu(self.bn2(self.conv2(x))))
        x = self.dropout(x)

        x = self.pool3(torch.relu(self.bn3(self.conv3(x))))
        x = self.dropout(x)

        x = self.pool4(torch.relu(self.bn4(self.conv4(x))))
        x = self.dropout(x)

        x = self.pool5(torch.relu(self.bn5(self.conv5(x))))
        x = self.dropout(x)

        x = x.permute(0, 2, 1)

        _, (h_n, _) = self.lstm(x)

        x = h_n[-1]

        x = self.fc(x)

        return x

In [143]:
from torch.utils.data import DataLoader, Dataset, random_split

class Dataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {
            'input_ids': self.features[idx],
            'labels': self.labels[idx]
        }

In [144]:
batch_size = 32
device = 'cuda'
num_epochs = 40
weight_decay = 1e-6
learning_rate = 1e-4

In [145]:
dataset = Dataset(train_features_tensor, train_labels_tensor)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [146]:
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, alpha=1.0, gamma=2.0, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, labels):

        prob = F.softmax(logits, dim=1)

        pt = prob.gather(1, labels.unsqueeze(1)).squeeze(1)

        focal_factor = (1 - pt) ** self.gamma

        ce_loss = -torch.log(pt)

        loss = self.alpha * focal_factor * ce_loss

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:  # 'none'
            return loss

In [147]:
from sklearn.metrics import f1_score
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import OneCycleLR
model = CNN_LSTM_Model().to(device)
loss_function = nn.CrossEntropyLoss()
focal_loss = FocalLoss()
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    betas=(0.9, 0.999),
    eps=1e-6,
    weight_decay=weight_decay
)
grad_clip = 1.0

In [148]:
# training

best_val_f1 = 0.0
best_model_path = "/content/drive/MyDrive/best_model.pth"

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    total_preds, total_labels = [], []

    for batch in train_loader:
        optimizer.zero_grad()
        inputs = batch["input_ids"].to(device)
        labels = batch["labels"].to(device)

        logits = model(inputs)
        loss = focal_loss(logits, labels)


        loss.backward()

        if grad_clip > 0:
            clip_grad_norm_(model.parameters(), max_norm=grad_clip)

        optimizer.step()
        # lr_scheduler.step()
        total_loss += loss.item()
        total_preds.extend(torch.argmax(logits, dim=-1).cpu().numpy())
        total_labels.extend(labels.cpu().numpy())

    train_loss = total_loss / len(train_loader)
    train_f1 = f1_score(total_labels, total_preds, average='micro')


    model.eval()
    val_loss = 0
    val_preds, val_labels = [], []

    with torch.no_grad():
        for batch in val_loader:
            inputs = batch['input_ids'].to(device)
            labels = batch['labels'].to(device)

            logits = model(inputs)
            loss = loss_function(logits, labels)

            val_loss += loss.item()
            val_preds.extend(torch.argmax(logits, dim=-1).cpu().numpy())
            val_labels.extend(labels.cpu().numpy())

        val_loss /= len(val_loader)
        val_f1 = f1_score(val_labels, val_preds, average='micro')

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), best_model_path)


        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train F1: {train_f1:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}")

Epoch 1/40, Train Loss: 0.4216, Train F1: 0.5898, Val Loss: 0.9343, Val F1: 0.6054
Epoch 2/40, Train Loss: 0.3369, Train F1: 0.6314, Val Loss: 0.7875, Val F1: 0.6534
Epoch 3/40, Train Loss: 0.3047, Train F1: 0.6563, Val Loss: 0.7814, Val F1: 0.6675
Epoch 4/40, Train Loss: 0.2826, Train F1: 0.6768, Val Loss: 0.7911, Val F1: 0.6568
Epoch 5/40, Train Loss: 0.2664, Train F1: 0.6967, Val Loss: 0.7214, Val F1: 0.7020
Epoch 6/40, Train Loss: 0.2495, Train F1: 0.7194, Val Loss: 0.6725, Val F1: 0.7374
Epoch 7/40, Train Loss: 0.2369, Train F1: 0.7344, Val Loss: 0.6758, Val F1: 0.7447
Epoch 8/40, Train Loss: 0.2260, Train F1: 0.7442, Val Loss: 0.6652, Val F1: 0.7548
Epoch 9/40, Train Loss: 0.2169, Train F1: 0.7573, Val Loss: 0.6553, Val F1: 0.7588
Epoch 10/40, Train Loss: 0.2134, Train F1: 0.7618, Val Loss: 0.6550, Val F1: 0.7589
Epoch 11/40, Train Loss: 0.2059, Train F1: 0.7685, Val Loss: 0.6358, Val F1: 0.7639
Epoch 12/40, Train Loss: 0.1996, Train F1: 0.7745, Val Loss: 0.6372, Val F1: 0.7641
E

In [149]:
with open('/content/drive/MyDrive/test_1500.pkl', 'rb') as f:
    test_data = pickle.load(f)
test_features = test_data['X']
test_idx = test_data['idx']

In [150]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
test_features_scaled = scaler.fit_transform(test_features)

In [151]:
test_features_tensor = torch.tensor(test_features_scaled, dtype=torch.float32).unsqueeze(1)
test_idx_tensor = torch.tensor(test_idx, dtype=torch.long)

In [152]:
print(test_features_tensor.size())
print(test_idx_tensor.size())

torch.Size([18634, 1, 1500])
torch.Size([18634])


In [153]:
class TestDataset(Dataset):
    def __init__(self, features, indix):
        self.features = features
        self.indix = indix

    def __len__(self):
        return len(self.indix)

    def __getitem__(self, idx):
        return {
            'input_ids': self.features[idx],
            'indices': self.indix[idx]
        }

test_data = TestDataset(test_features_tensor, test_idx_tensor)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [154]:
from collections import defaultdict

# model.load_state_dict(torch.load(best_model_path))
model.eval()

results = defaultdict(list)

with torch.no_grad():
  for batch in test_loader:
    inputs = batch['input_ids'].to(device)
    indices = batch['indices']

    outputs = model(inputs)
    probabilities = torch.softmax(outputs, dim=1)  # Get probabilities per class

    # Store probabilities by index
    for i, idx in enumerate(indices):
        results[idx.item()].append(probabilities[i].cpu().numpy())

# Average the probabilities for each unique index and determine final class label
final_labels = {}
for idx, probs in results.items():
  avg_prob = np.mean(probs, axis=0)  # Average across all samples with the same index
  final_label = np.argmax(avg_prob)  # Get the class with the highest average probability
  final_labels[idx] = final_label

In [155]:
final_labels_array = np.array(list(final_labels.values()))
print(final_labels_array.shape)

(3411,)


In [156]:
import pandas as pd
indices = np.arange(len(final_labels_array))
final_labels_df = pd.DataFrame({
    'id': indices,
    'y': final_labels_array
})
final_labels_df.to_csv('/content/drive/MyDrive/pred.csv', index=False)