In [1]:
# !pip install trackio


In [2]:
# Setup 

import os
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# import trackio
import kagglehub

pl.seed_everything(42, workers=True)

Seed set to 42


42

In [3]:
# Data Loading (Assumes standard Kaggle competition file structure)
import pandas as pd
# Define the path to the competition data
DATA_PATH = '/kaggle/input/sep-25-dl-gen-ai-nppe-2/'

# Load the training and test dataframes
try:
    # Assuming files are named 'train.csv' and 'test.csv' in the competition data folder
    train_df = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
    test_df = pd.read_csv(os.path.join(DATA_PATH, 'test.csv'))
    print(f"Train samples: {len(train_df)}")
    print(f"Test samples: {len(test_df)}")
except FileNotFoundError:
    print(f"Error: Data files not found in {DATA_PATH}. Please check the path.")


Train samples: 7262
Test samples: 1816


In [4]:
# Configuration

class Config:
    # Tokens
    AMINO_ACIDS = 'ACDEFGHIKLMNPQRSTVWY*X'
    PAD_IDX = 0
    UNK_IDX = len(AMINO_ACIDS) + 1

    # Labels
    Q8_LABELS = ['H','G','I','E','B','T','S','C']
    Q3_LABELS = ['H','E','C']

    # Mappings
    AA_TO_IDX = {aa: i + 1 for i, aa in enumerate(AMINO_ACIDS)}
    AA_TO_IDX['UNK'] = UNK_IDX
    IDX_TO_Q8 = {i: l for i, l in enumerate(Q8_LABELS)}
    IDX_TO_Q3 = {i: l for i, l in enumerate(Q3_LABELS)}
    Q8_TO_IDX = {l: i for i, l in enumerate(Q8_LABELS)}
    Q3_TO_IDX = {l: i for i, l in enumerate(Q3_LABELS)}

    # Model
    VOCAB_SIZE = UNK_IDX + 1
    EMBED_DIM = 64
    HIDDEN_DIM = 128
    NUM_LAYERS = 3

    # Training
    BATCH_SIZE = 64
    LR = 1e-3
    EPOCHS = 10

In [5]:
# Data Loader and Lightning Data Module 

class ProteinDataset(Dataset):
    def __init__(self, df, is_test=False, augment=False):
        self.seqs = df['seq'].tolist()
        self.is_test = is_test
        self.augment = augment

        if not is_test:
            self.sst8 = df['sst8'].tolist()
            self.sst3 = df['sst3'].tolist()

    def encode(self, seq, mapping):
        return [mapping.get(x, Config.UNK_IDX) for x in seq]

    def __len__(self):
        return len(self.seqs)

    def __getitem__(self, idx):
        seq = self.seqs[idx]

        # ðŸ”§ Optional augmentation placeholder
        if self.augment and np.random.rand() < 0.05:
            seq = seq[::-1]

        x = torch.tensor(self.encode(seq, Config.AA_TO_IDX), dtype=torch.long)
        item = {'input': x}

        if not self.is_test:
            s8, s3 = self.sst8[idx], self.sst3[idx]
            assert len(seq) == len(s8) == len(s3)

            y8 = torch.tensor(self.encode(s8, Config.Q8_TO_IDX))
            y3 = torch.tensor(self.encode(s3, Config.Q3_TO_IDX))
            item.update({'target_q8': y8, 'target_q3': y3})

        return item

def collate_fn(batch):
    x = [b['input'] for b in batch]
    lengths = torch.tensor([len(i) for i in x])

    x = nn.utils.rnn.pad_sequence(x, batch_first=True, padding_value=0)
    out = {'input': x, 'lengths': lengths}

    if 'target_q8' in batch[0]:
        out['target_q8'] = nn.utils.rnn.pad_sequence(
            [b['target_q8'] for b in batch], batch_first=True, padding_value=-100)
        out['target_q3'] = nn.utils.rnn.pad_sequence(
            [b['target_q3'] for b in batch], batch_first=True, padding_value=-100)

    return out


class ProteinDataModule(pl.LightningDataModule):
    def __init__(self, train_df, test_df):
        super().__init__()
        self.train_df = train_df
        self.test_df = test_df

    def setup(self, stage=None):
        tr, val = train_test_split(self.train_df, test_size=0.1, random_state=42)
        self.train_ds = ProteinDataset(tr, augment=True)
        self.val_ds = ProteinDataset(val)
        self.test_ds = ProteinDataset(self.test_df, is_test=True)

    def train_dataloader(self):
        return DataLoader(self.train_ds, batch_size=Config.BATCH_SIZE,
                          shuffle=True, collate_fn=collate_fn, num_workers=4, persistent_workers=True)

    def val_dataloader(self):
        return DataLoader(self.val_ds, batch_size=Config.BATCH_SIZE,
                          collate_fn=collate_fn, num_workers=4, persistent_workers=True)

    def test_dataloader(self):
        return DataLoader(self.test_ds, batch_size=Config.BATCH_SIZE,
                          collate_fn=collate_fn, num_workers=4, persistent_workers=True)

In [6]:
# Model Architecture Base 

class BaseSSModel(pl.LightningModule):
    def _loss_f1(self, logits, targets):
        logits = logits.reshape(-1, logits.size(-1))
        targets = targets.reshape(-1)
        loss = self.loss_fn(logits, targets)

        mask = targets != -100
        if mask.sum() == 0:
            return loss, 0.0

        preds = torch.argmax(logits, 1)[mask].cpu().numpy()
        gold = targets[mask].cpu().numpy()
        return loss, f1_score(gold, preds, average='macro')

In [7]:
# RNN

class BiRNN(BaseSSModel):
    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(Config.VOCAB_SIZE, Config.EMBED_DIM, padding_idx=0)
        self.encoder = nn.RNN(Config.EMBED_DIM, Config.HIDDEN_DIM,
                              Config.NUM_LAYERS, batch_first=True, bidirectional=True)

        self.q8 = nn.Linear(2*Config.HIDDEN_DIM, len(Config.Q8_LABELS))
        self.q3 = nn.Linear(2*Config.HIDDEN_DIM, len(Config.Q3_LABELS))

        self.loss_fn = nn.CrossEntropyLoss(ignore_index=-100)

    def forward(self, x, lengths):
        emb = self.embedding(x)
        packed = nn.utils.rnn.pack_padded_sequence(
            emb, lengths.cpu(), batch_first=True, enforce_sorted=False)
        out, _ = self.encoder(packed)
        out, _ = nn.utils.rnn.pad_packed_sequence(out, batch_first=True)
        return self.q8(out), self.q3(out)

In [8]:
# GRU
class BiGRU(BaseSSModel):
    def __init__(self):
        super().__init__()
        # 1. Embedding Layer
        self.embedding = nn.Embedding(Config.VOCAB_SIZE, Config.EMBED_DIM, padding_idx=0)
        
        # 2. GRU Encoder (Replaces RNN)
        self.encoder = nn.GRU(
            Config.EMBED_DIM, 
            Config.HIDDEN_DIM,
            Config.NUM_LAYERS, 
            batch_first=True, 
            bidirectional=True
        )

        # 3. Output Heads (Same as before)
        self.q8 = nn.Linear(2*Config.HIDDEN_DIM, len(Config.Q8_LABELS))
        self.q3 = nn.Linear(2*Config.HIDDEN_DIM, len(Config.Q3_LABELS))

        self.loss_fn = nn.CrossEntropyLoss(ignore_index=-100)

    def forward(self, x, lengths):
        # 1. Embed
        emb = self.embedding(x)
        
        # 2. Pack
        packed = nn.utils.rnn.pack_padded_sequence(
            emb, lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        
        # 3. Encode (GRU)
        out, _ = self.encoder(packed)
        
        # 4. Unpack
        out, _ = nn.utils.rnn.pad_packed_sequence(out, batch_first=True)
        
        # 5. Project to labels
        return self.q8(out), self.q3(out)

In [9]:
# Loss, Optimiser and Metrics 

class LightningSS(pl.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model
        # Store predictions and targets for the whole epoch
        self.validation_step_outputs = []

    def training_step(self, batch, batch_idx):
        # Forward pass
        q8_logits, q3_logits = self.model(batch['input'], batch['lengths'])
        
        # Calculate loss for both heads
        l8, _ = self.model._loss_f1(q8_logits, batch['target_q8'])
        l3, _ = self.model._loss_f1(q3_logits, batch['target_q3'])
        
        loss = l8 + l3
        
        # Log batch metrics
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        q8_logits, q3_logits = self.model(batch['input'], batch['lengths'])
        
        # Just collect data here; don't calculate full F1 yet
        self.validation_step_outputs.append({
            'q8_logits': q8_logits.detach().cpu(),
            'q3_logits': q3_logits.detach().cpu(),
            'target_q8': batch['target_q8'].detach().cpu(),
            'target_q3': batch['target_q3'].detach().cpu()
        })

    def on_validation_epoch_end(self):
        # Initialize lists to hold the FLATTENED predictions/targets
        q8_preds_list = []
        q3_preds_list = []
        t8_target_list = []
        t3_target_list = []

        # Iterate over each batch output stored
        for output in self.validation_step_outputs:
            # 1. Create mask for this specific batch (removes padding)
            # output['target_q8'] shape: [Batch, SeqLen_of_this_batch]
            mask = output['target_q8'] != -100

            # 2. Apply mask to flatten this batch's data
            # taking argmax here handles the shape issue immediately
            
            # Q8 Data
            # Logits shape: [Batch, Seq, 8] -> Masked: [Valid_Tokens, 8] -> Argmax: [Valid_Tokens]
            batch_q8_preds = torch.argmax(output['q8_logits'], dim=-1)[mask]
            batch_t8_target = output['target_q8'][mask]
            
            # Q3 Data
            batch_q3_preds = torch.argmax(output['q3_logits'], dim=-1)[mask]
            batch_t3_target = output['target_q3'][mask]
            
            # Append 1D tensors to list
            q8_preds_list.append(batch_q8_preds)
            t8_target_list.append(batch_t8_target)
            q3_preds_list.append(batch_q3_preds)
            t3_target_list.append(batch_t3_target)

        # 3. Concatenate the 1D tensors (Safe because they are all 1D now)
        q8_preds_all = torch.cat(q8_preds_list)
        t8_target_all = torch.cat(t8_target_list)
        q3_preds_all = torch.cat(q3_preds_list)
        t3_target_all = torch.cat(t3_target_list)

        # 4. Calculate Global F1s
        # Move to CPU numpy for sklearn
        f1_8 = f1_score(t8_target_all.cpu().numpy(), q8_preds_all.cpu().numpy(), average='macro')
        f1_3 = f1_score(t3_target_all.cpu().numpy(), q3_preds_all.cpu().numpy(), average='macro')

        # 5. Harmonic Mean
        if (f1_8 + f1_3) > 0:
            hm_score = (2 * f1_8 * f1_3) / (f1_8 + f1_3)
        else:
            hm_score = 0.0

        # 6. Log & Clear
        print(f"\nEpoch Result -> Score: {hm_score:.4f} | Q8: {f1_8:.4f} | Q3: {f1_3:.4f}")
        self.log_dict({'val_score': hm_score, 'val_q8': f1_8, 'val_q3': f1_3}, prog_bar=True)
        
        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        return optim.Adam(self.model.parameters(), lr=Config.LR)

In [10]:
# from kaggle_secrets import UserSecretsClient
# from huggingface_hub import login
# import trackio

# import io
# user_secrets = UserSecretsClient()
# hf_token = user_secrets.get_secret("hf_token")
# os.environ['HF_TOKEN'] = hf_token 
# login(token=hf_token)
# TRACK_HFID = 'jananiramaseshan/25-t3-nppe2'
# TRACK_PROJECT = 'dlgenai-nppe'

# try:
#     # Using the Config dictionary to log hyperparameters
#     track_logger = trackio.init(
#         project=TRACK_PROJECT,
#         space_id=TRACK_HFID,
#     )
#     config = {}
#     for k, v in Config.__dict__.items():
#         if not k.startswith('__'):
#             config[k] = v

#     print(f"Trackio initialized for {TRACK_HFID}/{TRACK_PROJECT}")
# except Exception as e:
#     print(f"Warning: Trackio initialization failed. Continuing without logging. Error: {e}")
#     track_logger = None 

In [11]:
# # Training Code 


# data = ProteinDataModule(train_df, test_df)
# model = LightningSS(BiGRU())

# trainer = pl.Trainer(max_epochs=Config.EPOCHS)
# trainer.fit(model, data)

# torch.save(model.state_dict(), "/kaggle/working/model.pth")
# # 
# if track_logger:
#     trackio.finish()

In [12]:
# Modified Training Code for Model Selection

# 1. Define the models we want to compare
# We map a Name -> Class definition
model_candidates = {
    "BiRNN": BiRNN,
    "BiGRU": BiGRU
}

best_score = -1.0
best_model_name = ""
best_model_path = ""

# 2. Iterate and Train each model
for name, model_cls in model_candidates.items():
    print(f"\n{'='*20} Training {name} {'='*20}")
    
    # Initialize the specific model architecture
    core_model = model_cls()
    
    # Wrap it in your Lightning Module
    # IMPORTANT: Re-initialize LightningSS every loop to reset optimizers/schedulers
    model_wrapper = LightningSS(core_model)
    
    # Initialize Data Module (Resetting ensures clean split if logic changes, though okay here)
    data_module = ProteinDataModule(train_df, test_df)
    
    # Initialize Trainer
    # We disable the progress bar refresh slightly to keep logs clean for multiple runs
    trainer = pl.Trainer(
        max_epochs=Config.EPOCHS, 
        enable_checkpointing=False, # We will save manually to keep it simple
        logger=False # Optional: disable logger if you don't want separate log folders
    )
    
    # Fit the model
    trainer.fit(model_wrapper, data_module)
    
    # 3. Retrieve the Validation Score
    # We access the metric you logged as 'val_score' in on_validation_epoch_end
    val_score = trainer.callback_metrics.get('val_score').item()
    print(f"--> {name} Finished. Validation Score: {val_score:.4f}")
    
    # Save this specific model
    save_path = f"/kaggle/working/model_{name}.pth"
    torch.save(model_wrapper.state_dict(), save_path)
    
    # 4. Compare and Update Best
    if val_score > best_score:
        best_score = val_score
        best_model_name = name
        best_model_path = save_path

# 5. Final Summary
print(f"\n{'='*20} RESULT {'='*20}")
print(f"Best Model: {best_model_name}")
print(f"Best Score: {best_score:.4f}")
print(f"Saved at:   {best_model_path}")

# if track_logger:
#     trackio.finish()




GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type  | Params | Mode 
----------------------------------------
0 | model | BiRNN | 251 K  | train
----------------------------------------
251 K     Trainable params
0         Non-trainable params
251 K     Total params
1.007     Total estimated model params size (MB)
6         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.1337 | Q8: 0.0836 | Q3: 0.3340


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.3462 | Q8: 0.2445 | Q3: 0.5927


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.3921 | Q8: 0.2782 | Q3: 0.6642


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4061 | Q8: 0.2913 | Q3: 0.6701


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4014 | Q8: 0.2889 | Q3: 0.6577


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.3992 | Q8: 0.2846 | Q3: 0.6681


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4143 | Q8: 0.2976 | Q3: 0.6820


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4162 | Q8: 0.2985 | Q3: 0.6870


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4069 | Q8: 0.2952 | Q3: 0.6546


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4151 | Q8: 0.2979 | Q3: 0.6844


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type  | Params | Mode 
----------------------------------------
0 | model | BiGRU | 746 K  | train
----------------------------------------
746 K     Trainable params
0         Non-trainable params
746 K     Total params
2.985     Total estimated model params size (MB)
6         Modules in train mode
0         Modules in eval mode



Epoch Result -> Score: 0.4228 | Q8: 0.3060 | Q3: 0.6838
--> BiRNN Finished. Validation Score: 0.4228



Sanity Checking: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.0661 | Q8: 0.0375 | Q3: 0.2818


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.3606 | Q8: 0.2546 | Q3: 0.6176


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.3805 | Q8: 0.2691 | Q3: 0.6493


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4006 | Q8: 0.2845 | Q3: 0.6766


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4139 | Q8: 0.2950 | Q3: 0.6931


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4179 | Q8: 0.2979 | Q3: 0.6996


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4180 | Q8: 0.2992 | Q3: 0.6930


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4258 | Q8: 0.3059 | Q3: 0.7005


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4395 | Q8: 0.3192 | Q3: 0.7052


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch Result -> Score: 0.4341 | Q8: 0.3137 | Q3: 0.7041


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.



Epoch Result -> Score: 0.4320 | Q8: 0.3139 | Q3: 0.6926
--> BiGRU Finished. Validation Score: 0.4320

Best Model: BiGRU
Best Score: 0.4320
Saved at:   /kaggle/working/model_BiGRU.pth


In [13]:
# Upload to KaggleHUB
'''
kagglehub.upload(
    path="model.pth",
    repo_id="username/ss-model"
)
'''
# trackio.show(project='pssp')

'\nkagglehub.upload(\n    path="model.pth",\n    repo_id="username/ss-model"\n)\n'

In [14]:
# # Load trained model
# model = LightningSS(BiGRU())
# model.load_state_dict(torch.load("model.pth", map_location="cpu"))
# model.eval()

# data = ProteinDataModule(train_df, test_df)
# data.setup()

# q8_predictions = []
# q3_predictions = []

# with torch.no_grad():
#     for batch in data.test_dataloader():
#         q8_logits, q3_logits = model.model(batch['input'], batch['lengths'])

#         q8_preds = torch.argmax(q8_logits, dim=-1)
#         q3_preds = torch.argmax(q3_logits, dim=-1)

#         for i in range(q8_preds.size(0)):
#             seq_len = batch['lengths'][i]

#             q8_seq = q8_preds[i][:seq_len]
#             q3_seq = q3_preds[i][:seq_len]

#             q8_labels = ''.join(
#                 Config.IDX_TO_Q8[j.item()] for j in q8_seq
#             )
#             q3_labels = ''.join(
#                 Config.IDX_TO_Q3[j.item()] for j in q3_seq
#             )

#             q8_predictions.append(q8_labels)
#             q3_predictions.append(q3_labels)

In [15]:
# Load the Best Trained Model dynamically

print(f"Loading best model: {best_model_name} from {best_model_path}...")

# 1. Re-initialize the correct architecture based on the winner
if best_model_name == "BiRNN":
    core_model = BiRNN()
elif best_model_name == "BiGRU":
    core_model = BiGRU()
else:
    raise ValueError("Unknown model name")

# 2. Wrap and Load Weights
model = LightningSS(core_model)
model.load_state_dict(torch.load(best_model_path, map_location="cpu"))
model.eval()

# 3. Setup Data
data = ProteinDataModule(train_df, test_df)
data.setup()

q8_predictions = []
q3_predictions = []

# 4. Run Inference (Same as before)
with torch.no_grad():
    for batch in data.test_dataloader():
        # Ensure we call the internal model correctly
        q8_logits, q3_logits = model.model(batch['input'], batch['lengths'])

        q8_preds = torch.argmax(q8_logits, dim=-1)
        q3_preds = torch.argmax(q3_logits, dim=-1)

        for i in range(q8_preds.size(0)):
            seq_len = batch['lengths'][i]

            q8_seq = q8_preds[i][:seq_len]
            q3_seq = q3_preds[i][:seq_len]

            q8_labels = ''.join(
                Config.IDX_TO_Q8[j.item()] for j in q8_seq
            )
            q3_labels = ''.join(
                Config.IDX_TO_Q3[j.item()] for j in q3_seq
            )

            q8_predictions.append(q8_labels)
            q3_predictions.append(q3_labels)

print("Inference complete using best model.")

Loading best model: BiGRU from /kaggle/working/model_BiGRU.pth...
Inference complete using best model.


In [16]:
def calculate_competition_metric(model_logits, targets, q8_to_idx, q3_to_idx):
    """
    Computes the Harmonic Mean of F1_Q8 and F1_Q3.
    
    Args:
        model_logits: [batch, seq_len, 9] (Raw output from model)
        targets: [batch, seq_len] (Ground truth Q8 indices)
    """
    # 1. Get Q8 Predictions
    # shape: [batch, seq_len]
    q8_preds = torch.argmax(model_logits, dim=-1)
    
    # 2. Masking (Flatten and remove padding)
    # We assume '0' is the padding index
    mask = targets != 0
    
    valid_q8_preds = q8_preds[mask].cpu().numpy()
    valid_q8_targets = targets[mask].cpu().numpy()
    
    # 3. Calculate F1 for Q8
    # 'macro' treats all classes equally (Helix is as important as Coil)
    f1_q8 = f1_score(valid_q8_targets, valid_q8_preds, average='macro')
    
    # 4. Derived Q3 Predictions & Targets
    # We map the Q8 predictions/targets to Q3 to score the second task
    # Note: We must map the *tensors* before converting to numpy
    q3_preds_tensor = map_preds_q8_to_q3(q8_preds[mask], q8_to_idx, q3_to_idx)
    q3_targets_tensor = map_preds_q8_to_q3(targets[mask], q8_to_idx, q3_to_idx)
    
    valid_q3_preds = q3_preds_tensor.numpy()
    valid_q3_targets = q3_targets_tensor.numpy()
    
    # 5. Calculate F1 for Q3
    f1_q3 = f1_score(valid_q3_targets, valid_q3_preds, average='macro')
    
    # 6. Harmonic Mean
    if (f1_q8 + f1_q3) == 0:
        final_score = 0.0
    else:
        final_score = (2 * f1_q8 * f1_q3) / (f1_q8 + f1_q3)
        
    return final_score, f1_q8, f1_q3

In [17]:
# Create submission
submission = pd.DataFrame({
    'id': test_df['id'].values,
    'sst8': q8_predictions,
    'sst3': q3_predictions
})

# Save ONLY to writable directory
submission_path = "/kaggle/working/submission.csv"
submission.to_csv(submission_path, index=False)

print("submission.csv created at", submission_path)
print(submission.head())

submission.csv created at /kaggle/working/submission.csv
   id                                               sst8  \
0   0  CCCCCHHHHHHHHHHHHHHHHHHCSEEEEEEETCTTCCEEEEEECT...   
1   1  CCCCCCCCCCEEEEEEECSTTEEEEEEECTTHHHHHHHHHCCTTHH...   
2   2  CCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCCHHHHHHHHH...   
3   3  CCCTTHHHHHHHHHHHHHHHHHHHHHHHCCCEEETTHHHHHHHHHH...   
4   4  CCEEEEEECCCTHHHHHHHHHHHHHHHCCEEECCTTTCEEEECSEE...   

                                                sst3  
0  CCCCCCHHHHHHHHHHHHHHHHHCCEEEEEECCCCCCCCEEEEECC...  
1  CCCCCCCCCCEEEEEEECCCCCEEEEEECCCHHHHHHHHHCCCCHH...  
2  CCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCHCCCHHHHHHHH...  
3  CCCCCCHHHHHHHHHHHHHHHHHHHHHHCCCEEECCHHHHHHHHHH...  
4  CCEEEEEECCCCHHHHHHHHHHHHHHHCCCCECCCCCCEEEECCCE...  
