In [3]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split
import optuna
from tqdm import tqdm

# Set random seed for reproducibility
torch.manual_seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load data
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Create text input by combining relevant features
def create_text_input(df):
    return df.apply(lambda x: f"HomePlanet: {x['HomePlanet']} CryoSleep: {x['CryoSleep']} \
        Destination: {x['Destination']} VIP: {x['VIP']} Age: {x['Age']} \
        RoomService: {x['RoomService']} FoodCourt: {x['FoodCourt']} \
        ShoppingMall: {x['ShoppingMall']} Spa: {x['Spa']} VRDeck: {x['VRDeck']}", axis=1)

# Custom Dataset class
class SpaceshipDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx])
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }

# Training function
def train_model(model, train_loader, val_loader, optimizer, device, num_epochs):
    best_val_loss = float('inf')
    
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        
        for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            optimizer.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_train_loss += loss.item()
            
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        total_val_loss = 0
        
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)
                
                outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
                total_val_loss += outputs.loss.item()
        
        avg_val_loss = total_val_loss / len(val_loader)
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            
    return best_val_loss

# Objective function for Optuna
def objective(trial):
    # Define hyperparameters to optimize
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64]),
        'num_epochs': trial.suggest_int('num_epochs', 2, 5),
        'max_length': trial.suggest_categorical('max_length', [128, 256, 512])
    }
    
    # Create datasets
    train_texts = create_text_input(X_train)
    val_texts = create_text_input(X_val)
    
    # Initialize tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
    model = AutoModelForSequenceClassification.from_pretrained(
        'bert-base-uncased', 
        num_labels=2
    ).to(device)
    
    # Create datasets and dataloaders
    train_dataset = SpaceshipDataset(
        train_texts, 
        y_train, 
        tokenizer, 
        params['max_length']
    )
    val_dataset = SpaceshipDataset(
        val_texts, 
        y_val, 
        tokenizer, 
        params['max_length']
    )
    
    train_loader = DataLoader(
        train_dataset, 
        batch_size=params['batch_size'], 
        shuffle=True
    )
    val_loader = DataLoader(
        val_dataset, 
        batch_size=params['batch_size']
    )
    
    # Initialize optimizer
    optimizer = AdamW(model.parameters(), lr=params['learning_rate'])
    
    # Train and evaluate
    try:
        best_val_loss = train_model(
            model, 
            train_loader, 
            val_loader, 
            optimizer, 
            device, 
            params['num_epochs']
        )
        return best_val_loss
    except Exception as e:
        print(f"Trial failed: {e}")
        return float('inf')

# Prepare data
X = train_data.drop('Transported', axis=1)
y = train_data['Transported'].astype(int)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)  # Adjust number of trials as needed

# Print best parameters
print("Best parameters:", study.best_params)
print("Best validation loss:", study.best_value)

# Train final model with best parameters
best_params = study.best_params

# Initialize final model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
final_model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2
).to(device)

# Create final datasets
train_texts = create_text_input(X)
test_texts = create_text_input(test_data)

train_dataset = SpaceshipDataset(
    train_texts,
    y,
    tokenizer,
    best_params['max_length']
)
test_dataset = SpaceshipDataset(
    test_texts,
    [0] * len(test_data),  # Dummy labels for test set
    tokenizer,
    best_params['max_length']
)

# Create final dataloaders
train_loader = DataLoader(
    train_dataset,
    batch_size=best_params['batch_size'],
    shuffle=True
)
test_loader = DataLoader(
    test_dataset,
    batch_size=best_params['batch_size']
)

# Train final model
optimizer = AdamW(final_model.parameters(), lr=best_params['learning_rate'])
train_model(final_model, train_loader, test_loader, optimizer, device, best_params['num_epochs'])

# Make predictions
final_model.eval()
predictions = []

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        
        outputs = final_model(input_ids=input_ids, attention_mask=attention_mask)
        predictions.extend(torch.argmax(outputs.logits, dim=1).cpu().numpy())

# Create submission file
submission = pd.DataFrame({
    'PassengerId': test_data['PassengerId'],
    'Transported': np.array(predictions).astype(bool)
})

# Save submission
submission.to_csv('sample_submission.csv', index=False)

  from .autonotebook import tqdm as notebook_tqdm
[I 2024-11-11 16:07:28,702] A new study created in memory with name: no-name-c5b8a55d-ad4f-41c3-a76f-f2160dfa6087
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3:   0%|          | 0/109 [00:00<?, ?it/s]
[I 2024-11-11 16:08:26,412] Trial 0 finished with value: inf and parameters: {'learning_rate': 1.6093487814486677e-05, 'batch_size': 64, 'num_epochs': 3, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 6911


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3:   0%|          | 0/435 [00:00<?, ?it/s]
[I 2024-11-11 16:08:27,211] Trial 1 finished with value: inf and parameters: {'learning_rate': 1.7807462539383518e-05, 'batch_size': 16, 'num_epochs': 3, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 4714


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2:   0%|          | 0/218 [00:00<?, ?it/s]
[I 2024-11-11 16:08:27,789] Trial 2 finished with value: inf and parameters: {'learning_rate': 0.0006611694926019982, 'batch_size': 32, 'num_epochs': 2, 'max_length': 256}. Best is trial 0 with value: inf.


Trial failed: 1992


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3:   0%|          | 0/218 [00:00<?, ?it/s]
[I 2024-11-11 16:08:28,282] Trial 3 finished with value: inf and parameters: {'learning_rate': 9.019962351075856e-05, 'batch_size': 32, 'num_epochs': 3, 'max_length': 128}. Best is trial 0 with value: inf.


Trial failed: 3820


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5:   0%|          | 0/109 [00:00<?, ?it/s]
[I 2024-11-11 16:08:28,877] Trial 4 finished with value: inf and parameters: {'learning_rate': 1.1498238533492045e-05, 'batch_size': 64, 'num_epochs': 5, 'max_length': 128}. Best is trial 0 with value: inf.


Trial failed: 2164


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2:   0%|          | 0/435 [00:00<?, ?it/s]
[I 2024-11-11 16:08:29,411] Trial 5 finished with value: inf and parameters: {'learning_rate': 2.1475292359823386e-05, 'batch_size': 16, 'num_epochs': 2, 'max_length': 256}. Best is trial 0 with value: inf.


Trial failed: 1183


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/4:   0%|          | 0/218 [00:00<?, ?it/s]
[I 2024-11-11 16:08:29,920] Trial 6 finished with value: inf and parameters: {'learning_rate': 1.3341103624029123e-05, 'batch_size': 32, 'num_epochs': 4, 'max_length': 256}. Best is trial 0 with value: inf.


Trial failed: 2273


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/4:   0%|          | 0/435 [00:00<?, ?it/s]
[I 2024-11-11 16:08:30,391] Trial 7 finished with value: inf and parameters: {'learning_rate': 0.0004012448082479393, 'batch_size': 16, 'num_epochs': 4, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 3121


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/4:   0%|          | 0/435 [00:00<?, ?it/s]
[I 2024-11-11 16:08:30,875] Trial 8 finished with value: inf and parameters: {'learning_rate': 0.00014786895183268036, 'batch_size': 16, 'num_epochs': 4, 'max_length': 256}. Best is trial 0 with value: inf.


Trial failed: 5662


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3:   0%|          | 0/109 [00:00<?, ?it/s]
[I 2024-11-11 16:08:31,369] Trial 9 finished with value: inf and parameters: {'learning_rate': 0.00035285347919807654, 'batch_size': 64, 'num_epochs': 3, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 5670


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5:   0%|          | 0/109 [00:00<?, ?it/s]
[I 2024-11-11 16:08:31,915] Trial 10 finished with value: inf and parameters: {'learning_rate': 4.2991168353868866e-05, 'batch_size': 64, 'num_epochs': 5, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 970


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3:   0%|          | 0/435 [00:00<?, ?it/s]
[I 2024-11-11 16:08:32,354] Trial 11 finished with value: inf and parameters: {'learning_rate': 3.427327410567354e-05, 'batch_size': 16, 'num_epochs': 3, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 1957


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3:   0%|          | 0/109 [00:00<?, ?it/s]
[I 2024-11-11 16:08:32,824] Trial 12 finished with value: inf and parameters: {'learning_rate': 6.117736231259217e-05, 'batch_size': 64, 'num_epochs': 3, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 4812


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2:   0%|          | 0/435 [00:00<?, ?it/s]
[I 2024-11-11 16:08:33,317] Trial 13 finished with value: inf and parameters: {'learning_rate': 2.5427811565479325e-05, 'batch_size': 16, 'num_epochs': 2, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3:   0%|          | 0/109 [00:00<?, ?it/s]
[I 2024-11-11 16:08:33,780] Trial 14 finished with value: inf and parameters: {'learning_rate': 1.0392343488054326e-05, 'batch_size': 64, 'num_epochs': 3, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 87


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/4:   0%|          | 0/109 [00:00<?, ?it/s]
[I 2024-11-11 16:08:34,264] Trial 15 finished with value: inf and parameters: {'learning_rate': 2.138113911095935e-05, 'batch_size': 64, 'num_epochs': 4, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 1233


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2:   0%|          | 0/435 [00:00<?, ?it/s]
[I 2024-11-11 16:08:34,755] Trial 16 finished with value: inf and parameters: {'learning_rate': 0.00015158465155308494, 'batch_size': 16, 'num_epochs': 2, 'max_length': 128}. Best is trial 0 with value: inf.


Trial failed: 3296


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3:   0%|          | 0/435 [00:00<?, ?it/s]
[I 2024-11-11 16:08:35,297] Trial 17 finished with value: inf and parameters: {'learning_rate': 5.646914255787812e-05, 'batch_size': 16, 'num_epochs': 3, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 472


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/4:   0%|          | 0/109 [00:00<?, ?it/s]
[I 2024-11-11 16:08:35,882] Trial 18 finished with value: inf and parameters: {'learning_rate': 1.7790914161323355e-05, 'batch_size': 64, 'num_epochs': 4, 'max_length': 512}. Best is trial 0 with value: inf.


Trial failed: 4446


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2:   0%|          | 0/218 [00:00<?, ?it/s]
[I 2024-11-11 16:08:36,345] Trial 19 finished with value: inf and parameters: {'learning_rate': 3.2217837349490723e-05, 'batch_size': 32, 'num_epochs': 2, 'max_length': 128}. Best is trial 0 with value: inf.


Trial failed: 5973
Best parameters: {'learning_rate': 1.6093487814486677e-05, 'batch_size': 64, 'num_epochs': 3, 'max_length': 512}
Best validation loss: inf


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3:   0%|          | 0/136 [00:00<?, ?it/s]

: 