### Check whether a GPU is available

#### For NVIDIA systems

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)

if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
import torch

# Use the CUDA backend when PyTorch can see a CUDA-capable GPU; otherwise
# fall back to the CPU.
has_cuda = torch.cuda.is_available()
print('GPU is available for acceleration.' if has_cuda else 'GPU is not available. Using CPU.')
device = torch.device("cuda" if has_cuda else "cpu")

print('Selected device:', device)


#### For Apple Silicon systems

In [None]:
import torch

# Use the MPS (Metal Performance Shaders) backend when PyTorch reports it as
# available; otherwise fall back to the CPU.
# NOTE: this cell assigns `device` unconditionally, so run only the device
# cell that matches your hardware.
backend, message = (
    ("mps", 'Metal is available for acceleration.')
    if torch.backends.mps.is_available()
    else ("cpu", 'Metal is not available. Using CPU.')
)
print(message)
device = torch.device(backend)

print('Selected device:', device)


### Import necessary libraries

In [None]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, AutoTokenizer, AdamW
from sklearn.metrics import classification_report
import numpy as np

from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import time

import wandb

### Configure "wandb" login

In [None]:
# Authenticate with wandb, then open the run that the later cells log to.
wandb.login()

# Hyperparameters and run metadata tracked with the run.
run_config = {
    "learning_rate": 2e-5,
    "architecture": "DistilBERT",
    "dataset": "full",
    "epochs": 3,
}

wandb.init(
    project="__project____",  # wandb project where this run will be logged
    name="__name___",
    config=run_config,
)

### Read the dataset files

In [None]:
# Replace each placeholder with the path to the CSV file of that split.
# ReviewsDataset reads the `text` and `label` columns of these frames.
train_csv = 'Set your dataset path'
validate_csv = 'Set your dataset path'
test_csv = 'Set your dataset path'

train_df = pd.read_csv(train_csv)
validate_df = pd.read_csv(validate_csv)
test_df = pd.read_csv(test_csv)

In [None]:
class ReviewsDataset(Dataset):
    """Map-style dataset that tokenizes one review per item.

    Args:
        data: Object exposing ``text`` and ``label`` as pandas Series
            (e.g. a DataFrame with those two columns).
        tokenizer: Tokenizer providing ``encode_plus``.
        max_len: Length every encoded sequence is padded/truncated to.
    """

    def __init__(self, data, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.text = data.text
        self.labels = data.label
        self.max_len = max_len

    def __len__(self):
        """Return the number of reviews."""
        return len(self.text)

    def __getitem__(self, idx):
        """Return the raw text, encoded inputs and label of the review at position ``idx``."""
        # Look rows up by position, not by index label: a frame that was
        # filtered, shuffled or split keeps its original labels, and a plain
        # `series[idx]` would then raise KeyError or pick the wrong row.
        text = str(self.text.iloc[idx])
        labels = self.labels.iloc[idx]

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
            truncation=True
        )

        return {
            'review_text': text,
            # encode_plus returns a batch dimension; flatten it away so the
            # DataLoader can stack items itself.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(labels, dtype=torch.long)
        }

### Generate tokenizer

In [None]:
# Load the tokenizer for the same checkpoint name the model cell uses.
checkpoint_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)

### Dataset and DataLoader

In [None]:
# Tune both values to what your GPU memory allows.
max_len = 256  # maximum sequence length
batch_size = 16  # reviews per batch

# Training split: shuffle=True so batches are drawn in shuffled order.
train_dataset = ReviewsDataset(data=train_df, tokenizer=tokenizer, max_len=max_len)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation split: default (unshuffled) order.
validate_dataset = ReviewsDataset(data=validate_df, tokenizer=tokenizer, max_len=max_len)
validate_loader = DataLoader(validate_dataset, batch_size=batch_size)

### Load the model

In [None]:
# Load the pretrained DistilBERT checkpoint with a sequence-classification head.
# NOTE(review): no `num_labels` is passed, so the library default is used;
# confirm it matches the number of classes in the `label` column.
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')

# Move the weights to the selected device. The training and validation loops
# send every batch to `device`, and PyTorch raises an error when the model
# and its inputs live on different devices.
model = model.to(device)

### Set the optimizer

In [None]:
# Use PyTorch's AdamW: the `transformers.AdamW` class is deprecated in favour
# of it. `eps` and `weight_decay` are passed explicitly to keep the values the
# transformers optimizer used by default (torch.optim.AdamW would otherwise
# use eps=1e-8 and weight_decay=0.01).
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=2e-5,  # Set the desired learning rate (lr)
    eps=1e-6,
    weight_decay=0.0,
)

### Training Loop

In [None]:
# Fine-tune the model for `num_epochs` passes over `train_loader`, printing
# the loss and a time estimate periodically and logging per-epoch accuracy
# and average loss to wandb.
num_epochs = 3  # Number of epochs
log_every = 100  # Print a progress line every `log_every` batches
batches_per_epoch = len(train_loader)
total_batches = num_epochs * batches_per_epoch
total_start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    total_loss, batch_count = 0, 0
    # Accuracy counters are per epoch, so every epoch starts from zero.
    train_correct, train_total = 0, 0

    for batch in train_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Passing `labels` makes the model return the loss next to the logits.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        batch_loss = loss.item()
        total_loss += batch_loss
        batch_count += 1

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate predictions from the model's logits
        batch_predictions = torch.argmax(outputs.logits, dim=1)

        # Update the count of correctly predicted labels
        train_correct += (batch_predictions == labels).sum().item()
        train_total += labels.size(0)

        # Print loss and remaining time every `log_every` batches
        if batch_count % log_every == 0:
            # Extrapolate the total run time from the average time per batch
            # seen so far across all epochs.
            total_elapsed_time = time.time() - total_start_time
            batches_done = epoch * batches_per_epoch + batch_count
            estimated_total_time = total_elapsed_time / batches_done * total_batches
            remaining_time = estimated_total_time - total_elapsed_time
            print(f"Epoch {epoch+1}/{num_epochs}, Batch {batch_count}, Loss: {batch_loss:.4f}, "
                  f"Time remaining: {remaining_time // 60:.0f}m {remaining_time % 60:.0f}s")

    # Guard both averages so an empty loader reports 0 instead of raising
    # ZeroDivisionError.
    train_acc = train_correct / train_total if train_total else 0
    avg_loss = total_loss / batch_count if batch_count else 0
    print(f"End of Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")

    # Log training metrics to wandb
    wandb.log({"train_accuracy": train_acc, "train_loss": avg_loss})

total_end_time = time.time()
print(f"Total training time: {(total_end_time - total_start_time)//60:.0f}m {(total_end_time - total_start_time)%60:.0f}s")


### Validation Loop

In [None]:
# Evaluate the model on `validate_loader`: accumulate loss and accuracy,
# collect predictions and true labels for the report cells, and log the
# aggregate metrics to wandb.
val_total_loss = 0
val_correct = 0
val_total = 0
total_val_time = 0
predictions, true_labels = [], []
num_val_batches = len(validate_loader)

model.eval()  # Set the model to evaluation mode

progress_bar = tqdm(validate_loader, desc="Validating", leave=False)
# Count batches with enumerate instead of reading `progress_bar.n`: while
# iterating, tqdm only syncs that attribute when it refreshes the display, so
# it can lag behind the real batch count and skew the time estimate.
for batches_done, batch in enumerate(progress_bar, start=1):
    val_start_time = time.time()

    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    labels = batch['labels'].to(device)

    with torch.no_grad():
        # Pass labels to the model to calculate loss
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)

    val_total_loss += outputs.loss.item()

    # Convert logits to predictions
    batch_predictions = torch.argmax(outputs.logits, dim=1)
    predictions.extend(batch_predictions.tolist())
    true_labels.extend(labels.tolist())

    # Update correct and total counts for accuracy
    val_correct += (batch_predictions == labels).sum().item()
    val_total += labels.size(0)

    total_val_time += time.time() - val_start_time

    # Updating progress bar with elapsed and remaining time
    avg_batch_time = total_val_time / batches_done
    remaining_time = avg_batch_time * (num_val_batches - batches_done)
    progress_bar.set_postfix(elapsed=f"{total_val_time // 60:.0f}m {total_val_time % 60:.0f}s",
                             remaining=f"{remaining_time // 60:.0f}m {remaining_time % 60:.0f}s")

# Calculate average validation loss and accuracy (0 when there is no data)
val_avg_loss = val_total_loss / num_val_batches if num_val_batches > 0 else 0
val_acc = val_correct / val_total if val_total > 0 else 0

# Log validation metrics to wandb
wandb.log({"val_accuracy": val_acc, "val_loss": val_avg_loss})


### Printing the results

In [None]:
# Text classification report comparing the validation predictions with the
# true labels.
validation_summary = classification_report(true_labels, predictions)
print(validation_summary)

### Reporting the result to wandb

In [None]:
# Imported here because the notebook's import cell does not bring `pickle`
# into scope; without it the dump below raises NameError.
import pickle

# Build the classification report as a dict (output_dict=True) so it can be
# serialized.
report = classification_report(true_labels, predictions, output_dict=True)
print(report)
# Save as a serialized object
with open('classification_report.pkl', 'wb') as file:
    pickle.dump(report, file)

In [None]:
# End the wandb run that was opened with wandb.init() earlier in the notebook.
wandb.finish()