In [26]:
import mlflow
import mlflow.sklearn
from datetime import datetime

# Store MLflow runs in a local "mlruns" folder (path is relative to the working directory)
tracking_uri = "file:./mlruns"
mlflow.set_tracking_uri(tracking_uri)

# Select the experiment that runs started later in this notebook are recorded under
experiment_name = "transformer_finetuning_experiment"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location=('file:///c:/Users/HUMAIDU/Desktop/groub '
 'B/Thrive_Internship_ML_A/notebooks/mlruns/867608244561598634'), creation_time=1761930430291, experiment_id='867608244561598634', last_update_time=1761930430291, lifecycle_stage='active', name='transformer_finetuning_experiment', tags={}>

In [27]:
import pandas as pd
# Load the fine-tuning dataset and preview its first rows
fine_tuning_csv = '../file-for-fineTuning.csv'
df = pd.read_csv(fine_tuning_csv)
df.head()

Unnamed: 0,text,label,ticket_id,agent_name,text_length
0,Where can I find API rate limits documentation?,other,1000,Dana,47
1,Can I migrate data from Freshdesk?,other,1001,Bob,34
2,Cannot update billing address; page keeps relo...,billing,1002,Charlie,52
3,Looking for a product roadmap or upcoming feat...,other,1003,Dana,51
4,Dark mode toggled but UI stays light.,tech_support,1004,Alice,37


In [28]:
# Ticket text as a plain Python list, one entry per row
texts = df["text"].tolist()

# Integer class ids: each code indexes into the categorical view of the label column
label_categories = df["label"].astype("category")
labels = label_categories.cat.codes.tolist()

Tokenize the Text Data

In [29]:
### Using the DistilBERT tokenizer:
from transformers import DistilBertTokenizerFast

checkpoint_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizerFast.from_pretrained(checkpoint_name)

# Tokenize every ticket text: truncate to at most 128 tokens and pad the batch
encodings = tokenizer(
    texts,
    truncation=True,
    padding=True,
    max_length=128,
)



Creating Dataset for Training

In [30]:
from torch.utils.data import Dataset
import torch

class TicketDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with integer class labels.

    ``encodings`` is a mapping whose values can be indexed by sample position;
    ``labels`` is an indexable sequence of class ids of the same length.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of samples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors, with the target under 'labels'."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        # The target is stored as a long tensor under the 'labels' key
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample



In [31]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the tickets for validation; the fixed seed keeps the split reproducible
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)
# Backward-compatible alias: later cells still reference the original misspelled name
val_labes = val_labels

# Tokenize each split on its own (same truncation / padding settings for both)
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=128)

# Wrap encodings and labels so the Trainer can index individual samples
train_dataset = TicketDataset(train_encodings, train_labels)
val_dataset = TicketDataset(val_encodings, val_labels)

Load Pre-Trained DistilBERT and Fine-Tune

In [32]:
# Running this to check if all packages are installed correctly
import torch
import transformers
import accelerate

# Report the installed version of each library, one per line
print(
    f"PyTorch version: {torch.__version__}",
    f"Transformers version: {transformers.__version__}",
    f"Accelerate version: {accelerate.__version__}",
    sep="\n",
)

# Checking if GPU is available
print(f"GPU available: {torch.cuda.is_available()}")

PyTorch version: 2.9.0+cpu
Transformers version: 4.30.2
Accelerate version: 1.11.0
GPU available: False


In [33]:
# Post-restart sanity check: confirm accelerate is importable and report its version
import accelerate
print(f"Accelerate version: {accelerate.__version__}")

# The success message below is only reached if this import does not raise
from transformers import Trainer, TrainingArguments
print("✅ Trainer imported successfully!")

Accelerate version: 1.11.0
✅ Trainer imported successfully!


In [34]:
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
import torch

# Making sure the number of labels matches the dataset
num_labels = len(set(labels))  # the notebook's author expects 4 classes here

# Recover the human-readable class names behind the integer codes so the saved
# model config carries real label names instead of generic LABEL_0, LABEL_1, ...
# `labels` was produced from df["label"].astype("category").cat.codes, so code i
# corresponds to categories[i].
label_names = [str(name) for name in df["label"].astype("category").cat.categories]
id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in id2label.items()}

# Only attach the mapping when it is consistent with the observed codes
# (e.g. missing labels would add an extra code that has no category name).
label_kwargs = {}
if len(label_names) == num_labels:
    label_kwargs = {"id2label": id2label, "label2id": label2id}

model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=num_labels,
    **label_kwargs,
)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_dir='./logs',
    logging_steps=10,
    # Evaluate and checkpoint once per epoch so the best checkpoint can be restored
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,  # Added for better training
    metric_for_best_model="eval_loss",  # Added for better training
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
)

# Now training the model
trainer.train()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'pre_classifier.weight', 'classifier.bias', 'pre_classifier.

{'loss': 1.3211, 'learning_rate': 4.3333333333333334e-05, 'epoch': 0.4}


 27%|██▋       | 20/75 [00:20<00:53,  1.02it/s]

{'loss': 0.9756, 'learning_rate': 3.6666666666666666e-05, 'epoch': 0.8}


 33%|███▎      | 25/75 [00:24<00:46,  1.07it/s]
 33%|███▎      | 25/75 [00:25<00:46,  1.07it/s]

{'eval_loss': 0.5605160593986511, 'eval_runtime': 1.0746, 'eval_samples_per_second': 92.126, 'eval_steps_per_second': 6.514, 'epoch': 1.0}


 40%|████      | 30/75 [00:32<00:50,  1.12s/it]

{'loss': 0.6312, 'learning_rate': 3e-05, 'epoch': 1.2}


 53%|█████▎    | 40/75 [00:41<00:33,  1.05it/s]

{'loss': 0.4467, 'learning_rate': 2.3333333333333336e-05, 'epoch': 1.6}


 67%|██████▋   | 50/75 [00:50<00:21,  1.14it/s]

{'loss': 0.3424, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}



 67%|██████▋   | 50/75 [00:51<00:21,  1.14it/s]

{'eval_loss': 0.2856930196285248, 'eval_runtime': 1.0314, 'eval_samples_per_second': 95.984, 'eval_steps_per_second': 6.787, 'epoch': 2.0}


 80%|████████  | 60/75 [01:02<00:14,  1.05it/s]

{'loss': 0.273, 'learning_rate': 1e-05, 'epoch': 2.4}


 93%|█████████▎| 70/75 [01:11<00:04,  1.02it/s]

{'loss': 0.3206, 'learning_rate': 3.3333333333333333e-06, 'epoch': 2.8}


                                               
100%|██████████| 75/75 [01:19<00:00,  1.11s/it]

{'eval_loss': 0.26141756772994995, 'eval_runtime': 1.2868, 'eval_samples_per_second': 76.935, 'eval_steps_per_second': 5.44, 'epoch': 3.0}


100%|██████████| 75/75 [01:20<00:00,  1.08s/it]

{'train_runtime': 81.1512, 'train_samples_per_second': 14.639, 'train_steps_per_second': 0.924, 'train_loss': 0.5942623805999756, 'epoch': 3.0}





TrainOutput(global_step=75, training_loss=0.5942623805999756, metrics={'train_runtime': 81.1512, 'train_samples_per_second': 14.639, 'train_steps_per_second': 0.924, 'train_loss': 0.5942623805999756, 'epoch': 3.0})

In [None]:
import numpy as np

# Run the fine-tuned model over the validation set
predictions = trainer.predict(val_dataset)

# The model outputs logits, so the predicted class is the index of the largest one
logits = predictions.predictions
y_pred = np.argmax(logits, axis=1)

# Ground-truth class ids for the same validation samples
y_true = np.array(val_labes)


100%|██████████| 7/7 [00:01<00:00,  6.01it/s]


In [36]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Shared keyword arguments: every per-class score is averaged with "weighted"
weighted = {"average": "weighted"}

accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, **weighted)
precision = precision_score(y_true, y_pred, **weighted)
recall = recall_score(y_true, y_pred, **weighted)

# Collect the four scores under the names used when they are saved later
metrics = dict(
    accuracy=accuracy,
    f1_weighted=f1,
    precision_weighted=precision,
    recall_weighted=recall,
)

print(metrics)


{'accuracy': 0.898989898989899, 'f1_weighted': 0.8979887090940543, 'precision_weighted': 0.8973905723905724, 'recall_weighted': 0.898989898989899}


In [37]:
# Evaluate on the validation set through the Trainer.
# The result is kept under its own name so the classification `metrics` dict
# computed earlier (accuracy / F1 / precision / recall), which a later cell
# writes to artifacts/transformer_metrics.json, is not overwritten.
eval_metrics = trainer.evaluate()
print(eval_metrics)



100%|██████████| 7/7 [00:01<00:00,  6.07it/s]

{'eval_loss': 0.26141756772994995, 'eval_runtime': 1.2435, 'eval_samples_per_second': 79.615, 'eval_steps_per_second': 5.629, 'epoch': 3.0}





In [38]:
# Persist the fine-tuned model and the tokenizer files into the same folder
save_dir = "models/distilbert-ticket-classifier"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('models/distilbert-ticket-classifier\\tokenizer_config.json',
 'models/distilbert-ticket-classifier\\special_tokens_map.json',
 'models/distilbert-ticket-classifier\\vocab.txt',
 'models/distilbert-ticket-classifier\\added_tokens.json',
 'models/distilbert-ticket-classifier\\tokenizer.json')

In [None]:
import os
import json

# Output locations, relative to the working directory
model_dir = "models/distilbert-ticket-classifier"
artifacts_dir = "artifacts"
results_dir = "results"

# Make sure every output folder exists before anything is written into it
for out_dir in (model_dir, artifacts_dir, results_dir):
    os.makedirs(out_dir, exist_ok=True)

# --- Model & tokenizer ---------------------------------------------------
model.save_pretrained(model_dir)
tokenizer.save_pretrained(model_dir)
print(f"✅ Model and tokenizer saved to: {model_dir}")

# --- Metrics -------------------------------------------------------------
# Recompute the validation scores from y_true / y_pred and store them as JSON
metrics = {
    "accuracy": accuracy_score(y_true, y_pred),
    "f1_weighted": f1_score(y_true, y_pred, average="weighted"),
    "precision_weighted": precision_score(y_true, y_pred, average="weighted"),
    "recall_weighted": recall_score(y_true, y_pred, average="weighted"),
}

metrics_path = os.path.join(artifacts_dir, "transformer_metrics.json")
with open(metrics_path, "w") as f:
    json.dump(metrics, f, indent=4)

print(f"✅ Metrics saved to: {artifacts_dir}/transformer_metrics.json")

# --- Evaluation summary --------------------------------------------------
# Plain-text report with each score rounded to four decimals
summary_text = (
    f"Transformer Evaluation Report\n"
    f"--------------------------------\n"
    f"Accuracy: {metrics['accuracy']:.4f}\n"
    f"F1 Score (weighted): {metrics['f1_weighted']:.4f}\n"
    f"Precision (weighted): {metrics['precision_weighted']:.4f}\n"
    f"Recall (weighted): {metrics['recall_weighted']:.4f}\n"
)

report_path = os.path.join(results_dir, "transformer_evaluation_report.txt")
with open(report_path, "w") as f:
    f.write(summary_text)

print(f"✅ Evaluation report saved to: {results_dir}/transformer_evaluation_report.txt")


✅ Model and tokenizer saved to: models/distilbert-ticket-classifier
✅ Metrics saved to: artifacts/transformer_metrics.json
✅ Evaluation report saved to: results/transformer_evaluation_report.txt


In [40]:
import os, json

# Ensure the artifacts folder exists, then write the current `metrics` dict as JSON
os.makedirs("artifacts", exist_ok=True)

metrics_json = json.dumps(metrics, indent=4)
with open("artifacts/transformer_metrics.json", "w") as f:
    f.write(metrics_json)


In [41]:
# Close any MLflow run that is still open before a new one is started below
open_run = mlflow.active_run()
if open_run is not None:
    mlflow.end_run()


In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import os, json

# Metrics after model evaluation 
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average="weighted")
precision = precision_score(y_true, y_pred, average="weighted")
recall = recall_score(y_true, y_pred, average="weighted")

# Define directories
model_dir = "models/distilbert-ticket-classifier"
artifacts_dir = "artifacts"
os.makedirs(model_dir, exist_ok=True)
os.makedirs(artifacts_dir, exist_ok=True)

# Start MLflow run (the run name carries a timestamp so repeated runs stay distinguishable)
with mlflow.start_run(run_name=f"DistilBERT_Run_{datetime.now().strftime('%Y%m%d_%H%M%S')}"):
    
    # Log parameters.
    # Epochs and batch size are read from the TrainingArguments actually used
    # for training, so the logged values cannot drift from the real
    # configuration (the previous hard-coded epoch count of 4 did not match
    # the 3 epochs the model was trained for).
    mlflow.log_param("model_name", "distilbert-base-uncased")
    mlflow.log_param("epochs", training_args.num_train_epochs)
    mlflow.log_param("batch_size", training_args.per_device_train_batch_size)
    
    # Log metrics
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1_weighted", f1)
    mlflow.log_metric("precision_weighted", precision)
    mlflow.log_metric("recall_weighted", recall)
    
    # Save metrics to file
    metrics = {
        "accuracy": accuracy,
        "f1_weighted": f1,
        "precision_weighted": precision,
        "recall_weighted": recall,
    }
    with open(os.path.join(artifacts_dir, "transformer_metrics.json"), "w") as f:
        json.dump(metrics, f, indent=4)
    
    # Save model and tokenizer
    model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    
    # Log artifacts to MLflow (everything inside both folders is uploaded to the run)
    mlflow.log_artifacts(artifacts_dir)
    mlflow.log_artifacts(model_dir)

print("✅ MLflow run completed and all artifacts logged successfully!")


✅ MLflow run completed and all artifacts logged successfully!
