# Notebook: IMDb Sentiment Analysis with ClearML

In [2]:
# Install the ClearML SDK that the cells below import from.
# NOTE(review): the version is unpinned — consider pinning once a known-good release is confirmed.
%pip install clearml


DEPRECATION: Loading egg at /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/jupyter-1.0.0-py3.11.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
from clearml import Task
# Create the ClearML task under which this notebook run is recorded
task = Task.init(project_name="IMDb Sentiment Analysis", task_name="Multi-Model Comparison")

ClearML Task: created new task id=81e355685bf64d779023f5888195dd96
2024-12-19 14:51:31,234 - clearml.Task - INFO - Storing jupyter notebook directly as code
ClearML results page: https://app.clear.ml/projects/bb33ce1736ad44cbbbb6238137b2528a/experiments/81e355685bf64d779023f5888195dd96/output/log


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, 
    classification_report, 
    confusion_matrix, 
    precision_recall_fscore_support
)
from clearml import Task
import matplotlib.pyplot as plt
import seaborn as sns

def load_data(path="../data/IMDB-Dataset.csv"):
    """
    Load the IMDb review dataset from a CSV file and print basic sanity checks.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. The default is the path this notebook has
        always read from, so a bare ``load_data()`` call behaves as before.

    Returns
    -------
    pandas.DataFrame
        The frame exactly as parsed by ``pd.read_csv``; nothing is modified.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist (propagated from ``pd.read_csv``).
    KeyError
        If the file has no ``sentiment`` column (raised while printing the
        label distribution).
    """
    df = pd.read_csv(path)

    # Data validation: size, missing values per column, and class balance
    print("Dataset Information:")
    print(f"Total samples: {len(df)}")
    print(f"Null values:\n{df.isnull().sum()}")
    print(f"Sentiment distribution:\n{df['sentiment'].value_counts()}")

    return df

def preprocess_data(df, max_features=5000, test_size=0.2, random_state=42):
    """
    Turn raw reviews into TF-IDF feature matrices with binary labels.

    The 'sentiment' column is mapped to 1 ('positive') / 0 ('negative'), the
    rows are split into stratified train/test partitions, and a TF-IDF
    vectorizer fitted on the training reviews only is used to transform both
    partitions.

    Returns
    -------
    tuple
        (X_train_vec, X_test_vec, y_train, y_test)
    """
    label_codes = {'positive': 1, 'negative': 0}
    reviews, labels = df['review'], df['sentiment'].map(label_codes)

    # Stratified hold-out split so both partitions keep the class balance
    reviews_train, reviews_test, y_train, y_test = train_test_split(
        reviews, labels,
        test_size=test_size, random_state=random_state, stratify=labels,
    )

    # The vocabulary is learned from the training split; the test split is only transformed
    tfidf = TfidfVectorizer(stop_words='english', max_features=max_features)
    train_matrix = tfidf.fit_transform(reviews_train)
    test_matrix = tfidf.transform(reviews_test)

    return train_matrix, test_matrix, y_train, y_test

def evaluate_model(y_true, y_pred, model_name):
    """
    Score one model's predictions, print the scores, and return them.

    Accuracy comes from ``accuracy_score``; precision, recall and F1 come from
    ``precision_recall_fscore_support`` with ``average='binary'``.

    Returns
    -------
    dict
        Keys 'accuracy', 'precision', 'recall' and 'f1_score'.
    """
    acc = accuracy_score(y_true, y_pred)
    # The fourth element of the returned tuple is not used here
    prf = precision_recall_fscore_support(y_true, y_pred, average='binary')

    scores = {
        'accuracy': acc,
        'precision': prf[0],
        'recall': prf[1],
        'f1_score': prf[2],
    }

    print(f"\n{model_name} Performance:")
    print(f"Accuracy: {scores['accuracy']:.4f}")
    print(f"Precision: {scores['precision']:.4f}")
    print(f"Recall: {scores['recall']:.4f}")
    print(f"F1 Score: {scores['f1_score']:.4f}")

    return scores

def plot_confusion_matrix(y_true, y_pred, model_name, task_logger):
    """
    Render a confusion-matrix heatmap, save it as a PNG, and report it to ClearML.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted labels, passed straight to ``confusion_matrix``.
    model_name : str
        Used in the figure title, the report title, and the PNG file name.
    task_logger
        Object exposing ``report_image(title=..., series=..., local_path=...)``.

    Side effects
    ------------
    Writes ``{model_name}_confusion_matrix.png`` relative to the current
    working directory. The file is left in place after reporting.
    """
    cm = confusion_matrix(y_true, y_pred)

    # Draw on an explicit figure/axes instead of pyplot's implicit "current
    # figure", so this function never touches a figure it did not create.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title(f'{model_name} Confusion Matrix')
        ax.set_xlabel('Predicted Label')
        ax.set_ylabel('True Label')

        # Save plot
        fig.tight_layout()
        confusion_matrix_path = f'{model_name}_confusion_matrix.png'
        fig.savefig(confusion_matrix_path)

        # Log to ClearML
        task_logger.report_image(
            title=f'{model_name} Confusion Matrix',
            series='Confusion Matrix',
            local_path=confusion_matrix_path
        )
    finally:
        # Always release the figure, even if saving or reporting fails;
        # this function is called once per model in a loop.
        plt.close(fig)

def main():
    """
    Run the experiment end to end: load data, vectorize, train each model,
    evaluate it, and report metrics and confusion matrices to ClearML.

    The configuration dictionary registered with ClearML is the single source
    of truth: its values drive the preprocessing step and seed the random
    forest, so what is logged is what actually ran.

    NOTE(review): a task with the same project/task name is also initialised
    in an earlier notebook cell; presumably ClearML hands back that same task
    here — confirm.
    """
    # Initialize ClearML Task
    task = Task.init(
        project_name="IMDb Sentiment Analysis",
        task_name="Multi-Model Comparison"
    )
    # Fetch the logger once instead of on every report call
    logger = task.get_logger()

    # Log configuration and keep the returned dictionary, so the values that
    # are logged are the same ones used below.
    config = task.connect_configuration({
        'vectorizer_max_features': 5000,
        'test_size': 0.2,
        'random_state': 42
    })

    # Load and preprocess data using the logged configuration
    df = load_data()
    X_train, X_test, y_train, y_test = preprocess_data(
        df,
        max_features=config['vectorizer_max_features'],
        test_size=config['test_size'],
        random_state=config['random_state'],
    )

    # Define models. The forest is seeded so repeated runs give the same scores.
    models = {
        'Logistic Regression': LogisticRegression(max_iter=1000),
        'Random Forest': RandomForestClassifier(
            n_estimators=100,
            random_state=config['random_state'],
        ),
    }

    # Model training and evaluation
    results = {}
    for name, model in models.items():
        # Train model
        model.fit(X_train, y_train)

        # Predict
        y_pred = model.predict(X_test)

        # Evaluate
        model_results = evaluate_model(y_test, y_pred, name)
        results[name] = model_results

        # Plot and log confusion matrix
        plot_confusion_matrix(y_test, y_pred, name, logger)

        # Log metrics to ClearML (one point per metric, at iteration 1)
        for metric, value in model_results.items():
            logger.report_scalar(
                title=name,
                series=metric,
                iteration=1,
                value=value
            )

    # Comparative analysis
    print("\nComparative Model Performance:")
    for model_name, metrics in results.items():
        print(f"\n{model_name}:")
        for metric, value in metrics.items():
            print(f"{metric.capitalize()}: {value:.4f}")

# Entry guard: main() runs when this code executes as the top-level module
# and is skipped if the code is imported from elsewhere.
if __name__ == "__main__":
    main()

ClearML Monitor: GPU monitoring failed getting GPU reading, switching off GPU monitoring


Dataset Information:
Total samples: 50000
Null values:
review       0
sentiment    0
dtype: int64
Sentiment distribution:
sentiment
positive    25000
negative    25000
Name: count, dtype: int64

Logistic Regression Performance:
Accuracy: 0.8894
Precision: 0.8831
Recall: 0.8976
F1 Score: 0.8903

Random Forest Performance:
Accuracy: 0.8518
Precision: 0.8609
Recall: 0.8392
F1 Score: 0.8499

Comparative Model Performance:

Logistic Regression:
Accuracy: 0.8894
Precision: 0.8831
Recall: 0.8976
F1_score: 0.8903

Random Forest:
Accuracy: 0.8518
Precision: 0.8609
Recall: 0.8392
F1_score: 0.8499
