In [None]:
from google.colab import drive
import os

# Mount Drive
drive.mount('/content/drive', force_remount=True)

# Check for processed_dataset.csv
dataset_path = '/content/drive/MyDrive/Sentiment_Project/processed_dataset.csv'
if os.path.exists(dataset_path):
    print(f"Found dataset at: {dataset_path}")
else:
    print(f"Dataset not found at: {dataset_path}")

# List all files in Sentiment_Project
sentiment_project_dir = '/content/drive/MyDrive/Sentiment_Project'
print(f"Contents of {sentiment_project_dir}:")
!ls "{sentiment_project_dir}"

Mounted at /content/drive
Found dataset at: /content/drive/MyDrive/Sentiment_Project/processed_dataset.csv
Contents of /content/drive/MyDrive/Sentiment_Project:
 coordinator_logs.txt
 Data_Analysis_Quality_Preprocessing.ipynb
 data_analysis_report.json
 dataset_data.json
 explainability_output
 explainability_outputs
 hyperparams.json
 Notebook_5_CodeGen_Explainability.ipynb
 Part_1_Environment_Setup.ipynb
 Part_3_Model_Training_and_Evaluation.ipynb
 part3_output.json
 part4_output.json
 preprocessed_data.pt
 processed_dataset.csv
 quality_check_report.json
 selected_config_checkpoint.pkl
 Sentiment_Analysis_Model_Optimization.ipynb
 trained_traditional_ml_random_forest-like_encoder.joblib
 trained_traditional_ml_random_forest-like.joblib
 trained_traditional_ml_random_forest-like_vectorizer.joblib
'train_traditional_ml_(random_forest-like).py'
 train_traditional_ml_random_forest-like.py
 user_dataset_prompt.json
 user_feedback.csv


In [None]:
# Install Ray; the coordinator cell below imports it and uses it to run the Coordinator Agent actor.
!pip install ray

Collecting ray
  Downloading ray-2.46.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (19 kB)
Downloading ray-2.46.0-cp311-cp311-manylinux2014_x86_64.whl (68.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ray
Successfully installed ray-2.46.0


In [None]:
# Cell 0: Initialize Coordinator Agent
# Purpose: Set up the Coordinator Agent for logging and monitoring in Notebook 2.

import ray
import psutil
import subprocess
from datetime import datetime
import logging
import json
import os
import torch

# Initialize Ray
# Start a local Ray instance only when none is attached to this kernel, so the
# cell can be re-run without tearing down an existing instance.
if not ray.is_initialized():
    ray.init(address='local', ignore_reinit_error=True, logging_level=logging.INFO)
    print("Ray cluster initialized in local mode.")
else:
    print("Ray cluster already initialized, skipping reinitialization.")

# Configure logging
# basicConfig() does nothing when the root logger already has handlers, which left
# the INFO records emitted through `logger` invisible. force=True removes any
# pre-existing root handlers first so this level and format actually take effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger(__name__)

# Define Coordinator Agent as a Ray actor
@ray.remote(num_cpus=1, num_gpus=0 if not torch.cuda.is_available() else 0.1)
class CoordinatorAgent:
    """Ray actor that records timestamped log messages and samples resource usage.

    Messages are kept in memory (``self.logs``) and appended to a text file on
    Google Drive. The actor requests one CPU, plus a 0.1 GPU share when CUDA is
    available at definition time.
    """

    def __init__(self):
        """Create the in-memory log buffer, note the start time and trim an oversized log file."""
        self.logs = []
        self.start_time = datetime.now()
        self.log_file = '/content/drive/MyDrive/Sentiment_Project/coordinator_logs.txt'
        logger.info("Coordinator Agent initialized.")
        self._check_log_file()

    def _check_log_file(self):
        """Trim the log file to its last 1000 lines once it exceeds 10 MB.

        Nothing happens when the file is missing or still below the limit; the file
        itself is only created later, by the first append in ``log_message``.
        """
        if os.path.exists(self.log_file) and os.path.getsize(self.log_file) > 10 * 1024 * 1024:  # 10MB limit
            with open(self.log_file, 'r') as f:
                lines = f.readlines()
            with open(self.log_file, 'w') as f:
                f.writelines(lines[-1000:])  # Keep last 1000 lines
            logger.info("Log file rotated due to size limit.")

    def log_message(self, message):
        """Timestamp ``message``, buffer it, log it and append it to the log file.

        Args:
            message: Text to record.

        Returns:
            str: The stored entry, formatted as ``"[YYYY-MM-DD HH:MM:SS] message"``.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log_entry = f"[{timestamp}] {message}"
        self.logs.append(log_entry)
        logger.info(log_entry)
        try:
            with open(self.log_file, 'a') as f:
                f.write(log_entry + '\n')
        except Exception as e:
            # File logging is best effort: the entry is still buffered and returned.
            logger.error(f"Failed to write to log file: {e}")
        return log_entry

    def get_resource_usage(self):
        """Sample CPU, RAM and (when CUDA is available) GPU utilisation.

        Returns:
            dict: Percentages under "cpu_usage (%)", "ram_usage (%)",
            "gpu_usage (%)" and "gpu_memory_usage (%)" plus a "timestamp" string.
            GPU figures describe the first GPU listed by ``nvidia-smi`` and stay
            0.0 when CUDA is unavailable. If any step raises,
            ``{"error": <message>}`` is returned instead.
        """
        try:
            cpu_usage = psutil.cpu_percent(interval=1)
            ram = psutil.virtual_memory()
            ram_usage = ram.percent
            gpu_usage = 0.0
            memory_usage = 0.0
            if torch.cuda.is_available():
                gpu_query = subprocess.run(['nvidia-smi', '--query-gpu=utilization.gpu,memory.used,memory.total', '--format=csv,noheader,nounits'],
                                          stdout=subprocess.PIPE, text=True, timeout=5)
                # nvidia-smi prints one CSV row per GPU. Splitting the whole output on
                # commas glued the end of one row to the start of the next, so float()
                # failed on multi-GPU machines; only the first non-empty row is parsed.
                gpu_rows = [row for row in gpu_query.stdout.splitlines() if row.strip()]
                first_row = gpu_rows[0] if gpu_rows else ""
                gpu_data = [float(x.strip()) for x in first_row.split(',') if x.strip()]
                gpu_usage = gpu_data[0] if gpu_data else 0.0
                memory_used = gpu_data[1] if len(gpu_data) > 1 else 0.0
                memory_total = gpu_data[2] if len(gpu_data) > 2 else 1.0
                memory_usage = (memory_used / memory_total * 100) if memory_total > 0 else 0.0
            resource_dict = {
                "cpu_usage (%)": cpu_usage,
                "ram_usage (%)": ram_usage,
                "gpu_usage (%)": gpu_usage,
                "gpu_memory_usage (%)": memory_usage,
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
            logger.info(f"Resource usage: {json.dumps(resource_dict)}")
            return resource_dict
        except Exception as e:
            logger.error(f"Resource monitoring error: {e}")
            return {"error": str(e)}

    def get_logs(self):
        """Return the list of log entries recorded by this actor so far."""
        return self.logs

# Instantiate Coordinator Agent
# `.remote()` creates the actor through Ray and returns a handle; later cells call its
# methods as `coordinator.<method>.remote(...)` and resolve the result with `ray.get`.
coordinator = CoordinatorAgent.remote()

2025-05-28 22:32:50,220	INFO worker.py:1888 -- Started a local Ray instance.


Ray cluster initialized in local mode.


In [None]:
# Cell 1: Load Dataset
# Purpose: Load dataset from Notebook 1 or upload anew, ensuring Drive is mounted and directory exists, with dynamic column detection.

import pandas as pd
from google.colab import drive
import os
import json
import sys

# Mount Google Drive, retrying up to `max_attempts` times before giving up.
max_attempts = 2
for attempt_number in range(1, max_attempts + 1):
    try:
        drive.mount('/content/drive', force_remount=True)
    except Exception as mount_error:
        logger.error("Attempt %d to mount Drive failed: %s", attempt_number, mount_error)
        # Out of retries: stop the cell with a readable message.
        if attempt_number == max_attempts:
            raise SystemExit("Failed to mount Drive after maximum attempts. Check your connection.")
    else:
        # Mounted successfully; no further attempts are needed.
        break

# Project folder on Drive (created if absent) and the dataset file inside it.
project_dir = '/content/drive/MyDrive/Sentiment_Project'
dataset_path = os.path.join(project_dir, 'processed_dataset.csv')
os.makedirs(project_dir, exist_ok=True)

# Function to infer text and label columns if needed
def infer_columns(df):
    """Guess which columns hold the free text and the class label.

    The text column is the first object/string column whose string values average
    more than 10 characters. The label column is the first *other* column with an
    object, string, categorical or numeric dtype and fewer than ``len(df) / 10``
    distinct values.

    Args:
        df: DataFrame to inspect.

    Returns:
        tuple: ``(text_column, label_column)`` column names.

    Raises:
        ValueError: If no text candidate or no label candidate is found.
    """
    def _is_textual(series):
        # Covers classic object columns as well as pandas' dedicated string dtypes.
        return pd.api.types.is_object_dtype(series.dtype) or pd.api.types.is_string_dtype(series.dtype)

    def _mean_text_length(series):
        # Only real strings count. Object columns holding numbers or NaN yield 0.0
        # instead of raising the way the `.str` accessor does on non-string data.
        lengths = [len(value) for value in series if isinstance(value, str)]
        return sum(lengths) / len(lengths) if lengths else 0.0

    def _is_label_dtype(series):
        return (
            _is_textual(series)
            or pd.api.types.is_numeric_dtype(series.dtype)
            or isinstance(series.dtype, pd.CategoricalDtype)
        )

    text_cols = [col for col in df.columns if _is_textual(df[col]) and _mean_text_length(df[col]) > 10]
    if not text_cols:
        raise ValueError("Could not infer text or label columns. Ensure dataset contains text and categorical label columns.")
    text_col = text_cols[0]

    max_unique = len(df) / 10
    # The chosen text column is excluded so a low-cardinality text column can no
    # longer be returned as its own label.
    label_cols = [
        col for col in df.columns
        if col != text_col and _is_label_dtype(df[col]) and df[col].nunique() < max_unique
    ]
    if not label_cols:
        raise ValueError("Could not infer text or label columns. Ensure dataset contains text and categorical label columns.")
    return text_col, label_cols[0]

# Load column names from user_dataset_prompt.json if available
prompt_file = os.path.join(project_dir, 'user_dataset_prompt.json')
text_column, label_column = None, None
try:
    with open(prompt_file, 'r') as f:
        prompt_data = json.load(f)
    text_column = prompt_data.get('text_column')
    label_column = prompt_data.get('label_column')
    logger.info("Loaded columns from prompt - Text: %s, Label: %s", text_column, label_column)
except FileNotFoundError:
    logger.warning("Prompt file not found. Will infer columns dynamically.")
except (json.JSONDecodeError, AttributeError) as e:
    # Malformed JSON, or valid JSON that is not an object (so it has no `.get`):
    # treat it like a missing prompt instead of aborting the cell.
    text_column, label_column = None, None
    logger.warning("Prompt file %s could not be used (%s). Will infer columns dynamically.", prompt_file, e)

# Try to load existing dataset
try:
    df = pd.read_csv(dataset_path, encoding='utf-8')
    logger.info("Loaded dataset from %s", dataset_path)
except FileNotFoundError:
    from google.colab import files
    logger.warning("Dataset not found. Please upload a new dataset (e.g., dataset.csv):")
    uploaded = files.upload()
    if not uploaded:
        raise ValueError("No file uploaded.")
    filename = list(uploaded.keys())[0]
    # Compare the extension case-insensitively so "DATA.CSV" is accepted too.
    extension = os.path.splitext(filename)[1].lower()
    if extension in ('.csv', '.json'):
        df = (pd.read_csv(filename, encoding='utf-8', on_bad_lines='skip')
              if extension == '.csv'
              else pd.read_json(filename, encoding='utf-8'))
        # Persist the upload as CSV at the canonical path so later runs find it.
        df.to_csv(dataset_path, index=False, encoding='utf-8')
        logger.info("New dataset saved to %s", dataset_path)
    else:
        raise ValueError("Unsupported file format. Use CSV or JSON.")

# Fail early on an empty frame; column inference is meaningless without rows.
assert not df.empty, "Loaded dataset is empty."

# The prompt file may describe a different dataset than the one just loaded, so its
# column names are trusted only when both are present in this DataFrame.
if text_column and label_column and (text_column not in df.columns or label_column not in df.columns):
    logger.warning("Prompt columns (%s, %s) are not in the dataset. Will infer columns dynamically.",
                   text_column, label_column)
    text_column, label_column = None, None

# Infer columns if not loaded from prompt
if not text_column or not label_column:
    text_column, label_column = infer_columns(df)
    logger.info("Inferred columns - Text: %s, Label: %s", text_column, label_column)

# Validate basic structure
assert text_column in df.columns and label_column in df.columns, f"Missing inferred columns: {text_column} or {label_column}."
logger.info("Dataset loaded successfully. Shape: %s", df.shape)

Mounted at /content/drive


In [None]:
# Cell 2: Data Analyst Agent with DistilBERT
# Purpose: Analyze dataset complexity and set performance goals with dynamic column handling.

import torch
from transformers import DistilBertTokenizer, DistilBertModel
from datetime import datetime
import numpy as np
import json
import os
import ray

# Load the pretrained DistilBERT tokenizer and encoder; the encoder is switched to
# inference mode and placed on the GPU when one is available.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertModel.from_pretrained('distilbert-base-uncased').eval()
if torch.cuda.is_available():
    model = model.cuda()

# Analyze text complexity
def get_text_complexity(texts, max_length=128, batch_size=64):
    """Return the spread of DistilBERT sentence-embedding norms as a complexity score.

    Each text is embedded by averaging the encoder's last hidden states over its
    real (non-padding) tokens; the score is the standard deviation of the
    embedding norms.

    Args:
        texts: Iterable of texts. Entries that are not ``str`` (e.g. NaN) are ignored.
        max_length: Maximum number of tokens kept per text.
        batch_size: Number of texts encoded per forward pass.

    Returns:
        float: Standard deviation of the embedding norms, or 0.0 when no usable
        text is supplied.
    """
    texts = [text for text in texts if isinstance(text, str)]
    if not texts:
        return 0.0

    device = next(model.parameters()).device
    norm_batches = []
    # Encoding in batches keeps memory bounded instead of pushing every text
    # through the model in a single forward pass.
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors="pt", max_length=max_length, truncation=True, padding=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            hidden = model(**inputs).last_hidden_state
        # Mask-weighted mean: padding positions are excluded, so an embedding does
        # not depend on how long the other texts in its batch happen to be.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
        norm_batches.append(torch.linalg.norm(pooled, dim=1).cpu().numpy())
    return float(np.std(np.concatenate(norm_batches)))

# Basic dataset statistics.
text_series = df[text_column]
num_samples = len(df)
avg_text_length = float(text_series.str.len().mean())
label_distribution = df[label_column].value_counts(normalize=True).to_dict()
# Only the first 1000 texts are embedded to keep the DistilBERT pass cheap.
text_complexity = get_text_complexity(text_series.tolist()[:1000])

# Derive the accuracy target: start from 0.92 and subtract a penalty for strong
# class imbalance and for high text complexity, never dropping below 0.85.
label_shares = list(label_distribution.values())
imbalance_score = max(label_shares) / min(label_shares)
base_accuracy = 0.92
for penalised, penalty in ((imbalance_score > 2, 0.05), (text_complexity > 0.5, 0.03)):
    if penalised:
        base_accuracy -= penalty
target_accuracy = max(0.85, base_accuracy)

# Name the dataset after the loaded file when its path is known.
if 'dataset_path' in locals():
    dataset_name = os.path.splitext(os.path.basename(dataset_path))[0]
else:
    dataset_name = 'Unknown_Dataset'

# Assemble the report from plain Python types so it serialises to JSON.
analysis_report = {
    "dataset_name": dataset_name,
    "num_samples": int(num_samples),
    "avg_text_length": avg_text_length,
    "label_distribution": label_distribution,
    "text_complexity": text_complexity,
    "target_accuracy": target_accuracy,
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}

# Write the report to Drive, forward it to the coordinator and log it.
report_path = os.path.join(project_dir, 'data_analysis_report.json')
with open(report_path, 'w') as report_file:
    json.dump(analysis_report, report_file, indent=4)
compact_report = json.dumps(analysis_report)
ray.get(coordinator.log_message.remote(f"Data analysis completed: {compact_report}"))
logger.info("Data Analysis Report:")
logger.info(json.dumps(analysis_report, indent=4))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [None]:
# Cell 3: Dataset Quality Check Agent with pandas and DistilBERT
# Purpose: Detect data issues and provide feedback with dynamic column handling.

from sklearn.metrics.pairwise import cosine_similarity

# Share of missing values per column (in percent) and the gap between the most
# and least frequent label shares.
null_counts = df.isnull().sum()
missing_data = null_counts / len(df) * 100
distribution_shares = label_distribution.values()
class_imbalance = max(distribution_shares) - min(distribution_shares)

# Anomaly detection with DistilBERT
def detect_anomalies(texts, threshold=0.1, batch_size=64):
    """Flag texts whose embedding is, on average, dissimilar to the rest of the sample.

    Each text is embedded by averaging DistilBERT's last hidden states over its
    real (non-padding) tokens. A text is flagged when its mean cosine similarity
    to all texts in the sample (itself included) is below ``threshold``.

    Args:
        texts: Sequence of texts. Entries that are not ``str`` (e.g. NaN) are
            embedded as empty strings so returned positions stay aligned with
            the input.
        threshold: Mean-similarity cut-off below which a text is an anomaly.
        batch_size: Number of texts encoded per forward pass.

    Returns:
        numpy.ndarray: Integer positions of the flagged texts within ``texts``;
        empty when ``texts`` is empty.
    """
    texts = [text if isinstance(text, str) else "" for text in texts]
    if not texts:
        # Same dtype as the index array produced by np.where below.
        return np.array([], dtype=np.intp)

    device = next(model.parameters()).device
    embedding_batches = []
    # Encoding in batches keeps memory bounded instead of pushing every text
    # through the model in a single forward pass.
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors="pt", max_length=128, truncation=True, padding=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            hidden = model(**inputs).last_hidden_state
        # Mask-weighted mean so padding tokens do not dilute the embedding.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
        embedding_batches.append(pooled.cpu().numpy())

    embeddings = np.vstack(embedding_batches)
    similarities = cosine_similarity(embeddings)
    avg_similarities = similarities.mean(axis=1)
    anomaly_indices = np.where(avg_similarities < threshold)[0]
    return anomaly_indices

# Missing entries (NaN/None) are replaced with empty strings so the tokenizer only
# receives text while positions stay aligned with the first 1000 rows of `df`.
sample_texts = [text if isinstance(text, str) else '' for text in df[text_column].tolist()[:1000]]
# Guard before calling the detector: an empty sample has nothing to embed.
if sample_texts:
    anomaly_indices = detect_anomalies(sample_texts)
    anomaly_rate = len(anomaly_indices) / len(sample_texts) * 100
else:
    anomaly_indices = np.array([], dtype=int)
    anomaly_rate = 0

# Compile report
quality_report = {
    "missing_data (%)": missing_data.to_dict(),
    "class_imbalance": class_imbalance,
    "anomaly_rate (%)": anomaly_rate,
    # Only the first 10 flagged positions are stored to keep the report small.
    "anomaly_indices": anomaly_indices.tolist()[:10],
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}

# Save and log
quality_report_path = os.path.join(project_dir, 'quality_check_report.json')
with open(quality_report_path, 'w') as f:
    json.dump(quality_report, f, indent=4)
ray.get(coordinator.log_message.remote(f"Quality check completed: {json.dumps(quality_report)}"))
logger.info("Quality Check Report:")
logger.info(json.dumps(quality_report, indent=4))

In [None]:
# Cell 4: Preprocessor Agent with Fine-Tuned RoBERTa
# Purpose: Fine-tune RoBERTa on the dataset and preprocess data with tokenization, lemmatization, and feature engineering.

from transformers import RobertaTokenizer, RobertaForSequenceClassification
import spacy
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np
import os
import ray

# Tell the coordinator that preprocessing has begun.
ray.get(coordinator.log_message.remote("Preprocessor Agent started."))

# spaCy pipeline used for lemmatization; parser and NER are disabled.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

# Give every distinct label an integer id, numbered in order of first appearance.
unique_labels = df[label_column].unique()
label_mapping = dict(zip(unique_labels, range(len(unique_labels))))
logger.info("Label mapping: %s", label_mapping)
sentiments = df[label_column].map(label_mapping).values

# Torch dataset that lemmatizes and tokenizes one text per item for fine-tuning.
class SentimentDataset(Dataset):
    """Pairs raw texts with integer labels and encodes them lazily, item by item."""

    def __init__(self, texts, sentiments, tokenizer, max_length=128):
        """Store the texts, their labels, the tokenizer and the padded sequence length."""
        self.texts, self.sentiments = texts, sentiments
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        """Number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the encoded text and label at position ``idx``.

        The text is lemmatized with the module-level spaCy pipeline ``nlp``,
        tokenized with truncation and padding to ``max_length``, and returned as
        a dict with ``input_ids``, ``attention_mask`` and ``labels`` tensors.
        """
        doc = nlp(self.texts[idx])
        lemmatized = ' '.join(token.lemma_ for token in doc)
        encoded = self.tokenizer(
            lemmatized,
            max_length=self.max_length,
            truncation=True,
            padding='max_length',
            return_tensors='pt',
        )
        # squeeze(0) drops the leading dimension of each encoded tensor.
        item = {key: encoded[key].squeeze(0) for key in ('input_ids', 'attention_mask')}
        item['labels'] = torch.tensor(self.sentiments[idx], dtype=torch.long)
        return item

# Load and prepare data
# A single tokenizer instance serves both the fine-tuning dataset and the later
# embedding step; the same pretrained tokenizer was previously loaded twice.
roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
dataset = SentimentDataset(df[text_column].tolist(), sentiments, roberta_tokenizer)

# Fine-tune RoBERTa
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
roberta_model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=len(unique_labels))
# One device move is enough (GPU when available, CPU otherwise).
roberta_model.to(device)

# Simple fine-tuning loop
optimizer = AdamW(roberta_model.parameters(), lr=2e-5)
loader = DataLoader(dataset, batch_size=8, shuffle=True)

ray.get(coordinator.log_message.remote("Starting fine-tuning of RoBERTa..."))
roberta_model.train()
num_epochs = 1  # Single epoch for speed
for epoch in range(num_epochs):
    total_loss = 0.0
    # The progress label follows the real epoch number instead of a fixed "Epoch 1".
    for batch in tqdm(loader, desc=f"Epoch {epoch + 1}"):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        batch_labels = batch['labels'].to(device)
        outputs = roberta_model(input_ids=input_ids, attention_mask=attention_mask, labels=batch_labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    # max(..., 1) avoids a ZeroDivisionError when the loader yields no batches.
    avg_loss = total_loss / max(len(loader), 1)
    logger.info("Epoch loss: %.4f", avg_loss)

# Leave the model in inference mode: with dropout still active, embeddings
# extracted after training would be noisy and differ from run to run.
roberta_model.eval()
ray.get(coordinator.log_message.remote("Fine-tuning completed."))

# Preprocessing pipeline with batching
def preprocess_texts(texts, batch_size=32, max_length=128):
    """Lemmatize texts, tokenize them for RoBERTa and extract sentence embeddings.

    Args:
        texts: Sequence of texts. Entries that are not ``str`` (e.g. NaN) are
            processed as empty strings so output rows stay aligned with the input.
        batch_size: Number of texts encoded per forward pass.
        max_length: Fixed token length every sequence is truncated/padded to.

    Returns:
        tuple: ``(inputs, embeddings)`` where ``inputs`` is a dict of numpy arrays
        ``input_ids`` and ``attention_mask`` with one row of ``max_length`` tokens
        per text, and ``embeddings`` is a numpy array with one row per text: the
        mean of the model's last hidden states over the non-padding tokens.
    """
    texts = [text if isinstance(text, str) else "" for text in texts]

    lemmatized_texts = []
    for doc in tqdm(nlp.pipe(texts, disable=["ner", "parser"]), total=len(texts), desc="Lemmatizing"):
        lemmatized_texts.append(' '.join([token.lemma_ for token in doc]))

    if not lemmatized_texts:
        # np.vstack cannot stack an empty list; return correctly shaped empty arrays.
        empty_tokens = np.empty((0, max_length), dtype=np.int64)
        empty_embeddings = np.empty((0, roberta_model.config.hidden_size), dtype=np.float32)
        return {'input_ids': empty_tokens, 'attention_mask': empty_tokens.copy()}, empty_embeddings

    device = next(roberta_model.parameters()).device
    input_id_batches, mask_batches, embedding_batches = [], [], []

    # Embeddings must be extracted with dropout disabled; the previous mode is
    # restored afterwards so the caller's model state is left untouched.
    was_training = roberta_model.training
    roberta_model.eval()
    try:
        for i in tqdm(range(0, len(lemmatized_texts), batch_size), desc="Generating Embeddings"):
            batch_texts = lemmatized_texts[i:i + batch_size]
            # Pad every batch to the same fixed length. Padding to the longest text
            # of each batch gives batches of different widths, which np.vstack
            # cannot stack.
            inputs = roberta_tokenizer(batch_texts, return_tensors="pt", max_length=max_length,
                                       truncation=True, padding='max_length')
            inputs = {k: v.to(device) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = roberta_model(**inputs, output_hidden_states=True)
            hidden = outputs.hidden_states[-1]
            # Mask-weighted mean so padding tokens do not dilute the embedding.
            mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
            pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
            input_id_batches.append(inputs['input_ids'].cpu().numpy())
            mask_batches.append(inputs['attention_mask'].cpu().numpy())
            embedding_batches.append(pooled.cpu().numpy())
    finally:
        roberta_model.train(was_training)

    stacked_inputs = {
        'input_ids': np.vstack(input_id_batches),
        'attention_mask': np.vstack(mask_batches),
    }
    return stacked_inputs, np.vstack(embedding_batches)

# Run the full preprocessing pipeline over every text and map labels to integer ids.
raw_texts = df[text_column].tolist()
inputs, embeddings = preprocess_texts(raw_texts, batch_size=32)
labels = df[label_column].map(label_mapping).values

# Bundle the token arrays, embeddings, labels and label mapping into one file on Drive.
preprocessed_path = os.path.join(project_dir, 'preprocessed_data.pt')
payload = {
    'inputs': inputs,
    'embeddings': embeddings,
    'labels': labels,
    'label_mapping': label_mapping,
}
torch.save(payload, preprocessed_path)

# Report completion to the coordinator and log the resulting shapes.
ray.get(coordinator.log_message.remote(f"Preprocessing completed. Data saved to {preprocessed_path}"))
logger.info("Preprocessed data saved to %s", preprocessed_path)
for shape_message, shape in (
    ("Input tensor shape: %s", inputs['input_ids'].shape),
    ("Embedding shape: %s", embeddings.shape),
):
    logger.info(shape_message, shape)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 6250/6250 [27:21<00:00,  3.81it/s]
Lemmatizing: 100%|██████████| 50000/50000 [15:01<00:00, 55.48it/s]
Generating Embeddings: 100%|██████████| 1563/1563 [07:02<00:00,  3.70it/s]


In [None]:
# Install nlpaug
# Run this before the augmentation cell below, which imports nlpaug.augmenter at its top.
!pip install nlpaug

Collecting nlpaug
  Downloading nlpaug-1.1.11-py3-none-any.whl.metadata (14 kB)
Downloading nlpaug-1.1.11-py3-none-any.whl (410 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.5/410.5 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nlpaug
Successfully installed nlpaug-1.1.11


In [None]:
# Cell 5: Auto-Augmentation Agent with nlpaug
# Purpose: Perform selective data enhancement on small/imbalanced datasets with dynamic column handling.

import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import pandas as pd
from tqdm import tqdm
import os
import ray

# nlpaug is installed by the dedicated install cell above. The imports at the top of
# this cell run first, so a `pip install` placed here could never rescue a missing
# package; the redundant install line was removed.

# Recompute label distribution for independence
label_counts = df[label_column].value_counts()
label_distribution = df[label_column].value_counts(normalize=True).to_dict()

# Check augmentation need
imbalance_threshold = 0.3
size_threshold = 5000
imbalance = max(label_distribution.values()) - min(label_distribution.values())
if len(df) < size_threshold or imbalance > imbalance_threshold:
    # Initialize augmenters
    # NOTE(review): SynonymAug's default source is WordNet; confirm the NLTK corpora
    # it needs are available in this runtime.
    synonym_aug = naw.SynonymAug(aug_p=0.3, aug_max=5)
    # BackTranslationAug is configured with a pair of translation model names rather
    # than language codes; the `from_lang`/`to_lang` keywords used before are not
    # part of its signature. English -> French -> English round trip.
    back_translation_aug = naw.BackTranslationAug(
        from_model_name='Helsinki-NLP/opus-mt-en-fr',
        to_model_name='Helsinki-NLP/opus-mt-fr-en',
        device='cuda' if torch.cuda.is_available() else 'cpu'
    )

    # Augment minority class
    minority_class = min(label_distribution, key=label_distribution.get)
    minority_df = df[df[label_column] == minority_class]
    # Only non-empty strings can be augmented; NaN and blank entries are dropped.
    minority_texts = [text for text in minority_df[text_column].tolist()
                      if isinstance(text, str) and text.strip()]

    # Cap augmentation to 10,000 samples to manage runtime. When the classes are
    # imbalanced, also cap at the majority/minority gap so the minority class is not
    # pushed past the majority class.
    # NOTE(review): for a small but perfectly balanced dataset the gap is 0 and the
    # original behaviour (augmenting one class up to the runtime cap) is kept; confirm
    # this is intended.
    max_augmentation = 10000
    class_gap = int(label_counts.max() - label_counts.min())
    augmentation_budget = min(max_augmentation, class_gap) if class_gap > 0 else max_augmentation
    if len(minority_texts) > augmentation_budget:
        minority_texts = minority_texts[:augmentation_budget]
        ray.get(coordinator.log_message.remote(f"Minority class too large ({len(minority_df)} samples). Capping augmentation at {augmentation_budget} samples."))

    augmented_texts = []
    for text in tqdm(minority_texts, desc="Augmenting minority class"):
        try:
            aug_text = synonym_aug.augment(text)[0]
            aug_text = back_translation_aug.augment(aug_text)[0]
        except Exception as e:
            # Really skip the sample, as the message says. Appending the original
            # text would add exact duplicates and count them as augmented samples.
            ray.get(coordinator.log_message.remote(f"Augmentation failed for text: {str(e)}. Skipping."))
            continue
        augmented_texts.append(aug_text)

    # Create augmented DataFrame
    augmented_df = pd.DataFrame({
        text_column: augmented_texts,
        label_column: [minority_class] * len(augmented_texts)
    })
    augmented_dataset = pd.concat([df, augmented_df], ignore_index=True)

    # Save augmented dataset with error handling
    augmented_path = os.path.join(project_dir, 'augmented_dataset.csv')
    try:
        augmented_dataset.to_csv(augmented_path, index=False, encoding='utf-8')
        ray.get(coordinator.log_message.remote(f"Augmentation completed: {len(augmented_texts)} samples added. Augmented dataset saved to {augmented_path}"))
        logger.info("Augmented dataset saved to %s", augmented_path)
        logger.info("New dataset shape: %s", augmented_dataset.shape)
        logger.info("New label distribution: \n%s", augmented_dataset[label_column].value_counts(normalize=True))
    except Exception as e:
        ray.get(coordinator.log_message.remote(f"Failed to save augmented dataset: {str(e)}"))
        logger.error("Error saving augmented dataset: %s", str(e))
else:
    ray.get(coordinator.log_message.remote(f"No augmentation needed. Dataset size: {len(df)}, Imbalance: {imbalance:.3f}"))
    logger.info("No augmentation needed.")