In [None]:
pip install git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git


In [None]:
import pandas as pd
import random
from nltk.corpus import wordnet
import nltk

# Ensure NLTK WordNet is downloaded
# get_synonyms() reads this corpus through nltk.corpus.wordnet.
# NOTE(review): presumably a no-op when the corpus is already cached locally;
# confirm against the NLTK documentation.
nltk.download('wordnet')

# Function to get synonyms
def get_synonyms(word):
    """Return the WordNet synonyms of ``word`` as a sorted list.

    Every lemma name of every synset WordNet returns for ``word`` is
    collected, with underscores replaced by spaces, and duplicates are
    dropped. A lemma equal to ``word`` itself is kept when WordNet returns it.

    Args:
        word: The token to look up.

    Returns:
        A list of unique lemma strings in sorted order; empty when WordNet
        has no synsets for ``word``.
    """
    synonyms = {
        lemma.name().replace('_', ' ')
        for syn in wordnet.synsets(word)
        for lemma in syn.lemmas()
    }
    # Iteration order of a set of strings changes between interpreter runs
    # (string hash randomisation), so sort to keep a seeded random.choice()
    # over the result reproducible.
    return sorted(synonyms)

# Function to introduce controlled scrambling
def scramble_text(text):
    """Return a noisy, partially shuffled variant of ``text``.

    The text is split on whitespace and each word is processed in order:

    * if ``get_synonyms`` returns candidates, the word is replaced by a random
      one with probability 0.3;
    * otherwise (or when the replacement is not taken) the word's characters
      are shuffled with probability 0.2;
    * after each word, one of four filler words is inserted with
      probability 0.2.

    The resulting token list is cut into chunks of roughly a third of its
    length, the chunks are shuffled, and everything is joined with spaces.

    Args:
        text: The sentence to scramble. Non-string values (for example the
            NaN or None that pandas uses for a missing cell) are returned
            unchanged.

    Returns:
        The scrambled sentence, ``""`` for empty or whitespace-only input, or
        the original object when ``text`` is not a string.
    """
    # Series.apply hands missing cells over as float NaN / None; calling
    # .split() on those raises AttributeError, so pass them through untouched.
    if not isinstance(text, str):
        return text

    words = text.split()
    scrambled_text = []

    for word in words:
        # Randomly replace with a synonym. random.random() is only drawn when
        # there is at least one candidate (short-circuit `and`).
        synonyms = get_synonyms(word)
        if synonyms and random.random() < 0.3:  # 30% chance to replace
            scrambled_text.append(random.choice(synonyms))
        else:
            # Occasionally add noise to the word
            if random.random() < 0.2:  # 20% chance to add noise
                # random.sample over the full length yields a permutation of
                # the word's characters.
                word = "".join(random.sample(word, len(word)))
            scrambled_text.append(word)

        # Randomly insert noise words
        if random.random() < 0.2:  # 20% chance to insert noise word
            scrambled_text.append(random.choice(["extra", "random", "noise", "confuse"]))

    # Shuffle chunks of the text. max(1, ...) keeps the slice step valid when
    # fewer than three tokens were produced.
    chunk_size = max(1, len(scrambled_text) // 3)
    chunks = [
        scrambled_text[i:i + chunk_size]
        for i in range(0, len(scrambled_text), chunk_size)
    ]
    random.shuffle(chunks)
    shuffled_text = [word for chunk in chunks for word in chunk]

    return " ".join(shuffled_text)

# Load dataset
# NOTE(review): hard-coded absolute path into one user's Downloads folder; the
# cell only runs where this exact file exists.
file_path = r"C:\Users\Suhaa\Downloads\Codey AI\Mockups\navigator-batch-generate-6768423f003f7114aba22d03-data.csv"
data = pd.read_csv(file_path)

# Apply scrambling
# Replaces the 'User Command' column with its scrambled version; every other
# column is written out as loaded. No random.seed() call is visible before
# this point, so the output presumably differs from run to run.
data['User Command'] = data['User Command'].apply(scramble_text)

# Save scrambled data
# index=False keeps the DataFrame index out of the CSV.
output_path = r"C:\Users\Suhaa\Downloads\Codey AI\Mockups\challenging_scrambled_data.csv"
data.to_csv(output_path, index=False)

print(f"Challenging scrambled dataset saved to {output_path}")


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
)
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import time
import seaborn as sns
from collections import Counter
import warnings
import random

# Silence every warning for the rest of the session.
# NOTE(review): this also hides any warnings raised by the libraries used
# below; confirm that is intended.
warnings.filterwarnings('ignore')

# Set seeds
# The same value (42) seeds Python's random, NumPy and TensorFlow.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Load data
# Reads the scrambled CSV from a hard-coded absolute path.
file_path = r"C:\Users\Suhaa\Downloads\Codey AI\Mockups\challenging_scrambled_data.csv"
df = pd.read_csv(file_path)

print("Initial Data Shape:", df.shape)
print("Initial Data Sample:")
print(df.head())

# Rows missing either the text or the label are dropped; drop_duplicates() is
# called without a subset, so only rows identical in every column are removed.
df = df.dropna(subset=["User Command", "Action"]).drop_duplicates()
print("\nData Shape after Dropping NA and Duplicates:", df.shape)

# Encode labels
# Maps each distinct "Action" value to an integer id stored in "Action_encoded".
label_encoder = LabelEncoder()
df["Action_encoded"] = label_encoder.fit_transform(df["Action"])

print("\nEncoded Classes:")
print(dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))

# Create features
X = df["User Command"].values
y = df["Action_encoded"].values

# NOTE(review): the vectorizer is fitted on every row before any train/test
# split happens further down, so vocabulary and IDF weights are computed from
# rows that later end up in the test split. Confirm this is acceptable.
tfidf_vectorizer = TfidfVectorizer(max_features=500)
X_tfidf = tfidf_vectorizer.fit_transform(X)

print("\nTF-IDF Feature Shape:", X_tfidf.shape)

# Balance classes
original_class_counts = Counter(y)
print("\nOriginal Class Distribution:", original_class_counts)

# NOTE(review): over-sampling runs before the train/test split below;
# presumably RandomOverSampler balances classes by repeating minority rows, in
# which case copies of one row can land in both train and test. Verify.
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_tfidf, y)

new_class_counts = Counter(y_resampled)
print("After Random Over-Sampling Class Distribution:", new_class_counts)

# Models
# These three estimator objects are created once and re-fitted for every data
# size in the evaluation loop.
nb_model = MultinomialNB()
dt_model = DecisionTreeClassifier(random_state=42)
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)

def create_neural_net(input_dim, output_dim):
    """Build and compile the feed-forward softmax classifier.

    Layer stack: Dense(256, relu) -> Dropout(0.3) -> Dense(128, relu) ->
    Dropout(0.2) -> Dense(output_dim, softmax).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of units in the softmax output layer.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.001),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        Dense(256, activation="relu", input_dim=input_dim),
        Dropout(0.3),
        Dense(128, activation="relu"),
        Dropout(0.2),
        Dense(output_dim, activation="softmax"),
    ]
    network = Sequential(layer_stack)
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

def evaluate_model(y_true, y_pred, y_pred_proba=None, multi_class='ovr'):
    """Compute accuracy, weighted precision/recall/F1 and ROC-AUC.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        y_pred_proba: Optional per-class scores, one row per sample. Any
            array-like is accepted, including a list of per-sample rows.
        multi_class: Forwarded to ``roc_auc_score`` in the multi-class case.
            Pass ``None`` to skip ROC-AUC altogether.

    Returns:
        A dict with the keys 'Accuracy', 'Precision', 'Recall', 'F1-Score'
        and 'ROC-AUC'. 'ROC-AUC' is the string 'N/A' when no scores were
        given, when ``multi_class`` is ``None``, or when scoring fails.
    """
    metrics = {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'Recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'F1-Score': f1_score(y_true, y_pred, average='weighted', zero_division=0),
    }

    roc_auc = 'N/A'
    if y_pred_proba is not None and multi_class is not None:
        try:
            # Normalise to an ndarray so the column slice below also works
            # when the caller passes a list of per-sample rows.
            proba = np.asarray(y_pred_proba)
            y_binarized = label_binarize(y_true, classes=np.unique(y_true))
            if y_binarized.shape[1] == 1:
                # A single indicator column means two classes: score the
                # second probability column against the raw labels.
                roc_auc = roc_auc_score(y_true, proba[:, 1])
            else:
                roc_auc = roc_auc_score(
                    y_binarized, proba, average='weighted', multi_class=multi_class
                )
        except Exception:
            # ROC-AUC is best-effort (shape mismatches, classes missing from
            # y_true, ...). Unlike a bare `except:`, this no longer swallows
            # KeyboardInterrupt or SystemExit.
            roc_auc = 'N/A'
    metrics['ROC-AUC'] = roc_auc

    return metrics

def get_data_subset(X, y, reduction_ratio):
    """Return a stratified fraction of ``(X, y)``.

    Args:
        X: Feature container accepted by ``train_test_split``.
        y: Labels aligned with ``X``; also used for stratification.
        reduction_ratio: Fraction of the data to keep. A value equal to 1.0
            returns the inputs themselves without sampling.

    Returns:
        A ``(X_subset, y_subset)`` tuple.
    """
    if reduction_ratio != 1.0:
        # train_test_split returns [X_kept, X_rest, y_kept, y_rest]; only the
        # "train" halves are the requested subset.
        pieces = train_test_split(
            X, y,
            train_size=reduction_ratio,
            stratify=y,
            random_state=42,
        )
        return pieces[0], pieces[2]
    return X, y

# One metrics dict per (data size, model) is appended to `results`.
results = []
# Fractions of the resampled data to evaluate on, largest first.
data_reduction_levels = [1.0, 0.75, 0.5, 0.25, 0.1]
for reduction in data_reduction_levels:
    print(f"\n---\nEvaluating Models with {int(reduction*100)}% of Data\n---")
    
    # Stratified subset of the over-sampled TF-IDF matrix for this level.
    X_subset, y_subset = get_data_subset(X_resampled, y_resampled, reduction)
    
    print("Subset Class Distribution:", Counter(y_subset))
    print("Subset Feature Shape:", X_subset.shape)
    
    # 80/20 stratified split of the subset. These four names stay bound after
    # the loop ends, holding the split of the last (10%) level.
    X_train, X_test, y_train, y_test = train_test_split(
        X_subset, y_subset,
        test_size=0.2,
        stratify=y_subset,
        random_state=42
    )
    
    # Naive Bayes
    print("\nTraining and Evaluating Multinomial Naive Bayes...")
    start_time = time.time()
    nb_model.fit(X_train, y_train)
    training_time = time.time() - start_time
    y_pred_nb = nb_model.predict(X_test)
    y_pred_proba_nb = nb_model.predict_proba(X_test)
    metrics_nb = evaluate_model(y_test, y_pred_nb, y_pred_proba_nb, multi_class='ovr')
    
    # "Processing time" is the wall-clock time of one predict() call on the
    # first test row, in milliseconds. It is a single measurement, not an average.
    sample = X_test[0]
    start_proc_time = time.time()
    nb_model.predict(sample)
    processing_time = (time.time() - start_proc_time) * 1000
    
    metrics_nb['Data Size (%)'] = int(reduction*100)
    metrics_nb['Model'] = 'NLP Model (MultinomialNB)'
    metrics_nb['Training Time (s)'] = round(training_time, 4)
    metrics_nb['Processing Time (ms)'] = round(processing_time, 4)
    results.append(metrics_nb)
    
    # Decision Tree
    # Same fit / predict / score / single-row timing sequence as above.
    print("\nTraining and Evaluating Decision Tree...")
    start_time = time.time()
    dt_model.fit(X_train, y_train)
    training_time = time.time() - start_time
    y_pred_dt = dt_model.predict(X_test)
    y_pred_proba_dt = dt_model.predict_proba(X_test)
    metrics_dt = evaluate_model(y_test, y_pred_dt, y_pred_proba_dt, multi_class='ovr')
    
    sample = X_test[0]
    start_proc_time = time.time()
    dt_model.predict(sample)
    processing_time = (time.time() - start_proc_time) * 1000
    
    metrics_dt['Data Size (%)'] = int(reduction*100)
    metrics_dt['Model'] = 'Decision Tree'
    metrics_dt['Training Time (s)'] = round(training_time, 4)
    metrics_dt['Processing Time (ms)'] = round(processing_time, 4)
    results.append(metrics_dt)
    
    # Random Forest
    # Same fit / predict / score / single-row timing sequence as above.
    print("\nTraining and Evaluating Random Forest...")
    start_time = time.time()
    rf_model.fit(X_train, y_train)
    training_time = time.time() - start_time
    y_pred_rf = rf_model.predict(X_test)
    y_pred_proba_rf = rf_model.predict_proba(X_test)
    metrics_rf = evaluate_model(y_test, y_pred_rf, y_pred_proba_rf, multi_class='ovr')
    
    sample = X_test[0]
    start_proc_time = time.time()
    rf_model.predict(sample)
    processing_time = (time.time() - start_proc_time) * 1000
    
    metrics_rf['Data Size (%)'] = int(reduction*100)
    metrics_rf['Model'] = 'Random Forest'
    metrics_rf['Training Time (s)'] = round(training_time, 4)
    metrics_rf['Processing Time (ms)'] = round(processing_time, 4)
    results.append(metrics_rf)
    
    # Neural Network
    # A fresh network is built for every data size. The output width comes
    # from the label encoder rather than from the labels present in the subset.
    print("\nTraining and Evaluating Neural Decision Tree...")
    ndt_model = create_neural_net(input_dim=X_train.shape[1], output_dim=len(label_encoder.classes_))
    early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    
    # NOTE(review): the test split doubles as validation data, so early
    # stopping and restore_best_weights select the weights using the same rows
    # the model is scored on below. Confirm this is intended.
    # `history` is assigned but not read again inside this loop.
    start_time = time.time()
    history = ndt_model.fit(
        X_train.toarray(), y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_test.toarray(), y_test),
        callbacks=[early_stop],
        verbose=0
    )
    training_time = time.time() - start_time
    y_pred_ndt_prob = ndt_model.predict(X_test.toarray())
    # Predicted class = index of the largest softmax output.
    y_pred_ndt = np.argmax(y_pred_ndt_prob, axis=1)
    metrics_ndt = evaluate_model(y_test, y_pred_ndt, y_pred_ndt_prob, multi_class='ovr')
    
    # The network is fed dense arrays, so the single test row is converted
    # before the timed predict() call.
    sample = X_test[0].toarray()
    start_proc_time = time.time()
    ndt_model.predict(sample)
    processing_time = (time.time() - start_proc_time) * 1000
    
    metrics_ndt['Data Size (%)'] = int(reduction*100)
    metrics_ndt['Model'] = 'Neural Decision Tree'
    metrics_ndt['Training Time (s)'] = round(training_time, 4)
    metrics_ndt['Processing Time (ms)'] = round(processing_time, 4)
    results.append(metrics_ndt)

# Create final results
# One row per (data size, model) pair collected in `results`; the column list
# below fixes the display order.
results_df = pd.DataFrame(results)
results_df = results_df[['Data Size (%)', 'Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC', 'Training Time (s)', 'Processing Time (ms)']]

print("\nFinal Evaluation Results:")
print(results_df)

# results_df.to_csv('model_evaluation_comparison.csv', index=False)


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
)
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import RandomOverSampler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import time
from collections import Counter
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this also hides any warnings raised by the libraries used
# below; confirm that is intended.
warnings.filterwarnings('ignore')

# Different neural network architectures
def create_ndt_config_1(input_dim, output_dim):
    """Configuration 1: two hidden layers (256, 128) with dropout.

    Layer stack: Dense(256, relu) -> Dropout(0.3) -> Dense(128, relu) ->
    Dropout(0.2) -> Dense(output_dim, softmax).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of units in the softmax output layer.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.001),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    stack = [
        Dense(256, activation="relu", input_dim=input_dim),
        Dropout(0.3),
        Dense(128, activation="relu"),
        Dropout(0.2),
        Dense(output_dim, activation="softmax"),
    ]
    net = Sequential(stack)
    net.compile(
        optimizer=Adam(learning_rate=0.001),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

def create_ndt_config_2(input_dim, output_dim):
    """Configuration 2: three hidden layers (512, 256, 64) with dropout.

    Layer stack: Dense(512, relu) -> Dropout(0.4) -> Dense(256, relu) ->
    Dropout(0.3) -> Dense(64, relu) -> Dropout(0.2) ->
    Dense(output_dim, softmax).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of units in the softmax output layer.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.0005),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    stack = [
        Dense(512, activation="relu", input_dim=input_dim),
        Dropout(0.4),
        Dense(256, activation="relu"),
        Dropout(0.3),
        Dense(64, activation="relu"),
        Dropout(0.2),
        Dense(output_dim, activation="softmax"),
    ]
    net = Sequential(stack)
    net.compile(
        optimizer=Adam(learning_rate=0.0005),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

def create_ndt_config_3(input_dim, output_dim):
    """Configuration 3: three hidden layers (128, 64, 32), no dropout.

    Layer stack: Dense(128, relu) -> Dense(64, relu) -> Dense(32, relu) ->
    Dense(output_dim, softmax).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of units in the softmax output layer.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.001),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    stack = [
        Dense(128, activation="relu", input_dim=input_dim),
        Dense(64, activation="relu"),
        Dense(32, activation="relu"),
        Dense(output_dim, activation="softmax"),
    ]
    net = Sequential(stack)
    net.compile(
        optimizer=Adam(learning_rate=0.001),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

def create_ndt_config_4(input_dim, output_dim):
    """Configuration 4: three hidden layers (300, 200, 100), dropout on the first two.

    Layer stack: Dense(300, relu) -> Dropout(0.5) -> Dense(200, relu) ->
    Dropout(0.3) -> Dense(100, relu) -> Dense(output_dim, softmax).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of units in the softmax output layer.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.0001),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    stack = [
        Dense(300, activation="relu", input_dim=input_dim),
        Dropout(0.5),
        Dense(200, activation="relu"),
        Dropout(0.3),
        Dense(100, activation="relu"),
        Dense(output_dim, activation="softmax"),
    ]
    net = Sequential(stack)
    net.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

def create_ndt_config_5(input_dim, output_dim):
    """Configuration 5: two hidden layers of 256 units, one dropout.

    Layer stack: Dense(256, relu) -> Dense(256, relu) -> Dropout(0.2) ->
    Dense(output_dim, softmax).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of units in the softmax output layer.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.0015),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    stack = [
        Dense(256, activation="relu", input_dim=input_dim),
        Dense(256, activation="relu"),
        Dropout(0.2),
        Dense(output_dim, activation="softmax"),
    ]
    net = Sequential(stack)
    net.compile(
        optimizer=Adam(learning_rate=0.0015),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

def evaluate_model(y_true, y_pred, y_pred_proba=None, multi_class='ovr'):
    """Compute accuracy, weighted precision/recall/F1 and ROC-AUC.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        y_pred_proba: Optional per-class scores, one row per sample. Any
            array-like is accepted, including a list of per-sample rows.
        multi_class: Forwarded to ``roc_auc_score`` in the multi-class case.
            Pass ``None`` to skip ROC-AUC altogether.

    Returns:
        A dict with the keys 'Accuracy', 'Precision', 'Recall', 'F1-Score'
        and 'ROC-AUC'. 'ROC-AUC' is the string 'N/A' when no scores were
        given, when ``multi_class`` is ``None``, or when scoring fails.
    """
    metrics = {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'Recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'F1-Score': f1_score(y_true, y_pred, average='weighted', zero_division=0),
    }

    roc_auc = 'N/A'
    if y_pred_proba is not None and multi_class is not None:
        try:
            # Normalise to an ndarray so the column slice below also works
            # when the caller passes a list of per-sample rows.
            proba = np.asarray(y_pred_proba)
            y_binarized = label_binarize(y_true, classes=np.unique(y_true))
            if y_binarized.shape[1] == 1:
                # A single indicator column means two classes: score the
                # second probability column against the raw labels.
                roc_auc = roc_auc_score(y_true, proba[:, 1])
            else:
                roc_auc = roc_auc_score(
                    y_binarized, proba, average='weighted', multi_class=multi_class
                )
        except Exception:
            # ROC-AUC is best-effort (shape mismatches, classes missing from
            # y_true, ...). Unlike a bare `except:`, this no longer swallows
            # KeyboardInterrupt or SystemExit.
            roc_auc = 'N/A'
    metrics['ROC-AUC'] = roc_auc

    return metrics

def test_all_models(X_train, y_train, X_test, y_test):
    """Fit each classical model and each neural configuration, then score them.

    The three scikit-learn estimators are fitted on the training data as
    given; the five neural configurations are fitted on the dense form of the
    training data with 20% of it held out for validation-based early stopping.

    Args:
        X_train: Training features; must support ``.shape`` and ``.toarray()``.
        y_train: Integer training labels.
        X_test: Test features; must support ``.toarray()``.
        y_test: Integer test labels.

    Returns:
        A list of metric dicts (one per model, in evaluation order), each
        being the output of ``evaluate_model`` plus a 'Model' name entry.
    """
    collected = []

    # Classical estimators all follow the same fit/predict/score routine.
    # Each entry is (display name, zero-argument factory) so that every
    # estimator is only instantiated right before it is fitted.
    classical = (
        ('Multinomial Naive Bayes', lambda: MultinomialNB()),
        ('Decision Tree', lambda: DecisionTreeClassifier(random_state=42)),
        ('Random Forest', lambda: RandomForestClassifier(random_state=42, n_estimators=100)),
    )
    for label, make_estimator in classical:
        print(f"\nTesting {label}...")
        estimator = make_estimator()
        estimator.fit(X_train, y_train)
        hard_preds = estimator.predict(X_test)
        class_scores = estimator.predict_proba(X_test)
        scores = evaluate_model(y_test, hard_preds, class_scores)
        scores['Model'] = label
        collected.append(scores)

    # Neural network configurations, evaluated in insertion order.
    builders = {
        "Config 1": create_ndt_config_1,
        "Config 2": create_ndt_config_2,
        "Config 3": create_ndt_config_3,
        "Config 4": create_ndt_config_4,
        "Config 5": create_ndt_config_5,
    }
    for config_name, build in builders.items():
        print(f"\nTesting Neural Decision Tree - {config_name}...")
        network = build(
            input_dim=X_train.shape[1],
            output_dim=len(np.unique(y_train)),
        )

        stopper = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
        network.fit(
            X_train.toarray(), y_train,
            epochs=20,
            batch_size=32,
            validation_split=0.2,
            callbacks=[stopper],
            verbose=0,
        )

        class_scores = network.predict(X_test.toarray())
        # Predicted class = index of the largest softmax output.
        hard_preds = np.argmax(class_scores, axis=1)
        scores = evaluate_model(y_test, hard_preds, class_scores)
        scores['Model'] = f'Neural Decision Tree - {config_name}'
        collected.append(scores)

    return collected

# Run tests
# NOTE(review): file_path is assigned here but never read in this cell.
file_path = r"C:\Users\Suhaa\Downloads\Codey AI\Mockups\challenging_scrambled_data.csv"

# NOTE(review): X_train, y_train, X_test and y_test are not defined anywhere in
# this cell. Presumably they are whatever an earlier cell left in the notebook
# namespace (the previous cell's loop ends on its 10% subset). Confirm this is
# the intended data; in a fresh kernel this line raises NameError.
results = test_all_models(X_train, y_train, X_test, y_test)

# Print one metrics dict per model.
for result in results:
    print(result)


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
)
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import torch
from transformers import BertTokenizer, BertForSequenceClassification, DistilBertTokenizer, DistilBertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
import time
import seaborn as sns
from collections import Counter
import warnings
import random

# Silence every warning for the rest of the session.
# NOTE(review): this also hides any warnings raised by the libraries used
# below; confirm that is intended.
warnings.filterwarnings('ignore')

# Set seeds
# The same value (42) seeds Python's random, NumPy, PyTorch and TensorFlow.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
tf.random.set_seed(42)

# Load original data
# This cell reads the original (unscrambled) CSV from a hard-coded absolute path.
file_path = r"C:\Users\Suhaa\Downloads\Codey AI\Mockups\navigator-batch-generate-6768423f003f7114aba22d03-data.csv"
df = pd.read_csv(file_path)

print("Initial Data Shape:", df.shape)
print("Initial Data Sample:")
print(df.head())

# Rows missing either the text or the label are dropped; drop_duplicates() is
# called without a subset, so only rows identical in every column are removed.
df = df.dropna(subset=["User Command", "Action"]).drop_duplicates()
print("\nData Shape after Dropping NA and Duplicates:", df.shape)

# Encode labels
# Maps each distinct "Action" value to an integer id stored in "Action_encoded".
label_encoder = LabelEncoder()
df["Action_encoded"] = label_encoder.fit_transform(df["Action"])

print("\nEncoded Classes:")
print(dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))

# Prepare data
X_raw = df["User Command"].values
y = df["Action_encoded"].values

# Balance classes
original_class_counts = Counter(y)
print("\nOriginal Class Distribution:", original_class_counts)

# The raw strings are reshaped into a single-column 2-D array for
# fit_resample and flattened back to 1-D afterwards.
# NOTE(review): over-sampling runs before the train/test split done later;
# presumably RandomOverSampler balances classes by repeating minority rows, in
# which case identical sentences can land in both train and test. Verify.
ros = RandomOverSampler(random_state=42)
X_resampled_raw, y_resampled = ros.fit_resample(X_raw.reshape(-1, 1), y)
X_resampled_raw = X_resampled_raw.flatten()

new_class_counts = Counter(y_resampled)
print("After Random Over-Sampling Class Distribution:", new_class_counts)

# Create TF-IDF features
# NOTE(review): the vectorizer is fitted on the full resampled corpus before
# any train/test split, so vocabulary and IDF weights also reflect rows that
# later end up in the test split. Confirm this is acceptable.
tfidf_vectorizer = TfidfVectorizer(max_features=500)
X_resampled_tfidf = tfidf_vectorizer.fit_transform(X_resampled_raw)

print("\nTF-IDF Feature Shape:", X_resampled_tfidf.shape)

# Models
# These three estimator objects are created once and re-fitted for every data
# size in the evaluation loop.
nb_model = MultinomialNB()
dt_model = DecisionTreeClassifier(random_state=42)
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)

def create_neural_net(input_dim, output_dim):
    """Build and compile the feed-forward softmax classifier.

    Layer stack: Dense(256, relu) -> Dropout(0.3) -> Dense(128, relu) ->
    Dropout(0.2) -> Dense(output_dim, softmax).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of units in the softmax output layer.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.001),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        Dense(256, activation="relu", input_dim=input_dim),
        Dropout(0.3),
        Dense(128, activation="relu"),
        Dropout(0.2),
        Dense(output_dim, activation="softmax"),
    ]
    network = Sequential(layer_stack)
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

def create_bert_model(num_labels):
    """Load 'bert-base-uncased' with a sequence-classification head.

    Args:
        num_labels: Number of output labels passed to ``from_pretrained``.

    Returns:
        The ``BertForSequenceClassification`` instance.
    """
    return BertForSequenceClassification.from_pretrained(
        'bert-base-uncased',
        num_labels=num_labels,
    )

def create_distilbert_model(num_labels):
    """Load 'distilbert-base-uncased' with a sequence-classification head.

    Args:
        num_labels: Number of output labels passed to ``from_pretrained``.

    Returns:
        The ``DistilBertForSequenceClassification`` instance.
    """
    return DistilBertForSequenceClassification.from_pretrained(
        'distilbert-base-uncased',
        num_labels=num_labels,
    )

def evaluate_model(y_true, y_pred, y_pred_proba=None, multi_class='ovr'):
    """Compute accuracy, weighted precision/recall/F1 and ROC-AUC.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        y_pred_proba: Optional per-class scores, one row per sample. Any
            array-like is accepted, including a list of per-sample rows.
        multi_class: Forwarded to ``roc_auc_score`` in the multi-class case.
            Pass ``None`` to skip ROC-AUC altogether.

    Returns:
        A dict with the keys 'Accuracy', 'Precision', 'Recall', 'F1-Score'
        and 'ROC-AUC'. 'ROC-AUC' is the string 'N/A' when no scores were
        given, when ``multi_class`` is ``None``, or when scoring fails.
    """
    metrics = {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'Recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'F1-Score': f1_score(y_true, y_pred, average='weighted', zero_division=0),
    }

    roc_auc = 'N/A'
    if y_pred_proba is not None and multi_class is not None:
        try:
            # Normalise to an ndarray so the column slice below also works
            # when the caller passes a list of per-sample rows.
            proba = np.asarray(y_pred_proba)
            y_binarized = label_binarize(y_true, classes=np.unique(y_true))
            if y_binarized.shape[1] == 1:
                # A single indicator column means two classes: score the
                # second probability column against the raw labels.
                roc_auc = roc_auc_score(y_true, proba[:, 1])
            else:
                roc_auc = roc_auc_score(
                    y_binarized, proba, average='weighted', multi_class=multi_class
                )
        except Exception:
            # ROC-AUC is best-effort (shape mismatches, classes missing from
            # y_true, ...). Unlike a bare `except:`, this no longer swallows
            # KeyboardInterrupt or SystemExit.
            roc_auc = 'N/A'
    metrics['ROC-AUC'] = roc_auc

    return metrics

def get_data_subset(X, y, reduction_ratio):
    """Return a stratified fraction of ``(X, y)``.

    Args:
        X: Feature container accepted by ``train_test_split``.
        y: Labels aligned with ``X``; also used for stratification.
        reduction_ratio: Fraction of the data to keep. A value equal to 1.0
            returns the inputs themselves without sampling.

    Returns:
        A ``(X_subset, y_subset)`` tuple.
    """
    if reduction_ratio != 1.0:
        # train_test_split returns [X_kept, X_rest, y_kept, y_rest]; only the
        # "train" halves are the requested subset.
        pieces = train_test_split(
            X, y,
            train_size=reduction_ratio,
            stratify=y,
            random_state=42,
        )
        return pieces[0], pieces[2]
    return X, y

# Collects one metrics dict per (data size, model) from the evaluation loop.
results = []
# Fractions of the resampled data to evaluate on, largest first.
data_reduction_levels = [1.0, 0.75, 0.5, 0.25, 0.1]

def encode_sentences_bert(sentences, labels, tokenizer, max_length=128):
    """Tokenize ``sentences`` and bundle them with ``labels`` in a TensorDataset.

    Args:
        sentences: Iterable of raw text strings; materialised with ``list()``
            before being handed to the tokenizer.
        labels: Integer class ids aligned with ``sentences``.
        tokenizer: Callable accepting ``(texts, padding=, truncation=,
            max_length=, return_tensors=)`` whose result exposes 'input_ids'
            and 'attention_mask'.
        max_length: Truncation length forwarded to the tokenizer.

    Returns:
        A ``TensorDataset`` yielding ``(input_ids, attention_mask, label)``.
    """
    inputs = tokenizer(
        list(sentences),
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors='pt',
    )
    # Force int64 labels. Without an explicit dtype torch.tensor() inherits
    # the NumPy dtype, so int32 label arrays would produce int32 targets,
    # which the classification loss of the models trained below is expected
    # to reject (cross-entropy needs long class indices).
    label_tensor = torch.tensor(labels, dtype=torch.long)
    return TensorDataset(inputs['input_ids'], inputs['attention_mask'], label_tensor)

def measure_processing_time_llm(model, tokenizer, sentence, device):
    """Time one forward pass of ``model`` on a single sentence.

    Tokenization and the transfer to ``device`` happen before the timer
    starts; the timed region covers the forward pass, the softmax and the
    argmax (including the ``.item()`` call that extracts the prediction).

    Args:
        model: Callable model with an ``eval()`` method whose output exposes
            ``.logits``.
        tokenizer: Callable returning an object that supports ``.to(device)``
            and ``**`` unpacking into the model call.
        sentence: The raw text to classify.
        device: Target device passed to ``.to()``.

    Returns:
        A ``(predicted_class, elapsed_milliseconds)`` tuple.
    """
    model.eval()
    inputs = tokenizer(
        sentence,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=128,
    ).to(device)

    # perf_counter() is the clock intended for measuring short durations;
    # time.time() is a wall clock that can be coarse or be adjusted mid-run.
    start_time = time.perf_counter()
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1)
        pred = torch.argmax(probs, dim=1).item()
    end_time = time.perf_counter()

    processing_time_ms = (end_time - start_time) * 1000
    return pred, processing_time_ms
# Main experiment: for every data size, train and score the four TF-IDF based
# models and then fine-tune BERT and DistilBERT on the raw text.
for reduction in data_reduction_levels:
    print(f"\n---\nEvaluating Models with {int(reduction*100)}% of Data\n---")
    
    # Stratified subset of the over-sampled raw sentences for this level.
    X_subset_raw, y_subset = get_data_subset(X_resampled_raw, y_resampled, reduction)
    
    print("Subset Class Distribution:", Counter(y_subset))
    print("Subset Raw Text Shape:", X_subset_raw.shape)
    
    # Re-uses the vectorizer fitted on the full resampled corpus before the loop.
    X_subset_tfidf = tfidf_vectorizer.transform(X_subset_raw)
    
    # Split data for traditional models
    X_train_tfidf, X_test_tfidf, y_train, y_test = train_test_split(
        X_subset_tfidf, y_subset,
        test_size=0.2,
        stratify=y_subset,
        random_state=42
    )
    
    # Split data for LLMs
    # Same labels, test_size, stratification and random_state as the split
    # above; presumably this yields the same row partition, so y_train_llm and
    # y_test_llm would equal y_train and y_test. TODO confirm.
    X_train_raw, X_test_raw, y_train_llm, y_test_llm = train_test_split(
        X_subset_raw, y_subset,
        test_size=0.2,
        stratify=y_subset,
        random_state=42
    )
    
    # Naive Bayes
    print("\nTraining and Evaluating Multinomial Naive Bayes...")
    start_time = time.time()
    nb_model.fit(X_train_tfidf, y_train)
    training_time = time.time() - start_time
    y_pred_nb = nb_model.predict(X_test_tfidf)
    y_pred_proba_nb = nb_model.predict_proba(X_test_tfidf)
    metrics_nb = evaluate_model(y_test, y_pred_nb, y_pred_proba_nb, multi_class='ovr')
    
    # "Processing time" is the wall-clock time of one predict() call on the
    # first test row, in milliseconds. It is a single measurement, not an average.
    sample = X_test_tfidf[0]
    start_proc_time = time.time()
    nb_model.predict(sample)
    processing_time = (time.time() - start_proc_time) * 1000
    
    metrics_nb['Data Size (%)'] = int(reduction*100)
    metrics_nb['Model'] = 'NLP Model (MultinomialNB)'
    metrics_nb['Training Time (s)'] = round(training_time, 4)
    metrics_nb['Processing Time (ms)'] = round(processing_time, 4)
    results.append(metrics_nb)
    
    # Decision Tree
    # Same fit / predict / score / single-row timing sequence as above.
    print("\nTraining and Evaluating Decision Tree...")
    start_time = time.time()
    dt_model.fit(X_train_tfidf, y_train)
    training_time = time.time() - start_time
    y_pred_dt = dt_model.predict(X_test_tfidf)
    y_pred_proba_dt = dt_model.predict_proba(X_test_tfidf)
    metrics_dt = evaluate_model(y_test, y_pred_dt, y_pred_proba_dt, multi_class='ovr')
    
    sample = X_test_tfidf[0]
    start_proc_time = time.time()
    dt_model.predict(sample)
    processing_time = (time.time() - start_proc_time) * 1000
    
    metrics_dt['Data Size (%)'] = int(reduction*100)
    metrics_dt['Model'] = 'Decision Tree'
    metrics_dt['Training Time (s)'] = round(training_time, 4)
    metrics_dt['Processing Time (ms)'] = round(processing_time, 4)
    results.append(metrics_dt)
    
    # Random Forest
    # Same fit / predict / score / single-row timing sequence as above.
    print("\nTraining and Evaluating Random Forest...")
    start_time = time.time()
    rf_model.fit(X_train_tfidf, y_train)
    training_time = time.time() - start_time
    y_pred_rf = rf_model.predict(X_test_tfidf)
    y_pred_proba_rf = rf_model.predict_proba(X_test_tfidf)
    metrics_rf = evaluate_model(y_test, y_pred_rf, y_pred_proba_rf, multi_class='ovr')
    
    sample = X_test_tfidf[0]
    start_proc_time = time.time()
    rf_model.predict(sample)
    processing_time = (time.time() - start_proc_time) * 1000
    
    metrics_rf['Data Size (%)'] = int(reduction*100)
    metrics_rf['Model'] = 'Random Forest'
    metrics_rf['Training Time (s)'] = round(training_time, 4)
    metrics_rf['Processing Time (ms)'] = round(processing_time, 4)
    results.append(metrics_rf)
    
    # Neural Network
    # A fresh network is built for every data size. The output width comes
    # from the label encoder rather than from the labels present in the subset.
    print("\nTraining and Evaluating Neural Decision Tree...")
    ndt_model = create_neural_net(input_dim=X_train_tfidf.shape[1], output_dim=len(label_encoder.classes_))
    early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    
    # NOTE(review): the test split doubles as validation data, so early
    # stopping and restore_best_weights select the weights using the same rows
    # the model is scored on below. Confirm this is intended.
    start_time = time.time()
    ndt_model.fit(
        X_train_tfidf.toarray(), y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_test_tfidf.toarray(), y_test),
        callbacks=[early_stop],
        verbose=0
    )
    training_time = time.time() - start_time
    y_pred_ndt_prob = ndt_model.predict(X_test_tfidf.toarray())
    # Predicted class = index of the largest softmax output.
    y_pred_ndt = np.argmax(y_pred_ndt_prob, axis=1)
    metrics_ndt = evaluate_model(y_test, y_pred_ndt, y_pred_ndt_prob, multi_class='ovr')
    
    # The network is fed dense arrays, so the single test row is converted
    # before the timed predict() call.
    sample = X_test_tfidf[0].toarray()
    start_proc_time = time.time()
    ndt_model.predict(sample)
    processing_time = (time.time() - start_proc_time) * 1000
    
    metrics_ndt['Data Size (%)'] = int(reduction*100)
    metrics_ndt['Model'] = 'Neural Decision Tree'
    metrics_ndt['Training Time (s)'] = round(training_time, 4)
    metrics_ndt['Processing Time (ms)'] = round(processing_time, 4)
    results.append(metrics_ndt)
    
    # BERT
    # Tokenizer and model are loaded again for every data size, so each level
    # starts fine-tuning from the 'bert-base-uncased' checkpoint. The model is
    # loaded directly here rather than through create_bert_model().
    print("\nTraining and Evaluating BERT (Open-Source LLM)...")
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_encoder.classes_))
    
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    bert_model.to(device)
    
    # NOTE(review): the raw-text rows are paired with y_train / y_test from the
    # TF-IDF split, while the DistilBERT section uses y_train_llm / y_test_llm.
    # This is only correct if both splits partition the rows identically.
    train_dataset_bert = encode_sentences_bert(X_train_raw, y_train, bert_tokenizer)
    test_dataset_bert = encode_sentences_bert(X_test_raw, y_test, bert_tokenizer)
    
    # Only the training loader shuffles; the test loader keeps dataset order so
    # that predictions stay aligned with y_test.
    train_loader_bert = DataLoader(train_dataset_bert, batch_size=16, shuffle=True)
    test_loader_bert = DataLoader(test_dataset_bert, batch_size=16)
    
    optimizer_bert = torch.optim.AdamW(bert_model.parameters(), lr=2e-5)
    
    # Fine-tuning loop: three passes over the training loader. Passing
    # labels= makes the model output carry the loss that is back-propagated.
    bert_model.train()
    start_time = time.time()
    epochs = 3
    for epoch in range(epochs):
        for batch in train_loader_bert:
            input_ids, attention_mask, labels = [b.to(device) for b in batch]
            optimizer_bert.zero_grad()
            outputs = bert_model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer_bert.step()
        print(f"BERT Epoch {epoch+1}/{epochs} completed.")
    training_time = time.time() - start_time
    
    # Inference over the test loader without gradient tracking.
    # `true_labels` is filled here but not read afterwards in this section.
    bert_model.eval()
    preds, true_labels, pred_probs = [], [], []
    
    with torch.no_grad():
        for batch in test_loader_bert:
            input_ids, attention_mask, labels = [b.to(device) for b in batch]
            outputs = bert_model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            probs = torch.softmax(logits, dim=1)
            preds.extend(torch.argmax(probs, dim=1).cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
            pred_probs.extend(probs.cpu().numpy())
    
    # preds and pred_probs are plain Python lists (one entry per test row).
    metrics_bert = evaluate_model(y_test, preds, pred_probs, multi_class='ovr')
    
    # Single-sentence latency on the first test sentence.
    sample_sentence = X_test_raw[0]
    pred_bert, proc_time_bert = measure_processing_time_llm(bert_model, bert_tokenizer, sample_sentence, device)
    
    metrics_bert['Data Size (%)'] = int(reduction*100)
    metrics_bert['Model'] = 'BERT (Open-Source LLM)'
    metrics_bert['Training Time (s)'] = round(training_time, 4)
    metrics_bert['Processing Time (ms)'] = round(proc_time_bert, 4)
    results.append(metrics_bert)
    
    # DistilBERT
    # Mirrors the BERT section with the 'distilbert-base-uncased' checkpoint
    # and the *_llm label arrays; `device` is reused from the BERT section.
    print("\nTraining and Evaluating DistilBERT (Lightweight Open-Source LLM)...")
    distilbert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    distilbert_model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=len(label_encoder.classes_))
    
    distilbert_model.to(device)
    
    train_dataset_distilbert = encode_sentences_bert(X_train_raw, y_train_llm, distilbert_tokenizer)
    test_dataset_distilbert = encode_sentences_bert(X_test_raw, y_test_llm, distilbert_tokenizer)
    
    train_loader_distilbert = DataLoader(train_dataset_distilbert, batch_size=16, shuffle=True)
    test_loader_distilbert = DataLoader(test_dataset_distilbert, batch_size=16)
    
    optimizer_distilbert = torch.optim.AdamW(distilbert_model.parameters(), lr=2e-5)
    
    # Fine-tuning loop: three passes over the training loader.
    distilbert_model.train()
    start_time = time.time()
    epochs = 3
    for epoch in range(epochs):
        for batch in train_loader_distilbert:
            input_ids, attention_mask, labels = [b.to(device) for b in batch]
            optimizer_distilbert.zero_grad()
            outputs = distilbert_model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer_distilbert.step()
        print(f"DistilBERT Epoch {epoch+1}/{epochs} completed.")
    training_time = time.time() - start_time
    
    # Inference over the test loader without gradient tracking; the three
    # accumulator lists are reset so nothing carries over from BERT.
    distilbert_model.eval()
    preds, true_labels, pred_probs = [], [], []
    
    with torch.no_grad():
        for batch in test_loader_distilbert:
            input_ids, attention_mask, labels = [b.to(device) for b in batch]
            outputs = distilbert_model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            probs = torch.softmax(logits, dim=1)
            preds.extend(torch.argmax(probs, dim=1).cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
            pred_probs.extend(probs.cpu().numpy())
    
    metrics_distilbert = evaluate_model(y_test_llm, preds, pred_probs, multi_class='ovr')
    
    # Single-sentence latency on the first test sentence.
    sample_sentence = X_test_raw[0]
    pred_distilbert, proc_time_distilbert = measure_processing_time_llm(distilbert_model, distilbert_tokenizer, sample_sentence, device)
    
    metrics_distilbert['Data Size (%)'] = int(reduction*100)
    metrics_distilbert['Model'] = 'DistilBERT (Lightweight LLM)'
    metrics_distilbert['Training Time (s)'] = round(training_time, 4)
    metrics_distilbert['Processing Time (ms)'] = round(proc_time_distilbert, 4)
    results.append(metrics_distilbert)

# Create final results
# One row per (data size, model) pair collected in `results`; the column list
# below fixes the display order.
results_df = pd.DataFrame(results)
results_df = results_df[['Data Size (%)', 'Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC', 'Training Time (s)', 'Processing Time (ms)']]

print("\nFinal Evaluation Results:")
print(results_df)

# results_df.to_csv('model_evaluation_comparison_with_LLMs.csv', index=False)
