# Merged Analysis Notebook

This notebook contains the merged content from:
1. ML_models_with_TF_IDF_and_SkipGram.ipynb
2. cse440project-Glove-with-7-models.ipynb
3. cse440project-skipgram-with-7-models.ipynb

---



## Section 1: ML_models_with_TF_IDF_and_SkipGram



In [None]:
# Install gdown if not available
!pip install gdown --quiet

import gdown
import pandas as pd

# ============================
# Replace these with your links
# ============================
train_url = "https://drive.google.com/file/d/1lRLZyebOdT2UIRvrPKJoE4a6TqGxczZG/view?usp=drive_link"
test_url  = "https://drive.google.com/file/d/1fwcV7K0vq5OiuS33dw_0Y48lZZSuyZg_/view?usp=drive_link"

# Extract file IDs: the path segment that follows "/d/" in a Drive share link.
train_id = train_url.split("/d/")[1].split("/")[0]
test_id  = test_url.split("/d/")[1].split("/")[0]

# Download files into the working directory; later cells re-read them by name.
gdown.download(f"https://drive.google.com/uc?id={train_id}", "train.csv", quiet=False)
gdown.download(f"https://drive.google.com/uc?id={test_id}", "test.csv", quiet=False)

# Load datasets
try:
    train = pd.read_csv("train.csv")
    print("Training dataset loaded successfully!")
except FileNotFoundError:
    print("Training file not found.")
    train = None

try:
    test = pd.read_csv("test.csv")
    print("Testing dataset loaded successfully!")
except FileNotFoundError:
    print("Testing file not found. We'll create a test split from the training data.")
    test = None
    if train is not None:
        # Build the split the message above promises: hold out 20% of the
        # training rows (fixed seed so the split is repeatable).
        test = train.sample(frac=0.2, random_state=42)
        train = train.drop(test.index).reset_index(drop=True)
        test = test.reset_index(drop=True)
        # Persist both files because the following cells reload
        # "train.csv" / "test.csv" from disk instead of reusing these frames.
        train.to_csv("train.csv", index=False)
        test.to_csv("test.csv", index=False)


In [None]:
# Report (rows, columns) for the train and test frames, in that order,
# then show the first training rows as the cell's rich output.
for frame in (train, test):
    print(frame.shape)
train.head()


In [None]:
# =============================
# Step 1: Exploratory Data Analysis
# =============================
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud

# --- Load Data ---
train = pd.read_csv("train.csv")
test  = pd.read_csv("test.csv")

# --- Class Distribution ---
plt.figure(figsize=(12,6))  # wider figure
# Bars are ordered from the most to the least frequent class.
sns.countplot(x="Class", data=train, order=train['Class'].value_counts().index)
plt.title("Class Distribution (Train Set)")
plt.xticks(rotation=45, ha='right')  # rotate labels
plt.tight_layout()
plt.show()

print("\nClass distribution (%):")
print(train['Class'].value_counts(normalize=True)*100)

# --- Text Length Distribution ---
# Coerce to str before measuring, exactly as the word-count and word-cloud
# steps below do; a bare len() raises TypeError on a missing (float NaN) value.
train['text_length'] = train['QA Text'].astype(str).str.len()
test['text_length']  = test['QA Text'].astype(str).str.len()

plt.figure(figsize=(10,6))
sns.histplot(train['text_length'], bins=50, kde=True)
plt.title("Distribution of Text Lengths (Train)")
plt.xlabel("Text Length (#characters)")
plt.ylabel("Frequency")
plt.show()

# --- Word Count Distribution ---
# Whitespace-delimited token count per document.
train['word_count'] = train['QA Text'].apply(lambda x: len(str(x).split()))

plt.figure(figsize=(12,6))
sns.boxplot(x='Class', y='word_count', data=train)
plt.title("Word Count by Class")
plt.xticks(rotation=45, ha='right')  # rotate labels
plt.tight_layout()
plt.show()

# --- Word Clouds per Class ---
# One figure per class, built from all of that class's raw text joined together.
for label in train['Class'].unique():
    subset = train[train['Class'] == label]
    text = " ".join(subset['QA Text'].astype(str).values)
    wordcloud = WordCloud(
        width=800, height=400,
        background_color="white",
        max_words=200
    ).generate(text)

    plt.figure(figsize=(12,6))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(f"Word Cloud - Class: {label}", fontsize=16)
    plt.tight_layout()
    plt.show()


In [None]:
# =============================
# Step 2: Preprocessing (Fixed)
# =============================
import pandas as pd
import re, string, nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# --- Download NLTK resources ---
# Fetch the corpora that the stop-word list and the lemmatizer read from.
for resource in ("stopwords", "wordnet"):
    nltk.download(resource)

# --- Initialize stopwords and lemmatizer ---
# A set gives constant-time membership checks inside clean_text.
english_stopwords = stopwords.words("english")
stop_words = set(english_stopwords)
lemmatizer = WordNetLemmatizer()

# --- Text cleaning function ---
# string.punctuation contains "\", "]", "^" and "-", which are special inside a
# regex character class. Unescaped, the "\]" pair is read as an escaped "]" and
# the backslash itself is never matched, so backslashes survived cleaning.
# re.escape makes every punctuation character literal.
_PUNCTUATION_RE = re.compile(f"[{re.escape(string.punctuation)}]")


def clean_text(text):
    """Normalize one document for bag-of-words style vectorizers.

    Steps: cast to ``str`` and lowercase, replace every ASCII punctuation
    character with a space, split on whitespace, drop tokens found in the
    module-level ``stop_words`` set, and lemmatize what remains with the
    module-level ``lemmatizer``.

    Parameters
    ----------
    text : any
        Raw document; non-string values (including NaN) are stringified.

    Returns
    -------
    str
        Space-joined cleaned tokens (may be empty).
    """
    text = str(text).lower()  # lowercase
    text = _PUNCTUATION_RE.sub(" ", text)  # remove punctuation
    tokens = text.split()
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return " ".join(tokens)

# --- Load raw CSV files ---
train = pd.read_csv("train.csv")
test  = pd.read_csv("test.csv")

# --- Apply text cleaning ---
# Adds a "clean_text" column next to the untouched "QA Text" column.
train["clean_text"] = train["QA Text"].apply(clean_text)
test["clean_text"]  = test["QA Text"].apply(clean_text)

# --- Save preprocessed datasets ---
# NOTE: a document that cleans down to "" is written as a blank CSV field and
# is read back by pd.read_csv as NaN, so consumers of these files should
# fill missing clean_text values before vectorizing.
train.to_csv("train_processed.csv", index=False)
test.to_csv("test_processed.csv", index=False)

# The status message previously carried a mis-decoded check-mark emoji.
print("✅ Preprocessing complete! Files saved as 'train_processed.csv' and 'test_processed.csv'")
print(train[["QA Text", "clean_text", "Class"]].head())


In [None]:
# =============================
# Balanced BoW + Random Forest
# =============================
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

# --- Load preprocessed data ---
train = pd.read_csv("train_processed.csv")
test  = pd.read_csv("test_processed.csv")

# Documents that cleaned down to "" come back from CSV as NaN, which
# CountVectorizer rejects; map them back to empty strings.
X_train, y_train = train["clean_text"].fillna(""), train["Class"]
X_test, y_test   = test["clean_text"].fillna(""), test["Class"]

# --- BoW Vectorization (larger vocab for better accuracy) ---
# Vocabulary is learned from the training split only and reused for test.
vectorizer = CountVectorizer(max_features=5000)
Xtr = vectorizer.fit_transform(X_train)
Xte = vectorizer.transform(X_test)

# --- Balanced Random Forest with progress ---
# NOTE(review): no class_weight is set, so "Balanced" here does not mean
# class-balanced training — confirm the intended meaning.
n_estimators = 200
model = RandomForestClassifier(
    n_estimators=1,     # start with 1 tree
    max_depth=30,       # deeper trees for better accuracy
    warm_start=True,    # incremental training for progress display
    n_jobs=-1,          # use all CPU cores
    random_state=42
)

print("Training Random Forest (Balanced BoW):")
# With warm_start=True each fit() keeps the existing trees and only grows the
# forest up to the new n_estimators, i.e. one extra tree per iteration.
for i in tqdm(range(1, n_estimators + 1)):
    model.n_estimators = i
    model.fit(Xtr, y_train)
    if i % 20 == 0:  # print every 20 trees
        print(f"Trained {i}/{n_estimators} trees")

# --- Predictions ---
preds = model.predict(Xte)

# --- Metrics ---
print("\nAccuracy:", accuracy_score(y_test, preds))
print("F1 Score:", f1_score(y_test, preds, average="weighted"))
print(classification_report(y_test, preds))

# --- Confusion Matrix ---
# Same label set and order confusion_matrix uses by default, made explicit so
# the heatmap axes can show class names instead of positional indices.
class_labels = sorted(set(y_test) | set(preds))
cm = confusion_matrix(y_test, preds, labels=class_labels)
plt.figure(figsize=(12,7))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix - Balanced RandomForest + BoW")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(rotation=45, ha="right")
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()


In [None]:
# =============================
# Balanced TF-IDF + Random Forest
# =============================
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

# --- Load preprocessed data ---
train = pd.read_csv("train_processed.csv")
test  = pd.read_csv("test_processed.csv")

# Documents that cleaned down to "" come back from CSV as NaN, which
# TfidfVectorizer rejects; map them back to empty strings.
X_train, y_train = train["clean_text"].fillna(""), train["Class"]
X_test, y_test   = test["clean_text"].fillna(""), test["Class"]

# --- TF-IDF Vectorization (larger vocab for better accuracy) ---
# Vocabulary and IDF weights are learned from the training split only.
vectorizer = TfidfVectorizer(max_features=5000)
Xtr = vectorizer.fit_transform(X_train)
Xte = vectorizer.transform(X_test)

# --- Balanced Random Forest with progress ---
# NOTE(review): no class_weight is set, so "Balanced" here does not mean
# class-balanced training — confirm the intended meaning.
n_estimators = 200
model = RandomForestClassifier(
    n_estimators=1,     # start with 1 tree
    max_depth=30,       # deeper trees for better accuracy
    warm_start=True,    # incremental training for progress display
    n_jobs=-1,
    random_state=42
)

print("Training Random Forest (balanced speed & accuracy):")
# With warm_start=True each fit() keeps the existing trees and only grows the
# forest up to the new n_estimators, i.e. one extra tree per iteration.
for i in tqdm(range(1, n_estimators + 1)):
    model.n_estimators = i
    model.fit(Xtr, y_train)
    if i % 20 == 0:  # print progress every 20 trees
        print(f"Trained {i}/{n_estimators} trees")

# --- Predictions ---
preds = model.predict(Xte)

# --- Metrics ---
print("\nAccuracy:", accuracy_score(y_test, preds))
print("F1 Score:", f1_score(y_test, preds, average="weighted"))
print(classification_report(y_test, preds))

# --- Confusion Matrix ---
# Same label set and order confusion_matrix uses by default, made explicit so
# the heatmap axes can show class names instead of positional indices.
class_labels = sorted(set(y_test) | set(preds))
cm = confusion_matrix(y_test, preds, labels=class_labels)
plt.figure(figsize=(12,7))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix - Balanced RandomForest + TF-IDF")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(rotation=45, ha="right")
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()


In [None]:
# =============================
# Logistic Regression + BoW (Light + Progress)
# =============================
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
from sklearn.exceptions import ConvergenceWarning

# --- Suppress convergence warnings ---
# NOTE(review): this also hides the signal that saga stopped before
# converging; consider re-enabling it when tuning max_iter.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# --- Load preprocessed data ---
train = pd.read_csv("train_processed.csv")
test  = pd.read_csv("test_processed.csv")
# Documents that cleaned down to "" come back from CSV as NaN, which
# CountVectorizer rejects; map them back to empty strings.
X_train, y_train = train["clean_text"].fillna(""), train["Class"]
X_test, y_test   = test["clean_text"].fillna(""), test["Class"]

# --- BoW Vectorization (smaller features for speed) ---
vectorizer = CountVectorizer(max_features=2000)
Xtr = vectorizer.fit_transform(X_train)
Xte = vectorizer.transform(X_test)

# --- Logistic Regression setup ---
# OneVsRestClassifier clones its base estimator on every fit(), so a
# warm-started "train in chunks" loop cannot resume: each pass retrains every
# binary model from scratch. The previous 20-pass loop (max_iter 50, 100, ...,
# 1000) therefore ended with exactly the model a single fit at max_iter=1000
# produces, at roughly ten times the cost. Fit once with that final budget.
lr = LogisticRegression(
    max_iter=1000,    # final iteration budget of the former 50 x 20 schedule
    solver='saga',
    n_jobs=-1,
    random_state=42
)
model = OneVsRestClassifier(lr)

# --- Train ---
print("Training Logistic Regression (BoW, light version) with progress...")
model.fit(Xtr, y_train)

print("Training complete!")

# --- Predictions ---
preds = model.predict(Xte)

# --- Metrics ---
print("\nAccuracy:", accuracy_score(y_test, preds))
print("F1 Score:", f1_score(y_test, preds, average="weighted"))
print(classification_report(y_test, preds))

# --- Confusion Matrix ---
# Same label set and order confusion_matrix uses by default, made explicit so
# the heatmap axes can show class names instead of positional indices.
class_labels = sorted(set(y_test) | set(preds))
cm = confusion_matrix(y_test, preds, labels=class_labels)
plt.figure(figsize=(12,7))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix - Logistic Regression + BoW (Light + Progress)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(rotation=45, ha="right")
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()


In [None]:
# =============================
# Logistic Regression + TF-IDF (Super-Ultra-Light + Progress)
# =============================
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
from sklearn.exceptions import ConvergenceWarning

# --- Suppress convergence warnings ---
# NOTE(review): with only 50 saga iterations the solver may well stop before
# converging, and this filter hides that warning — confirm the budget is intended.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# --- Load preprocessed data ---
train = pd.read_csv("train_processed.csv")
test  = pd.read_csv("test_processed.csv")
# Documents that cleaned down to "" come back from CSV as NaN, which
# TfidfVectorizer rejects; map them back to empty strings.
X_train, y_train = train["clean_text"].fillna(""), train["Class"]
X_test, y_test   = test["clean_text"].fillna(""), test["Class"]

# --- TF-IDF Vectorization (very small features for max speed) ---
vectorizer = TfidfVectorizer(max_features=500)
Xtr = vectorizer.fit_transform(X_train)
Xte = vectorizer.transform(X_test)

# --- Logistic Regression setup ---
# OneVsRestClassifier clones its base estimator on every fit(), so a
# warm-started "train in chunks" loop cannot resume: each pass retrains every
# binary model from scratch. The previous 5-pass loop (max_iter 10, 20, ..., 50)
# therefore ended with exactly the model a single fit at max_iter=50 produces.
# Fit once with that final budget.
lr = LogisticRegression(
    max_iter=50,      # final iteration budget of the former 10 x 5 schedule
    solver='saga',
    n_jobs=-1,
    random_state=42
)
model = OneVsRestClassifier(lr)

# --- Train ---
print("Training Logistic Regression (TF-IDF, super-ultra-light) with progress...")
model.fit(Xtr, y_train)

print("Training complete!")

# --- Predictions ---
preds = model.predict(Xte)

# --- Metrics ---
print("\nAccuracy:", accuracy_score(y_test, preds))
print("F1 Score:", f1_score(y_test, preds, average="weighted"))
print(classification_report(y_test, preds))

# --- Confusion Matrix ---
# Same label set and order confusion_matrix uses by default, made explicit so
# the heatmap axes can show class names instead of positional indices.
class_labels = sorted(set(y_test) | set(preds))
cm = confusion_matrix(y_test, preds, labels=class_labels)
plt.figure(figsize=(10,6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix - Logistic Regression + TF-IDF (Super-Ultra-Light + Progress)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(rotation=45, ha="right")
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()


In [None]:
# =============================
# Naive Bayes + BoW (Super-Fast + Progress)
# =============================
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

# --- Load preprocessed data ---
train = pd.read_csv("train_processed.csv")
test  = pd.read_csv("test_processed.csv")
# Documents that cleaned down to "" come back from CSV as NaN, which
# CountVectorizer rejects; map them back to empty strings.
X_train, y_train = train["clean_text"].fillna(""), train["Class"]
X_test, y_test   = test["clean_text"].fillna(""), test["Class"]

# --- BoW Vectorization (small features for speed) ---
vectorizer = CountVectorizer(max_features=1000)
Xtr = vectorizer.fit_transform(X_train)
Xte = vectorizer.transform(X_test)

# --- Naive Bayes setup ---
model = MultinomialNB()

# --- Simulated incremental training with progression ---
print("Training Naive Bayes (BoW, super-fast) with progress...")
batch_size = int(Xtr.shape[0] / 10)  # split into 10 mini-batches
all_classes = y_train.unique()       # partial_fit needs the full label set up front
for i in tqdm(range(10)):
    start = i * batch_size
    # The last batch absorbs the remainder left by the integer division.
    end = (i + 1) * batch_size if i < 9 else Xtr.shape[0]
    if start >= end:
        # Fewer than 10 rows leaves some batches empty; partial_fit rejects
        # zero-sample input, so skip them.
        continue
    model.partial_fit(Xtr[start:end], y_train.iloc[start:end], classes=all_classes)

print("Training complete!")

# --- Predictions ---
preds = model.predict(Xte)

# --- Metrics ---
print("\nAccuracy:", accuracy_score(y_test, preds))
print("F1 Score:", f1_score(y_test, preds, average="weighted"))
print(classification_report(y_test, preds))

# --- Confusion Matrix ---
# Same label set and order confusion_matrix uses by default, made explicit so
# the heatmap axes can show class names instead of positional indices.
class_labels = sorted(set(y_test) | set(preds))
cm = confusion_matrix(y_test, preds, labels=class_labels)
plt.figure(figsize=(10,6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix - Naive Bayes + BoW (Super-Fast + Progress)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(rotation=45, ha="right")
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()


In [None]:
# =============================
# Naive Bayes + TF-IDF (Super-Fast + Progress)
# =============================
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

# --- Load preprocessed data ---
train = pd.read_csv("train_processed.csv")
test  = pd.read_csv("test_processed.csv")
# Documents that cleaned down to "" come back from CSV as NaN, which
# TfidfVectorizer rejects; map them back to empty strings.
X_train, y_train = train["clean_text"].fillna(""), train["Class"]
X_test, y_test   = test["clean_text"].fillna(""), test["Class"]

# --- TF-IDF Vectorization (small features for speed) ---
vectorizer = TfidfVectorizer(max_features=1000)
Xtr = vectorizer.fit_transform(X_train)
Xte = vectorizer.transform(X_test)

# --- Naive Bayes setup ---
model = MultinomialNB()

# --- Incremental training with progress ---
print("Training Naive Bayes (TF-IDF, super-fast) with progress...")
batch_size = int(Xtr.shape[0] / 10)  # split into 10 mini-batches
all_classes = y_train.unique()       # partial_fit needs the full label set up front
for i in tqdm(range(10)):
    start = i * batch_size
    # The last batch absorbs the remainder left by the integer division.
    end = (i + 1) * batch_size if i < 9 else Xtr.shape[0]
    if start >= end:
        # Fewer than 10 rows leaves some batches empty; partial_fit rejects
        # zero-sample input, so skip them.
        continue
    model.partial_fit(Xtr[start:end], y_train.iloc[start:end], classes=all_classes)

print("Training complete!")

# --- Predictions ---
preds = model.predict(Xte)

# --- Metrics ---
print("\nAccuracy:", accuracy_score(y_test, preds))
print("F1 Score:", f1_score(y_test, preds, average="weighted"))
print(classification_report(y_test, preds))

# --- Confusion Matrix ---
# Same label set and order confusion_matrix uses by default, made explicit so
# the heatmap axes can show class names instead of positional indices.
class_labels = sorted(set(y_test) | set(preds))
cm = confusion_matrix(y_test, preds, labels=class_labels)
plt.figure(figsize=(10,6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix - Naive Bayes + TF-IDF (Super-Fast + Progress)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(rotation=45, ha="right")
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()


In [None]:
# =============================
# Deep Neural Network + TF-IDF
# =============================
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
from tqdm import tqdm

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable from one run to the next.
tf.keras.utils.set_random_seed(42)

# --- Load preprocessed data ---
train = pd.read_csv("train_processed.csv")
test  = pd.read_csv("test_processed.csv")
# Documents that cleaned down to "" come back from CSV as NaN, which
# TfidfVectorizer rejects; map them back to empty strings.
X_train, y_train = train["clean_text"].fillna(""), train["Class"]
X_test, y_test   = test["clean_text"].fillna(""), test["Class"]

# --- TF-IDF Vectorization (lightweight features) ---
# Dense arrays are required by the Keras model below.
vectorizer = TfidfVectorizer(max_features=1000)
Xtr = vectorizer.fit_transform(X_train).toarray()
Xte = vectorizer.transform(X_test).toarray()

# --- Encode labels ---
# Integer class ids, as expected by sparse_categorical_crossentropy.
encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc  = encoder.transform(y_test)

# --- Build a lightweight DNN ---
# max_features is only an upper bound: a smaller vocabulary yields fewer
# columns, so the input width is taken from the data instead of hard-coding 1000.
model = models.Sequential([
    layers.Input(shape=(Xtr.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(len(encoder.classes_), activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# --- Training with progress bar ---
epochs = 10
batch_size = 128
print("Training DNN  with progress...")

# One fit() call per epoch so tqdm can advance; the model and optimizer state
# carry over between calls.
# NOTE(review): the test split doubles as validation data here, so val_acc is
# not an independent estimate if it is ever used to pick epochs or settings.
for epoch in tqdm(range(1, epochs + 1)):
    history = model.fit(Xtr, y_train_enc,
                        epochs=1,
                        batch_size=batch_size,
                        verbose=0,
                        validation_data=(Xte, y_test_enc))
    acc = history.history['accuracy'][0]
    val_acc = history.history['val_accuracy'][0]
    print(f"Epoch {epoch}/{epochs} - loss: {history.history['loss'][0]:.4f} "
          f"- acc: {acc:.4f} - val_acc: {val_acc:.4f}")

print("Training complete!")

# --- Predictions ---
# argmax over the softmax outputs gives the predicted integer class id.
preds = model.predict(Xte).argmax(axis=1)

# --- Metrics ---
print("\nAccuracy:", accuracy_score(y_test_enc, preds))
print("F1 Score:", f1_score(y_test_enc, preds, average="weighted"))
print(classification_report(y_test_enc, preds, target_names=encoder.classes_))

# --- Confusion Matrix ---
cm = confusion_matrix(y_test_enc, preds)
plt.figure(figsize=(12,7))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=encoder.classes_,
            yticklabels=encoder.classes_,
            cbar=True)
plt.title("Confusion Matrix - DNN + TF-IDF ")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(rotation=45, ha="right")
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()


In [None]:
# =============================
# Deep Neural Network + BoW (Light + Progress)
# =============================
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
from tqdm import tqdm

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable from one run to the next.
tf.keras.utils.set_random_seed(42)

# --- Load preprocessed data ---
train = pd.read_csv("train_processed.csv")
test  = pd.read_csv("test_processed.csv")
# Documents that cleaned down to "" come back from CSV as NaN, which
# CountVectorizer rejects; map them back to empty strings.
X_train, y_train = train["clean_text"].fillna(""), train["Class"]
X_test, y_test   = test["clean_text"].fillna(""), test["Class"]

# --- BoW Vectorization (lightweight features) ---
# Dense arrays are required by the Keras model below.
vectorizer = CountVectorizer(max_features=1000)
Xtr = vectorizer.fit_transform(X_train).toarray()
Xte = vectorizer.transform(X_test).toarray()

# --- Encode labels ---
# Integer class ids, as expected by sparse_categorical_crossentropy.
encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc  = encoder.transform(y_test)

# --- Build a lightweight DNN ---
# max_features is only an upper bound: a smaller vocabulary yields fewer
# columns, so the input width is taken from the data instead of hard-coding 1000.
model = models.Sequential([
    layers.Input(shape=(Xtr.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(len(encoder.classes_), activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# --- Training with progress bar ---
epochs = 10
batch_size = 128
print("Training DNN (BoW, light) with progress...")

# One fit() call per epoch so tqdm can advance; the model and optimizer state
# carry over between calls.
# NOTE(review): the test split doubles as validation data here, so val_acc is
# not an independent estimate if it is ever used to pick epochs or settings.
for epoch in tqdm(range(1, epochs + 1)):
    history = model.fit(Xtr, y_train_enc,
                        epochs=1,
                        batch_size=batch_size,
                        verbose=0,
                        validation_data=(Xte, y_test_enc))
    acc = history.history['accuracy'][0]
    val_acc = history.history['val_accuracy'][0]
    print(f"Epoch {epoch}/{epochs} - loss: {history.history['loss'][0]:.4f} "
          f"- acc: {acc:.4f} - val_acc: {val_acc:.4f}")

print("Training complete!")

# --- Predictions ---
# argmax over the softmax outputs gives the predicted integer class id.
preds = model.predict(Xte).argmax(axis=1)

# --- Metrics ---
print("\nAccuracy:", accuracy_score(y_test_enc, preds))
print("F1 Score:", f1_score(y_test_enc, preds, average="weighted"))
print(classification_report(y_test_enc, preds, target_names=encoder.classes_))

# --- Confusion Matrix ---
cm = confusion_matrix(y_test_enc, preds)
plt.figure(figsize=(12,7))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=encoder.classes_,
            yticklabels=encoder.classes_)
plt.title("Confusion Matrix - DNN + BoW (Light)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


1. Accuracy & F1 Comparison (Bar Plot)

In [None]:
import matplotlib.pyplot as plt

# Data from your report
# Hard-coded scores, one entry per model, in matching order across the lists.
models = [
    "LR + BoW", "DNN + TF-IDF", "DNN + BoW",
    "NB + TF-IDF", "NB + BoW", "RF + TF-IDF",
    "LR + TF-IDF", "RF + BoW"
]
accuracy = [0.6417, 0.6260, 0.6242, 0.6008, 0.5915, 0.5602, 0.5604, 0.5549]
# NOTE(review): the model cells in this notebook compute f1_score with
# average="weighted", while the labels below say "Macro F1" — confirm which
# averaging these numbers actually use.
f1 = [0.6385, 0.6201, 0.6187, 0.5966, 0.5881, 0.5562, 0.5552, 0.5536]

x = range(len(models))
plt.figure(figsize=(12,6))
# Place the two bars on either side of each tick. Previously one bar was
# centre-aligned and the other edge-aligned with the same width, so the second
# covered the right half of the first. A negative width with align='edge'
# anchors a bar's right edge at x.
plt.bar(x, accuracy, width=-0.4, label="Accuracy", align='edge')
plt.bar(x, f1, width=0.4, label="Macro F1", align='edge')

plt.xticks(x, models, rotation=45, ha="right")
plt.ylabel("Score")
plt.title("Model Performance Comparison (Accuracy vs Macro F1)")
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
import numpy as np

# Grouped bar chart: for every model, accuracy sits just left of the tick and
# F1 just right of it. Reads `models`, `accuracy` and `f1` defined in an
# earlier cell.
x = np.arange(len(models))  # model positions
width = 0.35
half = width / 2

fig, ax = plt.subplots(figsize=(12,6))
for offset, scores, series_name in ((-half, accuracy, 'Accuracy'),
                                    (half, f1, 'Macro F1')):
    ax.bar(x + offset, scores, width, label=series_name)

ax.set_xticks(x)
ax.set_xticklabels(models, rotation=45, ha="right")
ax.set_ylabel("Score")
ax.set_title("Accuracy vs F1 across Models")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
import pandas as pd

# Line chart of both scores with models ordered from highest to lowest
# accuracy. Reads `models`, `accuracy` and `f1` defined in an earlier cell.
df = pd.DataFrame({"Model": models, "Accuracy": accuracy, "F1": f1})
df_sorted = df.sort_values("Accuracy", ascending=False)

fig, ax = plt.subplots(figsize=(10,6))
for column, marker, series_name in (("Accuracy", "o", "Accuracy"),
                                    ("F1", "s", "Macro F1")):
    ax.plot(df_sorted["Model"], df_sorted[column], marker=marker, label=series_name)

plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("Score")
ax.set_title("Model Ranking by Accuracy and F1")
ax.legend()
ax.grid(True, linestyle="--", alpha=0.6)
fig.tight_layout()
plt.show()


SIFAR CODES



## Section 2: cse440project-Glove-with-7-models



# libraries

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
import re
import random
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
import pandas as pd
import ast
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, GRU, LSTM, Bidirectional, Dense, TimeDistributed
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
from itertools import chain
import seaborn as sns
from collections import Counter
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from sklearn.model_selection import train_test_split

# loading the dataset

In [None]:
import pandas as pd          # for data handling
import gdown                 # for downloading files from Google Drive

# Google Drive share links
# NOTE(review): the test link's file id differs from the one used in Section 1,
# yet both sections save to "test.csv", so this download replaces that file —
# confirm that is intended.
train_file_link = "https://drive.google.com/file/d/1lRLZyebOdT2UIRvrPKJoE4a6TqGxczZG/view?usp=sharing"
test_file_link  = "https://drive.google.com/file/d/1NMHO5rEzDoY8v4SQoLXoXl2h8yrrvegs/view?usp=drive_link"

# Extract file IDs from the links
# Take the segment right after "/d/" (the same rule Section 1 uses) instead of
# "second-to-last path segment", which silently picks the wrong piece when a
# link has a trailing slash or extra path parts.
train_id = train_file_link.split("/d/")[1].split("/")[0]
test_id  = test_file_link.split("/d/")[1].split("/")[0]

# Download files using gdown and save locally
gdown.download(f"https://drive.google.com/uc?id={train_id}", "train.csv", quiet=False)
gdown.download(f"https://drive.google.com/uc?id={test_id}",  "test.csv",  quiet=False)

# Load CSV files into pandas DataFrames
train_df = pd.read_csv("train.csv")
test_df  = pd.read_csv("test.csv")


In [None]:
# Print the first 3 rows of the training frame, then of the test frame,
# as untruncated plain text.
for frame in (train_df, test_df):
    print(frame.head(3).to_string())

# trimming the dataset

In [None]:
# # trimming the dataset
# import pandas as pd
# print("Original size:", len(train_df))
# print("Original size:", len(test_df))

# # Keep one sixth of the dataset (frac = 0.5/3)
# train_df = train_df.sample(frac=0.5/3, random_state=42)
# test_df = test_df.sample(frac=0.5/3, random_state=42)
# print("Reduced size:", len(train_df))
# print("Reduced size:", len(test_df))

# print(train_df.head(3).to_string())
# print(test_df.head(3).to_string())

# splitting the "QA Text" column

In [None]:
# Function to extract parts from QA Text
def split_qa(text):
    """Split one "QA Text" value into its three labelled parts.

    The text is searched for the markers "Question Title:",
    "Question Content:" and "Best Answer:". Each part is the text between its
    marker and the next one (the answer runs to the end of the string), with
    surrounding whitespace stripped. Newlines are allowed inside a part.

    Parameters
    ----------
    text : str or missing value
        Raw QA text; anything ``pd.isna`` treats as missing is accepted.

    Returns
    -------
    dict
        Keys "Question Title", "Question Content", "Best Answer" (in that
        order). A value is ``None`` when the input is missing or the
        corresponding marker pattern is not found.
    """
    section_patterns = (
        ("Question Title", r"Question Title:\s*(.*?)\s*Question Content:"),
        ("Question Content", r"Question Content:\s*(.*?)\s*Best Answer:"),
        ("Best Answer", r"Best Answer:\s*(.*)"),
    )

    if pd.isna(text):
        return {section: None for section, _ in section_patterns}

    parts = {}
    for section, pattern in section_patterns:
        # re.S lets "." span line breaks inside a section.
        found = re.search(pattern, text, re.S)
        parts[section] = None if found is None else found.group(1).strip()
    return parts

# Apply the function to QA Text column
# Build each frame in one shot from the list of per-row dicts. The previous
# `.apply(pd.Series)` constructed a separate Series object for every row,
# which is needlessly slow on large frames; the resulting columns and index
# are the same.
qa_split_train = pd.DataFrame(train_df["QA Text"].map(split_qa).tolist(), index=train_df.index)
qa_split_test = pd.DataFrame(test_df["QA Text"].map(split_qa).tolist(), index=test_df.index)
# Merge with class column
# NOTE: this replaces train_df/test_df, dropping "QA Text"; re-running the
# cell without reloading the data will fail on the missing column.
train_df = pd.concat([qa_split_train, train_df["Class"]], axis=1)
test_df = pd.concat([qa_split_test, test_df["Class"]], axis=1)
# Preview cleaned data
print(train_df.head(3))
print(test_df.head(3))


# lowercase the dataset

In [None]:
# Lowercase every text column (and the label) in both splits so that
# "Hello" and "hello" become the same token. Non-string cells (e.g. missing
# values) are passed through untouched.
text_columns = ["Question Title", "Question Content", "Best Answer", "Class"]


def _lower_if_str(value):
    """Return ``value.lower()`` for strings, the value itself otherwise."""
    if isinstance(value, str):
        return value.lower()
    return value


# Apply to train and test dataset
for frame in (train_df, test_df):
    for col in text_columns:
        frame[col] = frame[col].apply(_lower_if_str)


# Preview first 2 rows of each dataset
for frame in (train_df, test_df):
    print(frame.head(2).to_string())


# remove punctuation, normalize spaces, remove newline characters (train and test datasets)

In [None]:
# Improved cleaning function
# NOTE: this redefines the `clean_text` name already used in Section 1 of the
# notebook; after this cell runs, the Section 1 version is shadowed.
def clean_text(text):
    """Reduce a text to lowercase ASCII letters, digits and single spaces.

    The function now lowercases its input itself. Previously it relied on the
    caller having done so, and any uppercase letter was silently deleted by
    the ``[^a-z0-9\\s]`` filter. Already-lowercased input is unaffected.

    Parameters
    ----------
    text : str or missing value
        Raw text; values ``pd.isna`` treats as missing yield ``""``.
        Other non-string values are stringified.

    Returns
    -------
    str
        Cleaned text with no leading/trailing whitespace.
    """
    if pd.isna(text):
        return ""
    text = str(text).lower()
    text = text.replace("\\n", " ")            # literal backslash-n sequences
    text = re.sub(r"[^a-z0-9\s]", " ", text)   # keep only letters, numbers, whitespace
    # Collapsing whitespace also turns real newlines / carriage returns into spaces.
    return re.sub(r"\s+", " ", text).strip()
# Run clean_text over each of the three free-text columns, in both splits.
for frame in (train_df, test_df):
    for col in ["Question Title", "Question Content", "Best Answer"]:
        frame[col] = frame[col].apply(clean_text)
# Preview
for frame in (train_df, test_df):
    print(frame.head(3).to_string())


# removing stop words

In [None]:
import nltk
from nltk.corpus import stopwords
# Fetch the NLTK stop-word corpus and keep the English list as a set for
# fast membership checks.
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))


def remove_stopwords(text):
    """Drop English stop words from a whitespace-delimited string.

    Tokens are compared case-insensitively against ``stop_words``; kept
    tokens retain their original casing and order. Missing values (per
    ``pd.isna``) are returned unchanged.
    """
    if pd.isna(text):
        return text
    kept = []
    for token in text.split():
        if token.lower() in stop_words:
            continue
        kept.append(token)
    return " ".join(kept)


# Strip stop words from the three free-text columns of both splits.
for frame in (train_df, test_df):
    for col in ["Question Title", "Question Content", "Best Answer"]:
        frame[col] = frame[col].apply(remove_stopwords)


# Preview result: three random rows from each split (unseeded, so the rows
# shown differ between runs).
print(train_df.sample(3).to_string())
print(test_df.sample(3).to_string())

# tokenization

In [None]:
# Download the correct tokenizer
from nltk.tokenize import word_tokenize
# Tokenizer models needed by word_tokenize.
for resource in ("punkt", "punkt_tab"):
    nltk.download(resource)


# Function to tokenize text
def tokenize_text(text):
    """Split a string into word tokens with NLTK's ``word_tokenize``.

    Missing values (per ``pd.isna``) and empty strings produce an empty list.
    """
    is_blank = pd.isna(text) or text == ""
    return [] if is_blank else word_tokenize(text)


# Replace each free-text cell with its list of word tokens, in both splits.
for frame in (train_df, test_df):
    for col in ["Question Title", "Question Content", "Best Answer"]:
        frame[col] = frame[col].apply(tokenize_text)


# Preview tokenized data: three random rows from each split (unseeded, so the
# rows shown differ between runs).
print(train_df.sample(3).to_string())
print(test_df.sample(3).to_string())

# integer mapping/token indexing

In [None]:
# Columns that hold token lists at this point in the pipeline.
_QA_COLUMNS = ["Question Title", "Question Content", "Best Answer"]


def _joined(frame, column):
    """Return the column's token lists re-joined into space-separated strings."""
    return frame[column].apply(lambda tokens: " ".join(tokens))


# Combine all text columns into a single corpus: all titles first, then all
# contents, then all answers.
train_texts = []
test_texts = []  # built for symmetry; not used further in this cell
for _column in _QA_COLUMNS:
    train_texts.extend(_joined(train_df, _column))
for _column in _QA_COLUMNS:
    test_texts.extend(_joined(test_df, _column))


# Fit tokenizer ONLY on train data
# Tokenizer fit only on train data → prevents test data leakage.
# <OOV> token handles any word in the test set not seen in train data.
max_vocab=30000
tokenizer = Tokenizer(num_words=max_vocab, oov_token="<OOV>")
tokenizer.fit_on_texts(train_texts)


# Convert token lists → integer sequences (train first, then test)
for _frame in (train_df, test_df):
    for _column in _QA_COLUMNS:
        _frame[_column] = tokenizer.texts_to_sequences(_joined(_frame, _column))


# Preview sequences
for _frame in (train_df, test_df):
    print(_frame.head(3).to_string())

# padding

In [None]:
# Find max lengths for each column.
# The network needs train and test inputs of the same length, so the lengths
# are measured on the training split only and reused for the test split.
max_len_title = max(train_df["Question Title"].map(len))
max_len_content = max(train_df["Question Content"].map(len))
max_len_answer = max(train_df["Best Answer"].map(len))
print("Max Lengths:", max_len_title, max_len_content, max_len_answer)

# Apply padding (post-padding with zeros); sequences longer than the limit are
# cut at the end. Each cell ends up holding one fixed-length array.
_pad_limits = {
    "Question Title": max_len_title,
    "Question Content": max_len_content,
    "Best Answer": max_len_answer,
}
for _frame in (train_df, test_df):
    for _column, _limit in _pad_limits.items():
        _padded = pad_sequences(_frame[_column], maxlen=_limit, padding="post", truncating="post")
        _frame[_column] = list(_padded)


# Preview
for _frame in (train_df, test_df):
    print(_frame.head(3).to_string())


# checking imbalance on label(train,test)

In [None]:
# Share of each class, in percent, for the train split
label_percentages = train_df["Class"].value_counts(normalize=True).mul(100)
print(label_percentages)

# Same breakdown for the test split
label_percentages_test = test_df["Class"].value_counts(normalize=True).mul(100)
print(label_percentages_test)

# encoding label

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import numpy as np

# Step 1: Learn the class-name -> integer mapping from the training labels only.
label_encoder = LabelEncoder()

# Step 2: Fit on training labels and transform both train and test.
# transform() raises if the test split contains a class never seen in training.
train_labels_int = label_encoder.fit_transform(train_df["Class"])
test_labels_int  = label_encoder.transform(test_df["Class"])

# Step 3: Convert integer labels -> one-hot encoding.
# num_classes is passed explicitly: without it to_categorical sizes the output
# from the largest id present, so a test split missing the highest class would
# come out narrower than the training labels.
num_classes = len(label_encoder.classes_)
encoded_train = to_categorical(train_labels_int, num_classes=num_classes)
encoded_test  = to_categorical(test_labels_int, num_classes=num_classes)

# Step 4: Replace the "Class" column with the one-hot rows (overwrites in place).
train_df["Class"] = list(encoded_train)
test_df["Class"]  = list(encoded_test)


# Preview
print(train_df.head(3).to_string())
print(test_df.head(3).to_string())

# train and test data splitting

In [None]:
import numpy as np

# Column order of the merged input: title, then content, then answer.
merged_columns = ("Question Title", "Question Content", "Best Answer")

# Stack each padded column into a 2-D array and join them side by side,
# giving one long integer sequence per row.
X_train = np.hstack([np.array(train_df[name].tolist()) for name in merged_columns])
X_test = np.hstack([np.array(test_df[name].tolist()) for name in merged_columns])

# Labels (already one-hot encoded)
y_train = np.array(train_df["Class"].tolist())
y_test  = np.array(test_df["Class"].tolist())

print("Final Shapes:")
print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_test:", X_test.shape, "y_test:", y_test.shape)


# glove embedding

In [None]:
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove.6B.zip


In [None]:
embedding_dim = 100  # Must match the GloVe file you use
embedding_index = {}

# Each line of the GloVe file is "<word> <v1> <v2> ... <vN>".
with open("glove.6B.100d.txt", encoding='utf-8') as glove_file:
    for raw_line in glove_file:
        fields = raw_line.split()
        embedding_index[fields[0]] = np.array(fields[1:], dtype='float32')

print("Number of words in GloVe:", len(embedding_index))


vocab_size = max_vocab  # Same as your tokenizer's num_words

# Row i holds the GloVe vector of the word with tokenizer index i; rows for
# words missing from GloVe (and any index never assigned) stay all-zero.
embedding_matrix = np.zeros((vocab_size, embedding_dim))

for token, token_id in tokenizer.word_index.items():
    if token_id < vocab_size:
        glove_vector = embedding_index.get(token)
        if glove_vector is not None:
            embedding_matrix[token_id] = glove_vector


# rnn

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, GRU, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping

# SimpleRNN classifier over the concatenated title + content + answer sequence.
max_len = max_len_title + max_len_content + max_len_answer  # total padded input width
embedding_dim = embedding_matrix.shape[1]  # e.g., 100 if using glove.6B.100d.txt
num_classes = y_train.shape[1]  # width of the one-hot labels (replaces the hard-coded 10)

with tf.device('/GPU:0'):
    rnn_model = Sequential([
        Embedding(
            input_dim=max_vocab,           # same as tokenizer num_words
            output_dim=embedding_dim,      # dimension of GloVe vectors
            weights=[embedding_matrix],    # use pre-trained GloVe embeddings
            input_length=max_len,          # padded input length
            trainable=False                # keep embeddings fixed
        ),
        SimpleRNN(32),
        Dropout(0.5),                      # regularization
        Dense(num_classes, activation='softmax')
    ])

    rnn_model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    # Stop when val_loss has not improved for 3 epochs and keep the best weights.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )
    rnn_model.fit(
        X_train, y_train,
        epochs=5,
        batch_size=64,
        validation_split=0.2,
        callbacks=[early_stop]
    )

    # Save the model so the evaluation cell can reload it.
    rnn_model.save("rnn_model.keras")

    test_loss, test_acc = rnn_model.evaluate(X_test, y_test)
    print("Test Accuracy:", test_acc)


In [None]:
# Reload the saved RNN and score it on the test set.
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

rnn_model = tf.keras.models.load_model('rnn_model.keras')

# Keras' own loss / accuracy on the test set.
test_loss, test_acc = rnn_model.evaluate(X_test, y_test, verbose=0)

# Reduce softmax outputs and one-hot targets to class indices.
y_prob = rnn_model.predict(X_test, batch_size=512, verbose=0)
y_pred = y_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Macro averaging weights every class equally; zero_division=0 scores a class
# with no predictions as 0 instead of emitting a warning.
macro_opts = dict(average='macro', zero_division=0)
accuracy_rnn = accuracy_score(y_true, y_pred)
precision_rnn = precision_score(y_true, y_pred, **macro_opts)
recall_rnn = recall_score(y_true, y_pred, **macro_opts)
f1_rnn = f1_score(y_true, y_pred, **macro_opts)
cm_rnn = confusion_matrix(y_true, y_pred)

for caption, value in (
    ("Accuracy:", accuracy_rnn),
    ("Precision (macro):", precision_rnn),
    ("Recall (macro):", recall_rnn),
    ("F1 (macro):", f1_rnn),
    ("Confusion Matrix:\n", cm_rnn),
):
    print(caption, value)

# Use the original class names when the LabelEncoder from the encoding cell exists.
labels = None
if 'label_encoder' in globals():
    labels = label_encoder.classes_.tolist()
print("\nClassification Report:\n",
      classification_report(y_true, y_pred, target_names=labels))

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import ConfusionMatrixDisplay

# Heatmap of the RNN confusion matrix (rows = true class, columns = predicted).
# Fall back to seaborn's "auto" tick labels when no class names are available;
# passing None is not a supported value for xticklabels / yticklabels.
tick_labels = labels if labels else "auto"

plt.figure(figsize=(8,6))
sns.heatmap(cm_rnn, annot=True, fmt="d", cmap="Blues",
            xticklabels=tick_labels,
            yticklabels=tick_labels)

plt.title("Confusion Matrix - RNN Model")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()


# bidirectional rnn

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, GRU, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping

# Bidirectional SimpleRNN classifier over the concatenated title + content + answer sequence.
max_len = max_len_title + max_len_content + max_len_answer  # total padded input width
embedding_dim = embedding_matrix.shape[1]  # e.g., 100 if using glove.6B.100d.txt
num_classes = y_train.shape[1]  # width of the one-hot labels (replaces the hard-coded 10)

with tf.device('/GPU:0'):
    brnn_model = Sequential([
        Embedding(
            input_dim=max_vocab,           # same as tokenizer num_words
            output_dim=embedding_dim,      # dimension of GloVe vectors
            weights=[embedding_matrix],    # use pre-trained GloVe embeddings
            input_length=max_len,          # padded input length
            trainable=False                # keep embeddings fixed
        ),
        Bidirectional(SimpleRNN(32)),
        Dropout(0.5),                      # regularization
        Dense(num_classes, activation='softmax')
    ])

    brnn_model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    # Stop when val_loss has not improved for 3 epochs and keep the best weights.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )

    brnn_model.fit(
        X_train, y_train,
        epochs=5,
        batch_size=64,
        validation_split=0.2,
        callbacks=[early_stop]
    )

    # Save the model so the evaluation cell can reload it.
    brnn_model.save("brnn_model.keras")

    test_loss, test_acc = brnn_model.evaluate(X_test, y_test)
    print("Test Accuracy:", test_acc)

In [None]:
# Reload the saved bidirectional RNN and score it on the test set.
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

brnn_model = tf.keras.models.load_model('brnn_model.keras')

# Keras' own loss / accuracy on the test set.
test_loss, test_acc = brnn_model.evaluate(X_test, y_test, verbose=0)

# Reduce softmax outputs and one-hot targets to class indices.
y_prob = brnn_model.predict(X_test, batch_size=512, verbose=0)
y_pred = y_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Macro averaging weights every class equally; zero_division=0 scores a class
# with no predictions as 0 instead of emitting a warning.
macro_opts = dict(average='macro', zero_division=0)
accuracy_brnn = accuracy_score(y_true, y_pred)
precision_brnn = precision_score(y_true, y_pred, **macro_opts)
recall_brnn = recall_score(y_true, y_pred, **macro_opts)
f1_brnn = f1_score(y_true, y_pred, **macro_opts)
cm_brnn = confusion_matrix(y_true, y_pred)

for caption, value in (
    ("Accuracy:", accuracy_brnn),
    ("Precision (macro):", precision_brnn),
    ("Recall (macro):", recall_brnn),
    ("F1 (macro):", f1_brnn),
    ("Confusion Matrix:\n", cm_brnn),
):
    print(caption, value)

# Use the original class names when the LabelEncoder from the encoding cell exists.
labels = None
if 'label_encoder' in globals():
    labels = label_encoder.classes_.tolist()
print("\nClassification Report:\n",
      classification_report(y_true, y_pred, target_names=labels))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import ConfusionMatrixDisplay

# Heatmap of the Bi-RNN confusion matrix (rows = true class, columns = predicted).
# Fall back to seaborn's "auto" tick labels when no class names are available;
# passing None is not a supported value for xticklabels / yticklabels.
tick_labels = labels if labels else "auto"

plt.figure(figsize=(8,6))
sns.heatmap(cm_brnn, annot=True, fmt="d", cmap="Blues",
            xticklabels=tick_labels,
            yticklabels=tick_labels)

plt.title("Confusion Matrix - Bi-RNN Model")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()


# GRU

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, GRU, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping

# GRU classifier over the concatenated title + content + answer sequence.
max_len = max_len_title + max_len_content + max_len_answer  # total padded input width
embedding_dim = embedding_matrix.shape[1]  # e.g., 100 if using glove.6B.100d.txt
num_classes = y_train.shape[1]  # width of the one-hot labels (replaces the hard-coded 10)

with tf.device('/GPU:0'):
    gru_model = Sequential([
        Embedding(
            input_dim=max_vocab,           # same as tokenizer num_words
            output_dim=embedding_dim,      # dimension of GloVe vectors
            weights=[embedding_matrix],    # use pre-trained GloVe embeddings
            input_length=max_len,          # padded input length
            trainable=False                # keep embeddings fixed
        ),
        GRU(32),
        Dropout(0.5),                      # regularization
        Dense(num_classes, activation='softmax')
    ])

    gru_model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    # Stop when val_loss has not improved for 3 epochs and keep the best weights.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )

    gru_model.fit(
        X_train, y_train,
        epochs=5,
        batch_size=64,
        validation_split=0.2,
        callbacks=[early_stop]
    )

    # Save the model so the evaluation cell can reload it.
    gru_model.save("gru_model.keras")

    test_loss, test_acc = gru_model.evaluate(X_test, y_test)
    print("Test Accuracy:", test_acc)

In [None]:
# Reload the saved GRU and score it on the test set.
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

gru_model = tf.keras.models.load_model('gru_model.keras')

# Keras' own loss / accuracy on the test set.
test_loss, test_acc = gru_model.evaluate(X_test, y_test, verbose=0)

# Reduce softmax outputs and one-hot targets to class indices.
y_prob = gru_model.predict(X_test, batch_size=512, verbose=0)
y_pred = y_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Macro averaging weights every class equally; zero_division=0 scores a class
# with no predictions as 0 instead of emitting a warning.
macro_opts = dict(average='macro', zero_division=0)
accuracy_gru = accuracy_score(y_true, y_pred)
precision_gru = precision_score(y_true, y_pred, **macro_opts)
recall_gru = recall_score(y_true, y_pred, **macro_opts)
f1_gru = f1_score(y_true, y_pred, **macro_opts)
cm_gru = confusion_matrix(y_true, y_pred)

for caption, value in (
    ("Accuracy:", accuracy_gru),
    ("Precision (macro):", precision_gru),
    ("Recall (macro):", recall_gru),
    ("F1 (macro):", f1_gru),
    ("Confusion Matrix:\n", cm_gru),
):
    print(caption, value)

# Use the original class names when the LabelEncoder from the encoding cell exists.
labels = None
if 'label_encoder' in globals():
    labels = label_encoder.classes_.tolist()
print("\nClassification Report:\n",
      classification_report(y_true, y_pred, target_names=labels))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the GRU confusion matrix (rows = true class, columns = predicted).
# Fall back to seaborn's "auto" tick labels when no class names are available;
# passing None is not a supported value for xticklabels / yticklabels.
tick_labels = labels if labels else "auto"

plt.figure(figsize=(8,6))
sns.heatmap(cm_gru, annot=True, fmt="d", cmap="Blues",
            xticklabels=tick_labels,
            yticklabels=tick_labels)

plt.title("Confusion Matrix - GRU Model")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()


# bidirectional GRU

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, GRU, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping

# Bidirectional GRU classifier over the concatenated title + content + answer sequence.
max_len = max_len_title + max_len_content + max_len_answer  # total padded input width
embedding_dim = embedding_matrix.shape[1]  # e.g., 100 if using glove.6B.100d.txt
num_classes = y_train.shape[1]  # width of the one-hot labels (replaces the hard-coded 10)

with tf.device('/GPU:0'):
    bgru_model = Sequential([
        Embedding(
            input_dim=max_vocab,           # same as tokenizer num_words
            output_dim=embedding_dim,      # dimension of GloVe vectors
            weights=[embedding_matrix],    # use pre-trained GloVe embeddings
            input_length=max_len,          # padded input length
            trainable=False                # keep embeddings fixed
        ),
        Bidirectional(GRU(32)),
        Dropout(0.5),                      # regularization
        Dense(num_classes, activation='softmax')
    ])

    bgru_model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    # Stop when val_loss has not improved for 3 epochs and keep the best weights.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )

    bgru_model.fit(
        X_train, y_train,
        epochs=5,
        batch_size=128,
        validation_split=0.2,
        callbacks=[early_stop]
    )

    # Save the model so the evaluation cell can reload it.
    bgru_model.save("bgru_model.keras")

    test_loss, test_acc = bgru_model.evaluate(X_test, y_test)
    print("Test Accuracy:", test_acc)

In [None]:
# Reload the saved bidirectional GRU and score it on the test set.
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Bind the reloaded model to `bgru_model` (the original assigned it to the typo
# `bgru_modelmodel`), so the metrics below are computed on the saved model and
# the cell also works on a fresh kernel where no in-memory bgru_model exists.
bgru_model = tf.keras.models.load_model('bgru_model.keras')

# Keras' own loss / accuracy on the test set.
test_loss, test_acc = bgru_model.evaluate(X_test, y_test, verbose=0)

# Reduce softmax outputs and one-hot targets to class indices.
y_prob = bgru_model.predict(X_test, batch_size=512, verbose=0)
y_pred = np.argmax(y_prob, axis=1)
y_true = np.argmax(y_test, axis=1)

# Macro averaging weights every class equally; zero_division=0 scores a class
# with no predictions as 0 instead of emitting a warning.
accuracy_bgru  = accuracy_score(y_true, y_pred)
precision_bgru = precision_score(y_true, y_pred, average='macro', zero_division=0)
recall_bgru    = recall_score(y_true, y_pred, average='macro', zero_division=0)
f1_bgru        = f1_score(y_true, y_pred, average='macro', zero_division=0)
cm_bgru        = confusion_matrix(y_true, y_pred)

print("Accuracy:", accuracy_bgru)
print("Precision (macro):", precision_bgru)
print("Recall (macro):", recall_bgru)
print("F1 (macro):", f1_bgru)
print("Confusion Matrix:\n", cm_bgru)

# Use the original class names when the LabelEncoder from the encoding cell exists.
labels = label_encoder.classes_.tolist() if 'label_encoder' in globals() else None
print("\nClassification Report:\n",
      classification_report(y_true, y_pred, target_names=labels))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the bidirectional GRU confusion matrix (rows = true class, columns = predicted).
# Fall back to seaborn's "auto" tick labels when no class names are available;
# passing None is not a supported value for xticklabels / yticklabels.
tick_labels = labels if labels else "auto"

plt.figure(figsize=(8,6))
sns.heatmap(cm_bgru, annot=True, fmt="d", cmap="Blues",
            xticklabels=tick_labels,
            yticklabels=tick_labels)

plt.title("Confusion Matrix - Bidirectional GRU Model")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()


# LSTM

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, GRU, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping

# LSTM classifier over the concatenated title + content + answer sequence.
max_len = max_len_title + max_len_content + max_len_answer  # total padded input width
embedding_dim = embedding_matrix.shape[1]  # e.g., 100 if using glove.6B.100d.txt
num_classes = y_train.shape[1]  # width of the one-hot labels (replaces the hard-coded 10)

with tf.device('/GPU:0'):
    lstm_model = Sequential([
        Embedding(
            input_dim=max_vocab,           # same as tokenizer num_words
            output_dim=embedding_dim,      # dimension of GloVe vectors
            weights=[embedding_matrix],    # use pre-trained GloVe embeddings
            input_length=max_len,          # padded input length
            trainable=False                # keep embeddings fixed
        ),
        LSTM(32),
        Dropout(0.5),                      # regularization
        Dense(num_classes, activation='softmax')
    ])

    lstm_model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    # Stop when val_loss has not improved for 3 epochs and keep the best weights.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )

    lstm_model.fit(
        X_train, y_train,
        epochs=5,
        batch_size=128,
        validation_split=0.2,
        callbacks=[early_stop]
    )

    # Save the model so the evaluation cell can reload it.
    lstm_model.save("lstm_model.keras")

    test_loss, test_acc = lstm_model.evaluate(X_test, y_test)
    print("Test Accuracy:", test_acc)

In [None]:
# Reload the saved LSTM and score it on the test set.
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

lstm_model = tf.keras.models.load_model('lstm_model.keras')

# Keras' own loss / accuracy on the test set.
test_loss, test_acc = lstm_model.evaluate(X_test, y_test, verbose=0)

# Reduce softmax outputs and one-hot targets to class indices.
y_prob = lstm_model.predict(X_test, batch_size=512, verbose=0)
y_pred = y_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Macro averaging weights every class equally; zero_division=0 scores a class
# with no predictions as 0 instead of emitting a warning.
macro_opts = dict(average='macro', zero_division=0)
accuracy_lstm = accuracy_score(y_true, y_pred)
precision_lstm = precision_score(y_true, y_pred, **macro_opts)
recall_lstm = recall_score(y_true, y_pred, **macro_opts)
f1_lstm = f1_score(y_true, y_pred, **macro_opts)
cm_lstm = confusion_matrix(y_true, y_pred)

for caption, value in (
    ("Accuracy:", accuracy_lstm),
    ("Precision (macro):", precision_lstm),
    ("Recall (macro):", recall_lstm),
    ("F1 (macro):", f1_lstm),
    ("Confusion Matrix:\n", cm_lstm),
):
    print(caption, value)

# Use the original class names when the LabelEncoder from the encoding cell exists.
labels = None
if 'label_encoder' in globals():
    labels = label_encoder.classes_.tolist()
print("\nClassification Report:\n",
      classification_report(y_true, y_pred, target_names=labels))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the LSTM confusion matrix (rows = true class, columns = predicted).
# Fall back to seaborn's "auto" tick labels when no class names are available;
# passing None is not a supported value for xticklabels / yticklabels.
tick_labels = labels if labels else "auto"

plt.figure(figsize=(8,6))
sns.heatmap(cm_lstm, annot=True, fmt="d", cmap="Blues",
            xticklabels=tick_labels,
            yticklabels=tick_labels)

plt.title("Confusion Matrix - LSTM Model")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()


# bidirectional lstm

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, GRU, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping


# Bidirectional LSTM classifier over the concatenated title + content + answer sequence.
max_len = max_len_title + max_len_content + max_len_answer  # total padded input width
embedding_dim = embedding_matrix.shape[1]  # e.g., 100 if using glove.6B.100d.txt
num_classes = y_train.shape[1]  # width of the one-hot labels (replaces the hard-coded 10)

with tf.device('/GPU:0'):
    blstm_model = Sequential([
        Embedding(
            input_dim=max_vocab,           # same as tokenizer num_words
            output_dim=embedding_dim,      # dimension of GloVe vectors
            weights=[embedding_matrix],    # use pre-trained GloVe embeddings
            input_length=max_len,          # padded input length
            trainable=False                # keep embeddings fixed
        ),
        Bidirectional(LSTM(32)),
        Dropout(0.5),                      # regularization
        Dense(num_classes, activation='softmax')
    ])

    blstm_model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    # Stop when val_loss has not improved for 3 epochs and keep the best weights.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )

    blstm_model.fit(
        X_train, y_train,
        epochs=5,
        batch_size=128,
        validation_split=0.2,
        callbacks=[early_stop]
    )

    # Save the model so the evaluation cell can reload it.
    blstm_model.save("blstm_model.keras")

    test_loss, test_acc = blstm_model.evaluate(X_test, y_test)
    print("Test Accuracy:", test_acc)

In [None]:
# Reload the saved bidirectional LSTM and score it on the test set.
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

blstm_model = tf.keras.models.load_model('blstm_model.keras')

# Keras' own loss / accuracy on the test set.
test_loss, test_acc = blstm_model.evaluate(X_test, y_test, verbose=0)

# Reduce softmax outputs and one-hot targets to class indices.
y_prob = blstm_model.predict(X_test, batch_size=512, verbose=0)
y_pred = y_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Macro averaging weights every class equally; zero_division=0 scores a class
# with no predictions as 0 instead of emitting a warning.
macro_opts = dict(average='macro', zero_division=0)
accuracy_blstm = accuracy_score(y_true, y_pred)
precision_blstm = precision_score(y_true, y_pred, **macro_opts)
recall_blstm = recall_score(y_true, y_pred, **macro_opts)
f1_blstm = f1_score(y_true, y_pred, **macro_opts)
cm_blstm = confusion_matrix(y_true, y_pred)

for caption, value in (
    ("Accuracy:", accuracy_blstm),
    ("Precision (macro):", precision_blstm),
    ("Recall (macro):", recall_blstm),
    ("F1 (macro):", f1_blstm),
    ("Confusion Matrix:\n", cm_blstm),
):
    print(caption, value)

# Use the original class names when the LabelEncoder from the encoding cell exists.
labels = None
if 'label_encoder' in globals():
    labels = label_encoder.classes_.tolist()
print("\nClassification Report:\n",
      classification_report(y_true, y_pred, target_names=labels))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the bidirectional LSTM confusion matrix (rows = true class, columns = predicted).
# Fall back to seaborn's "auto" tick labels when no class names are available;
# passing None is not a supported value for xticklabels / yticklabels.
tick_labels = labels if labels else "auto"

plt.figure(figsize=(8,6))
sns.heatmap(cm_blstm, annot=True, fmt="d", cmap="Blues",
            xticklabels=tick_labels,
            yticklabels=tick_labels)

plt.title("Confusion Matrix - Bidirectional LSTM Model")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()


# dnn


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

# Dense (feed-forward) classifier on the flattened GloVe embeddings of the
# concatenated title + content + answer sequence.
max_len = max_len_title + max_len_content + max_len_answer  # total padded input width
embedding_dim = embedding_matrix.shape[1]  # e.g., 100 if using glove.6B.100d.txt
num_classes = y_train.shape[1]  # width of the one-hot labels (replaces the hard-coded 10)

with tf.device('/GPU:0'):
    dnn_model = Sequential([
        Embedding(
            input_dim=max_vocab,           # same as tokenizer num_words
            output_dim=embedding_dim,      # dimension of GloVe vectors
            weights=[embedding_matrix],    # use pre-trained GloVe embeddings
            input_length=max_len,          # padded input length
            trainable=False                # keep embeddings fixed
        ),
        Flatten(),                         # flatten embeddings into vector
        Dense(128, activation='relu'),     # fully connected layer
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])

    dnn_model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Stop when val_loss has not improved for 3 epochs and keep the best weights.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )

    dnn_model.fit(
        X_train, y_train,
        epochs=5,
        batch_size=64,
        validation_split=0.2,
        callbacks=[early_stop]
    )

    # Save the model so the evaluation cell can reload it.
    dnn_model.save("dnn_model.keras")

    test_loss, test_acc = dnn_model.evaluate(X_test, y_test)
    print("Test Accuracy:", test_acc)

In [None]:
# -------------------- Evaluation --------------------
# Reload the saved DNN and score it on the test set.
dnn_model = tf.keras.models.load_model('dnn_model.keras')

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Reduce softmax outputs and one-hot targets to class indices.
y_prob = dnn_model.predict(X_test, batch_size=512, verbose=0)
y_pred = y_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Macro averaging weights every class equally; zero_division=0 scores a class
# with no predictions as 0 instead of emitting a warning.
macro_opts = dict(average='macro', zero_division=0)
accuracy_dnn = accuracy_score(y_true, y_pred)
precision_dnn = precision_score(y_true, y_pred, **macro_opts)
recall_dnn = recall_score(y_true, y_pred, **macro_opts)
f1_dnn = f1_score(y_true, y_pred, **macro_opts)
cm_dnn = confusion_matrix(y_true, y_pred)

for caption, value in (
    ("Accuracy:", accuracy_dnn),
    ("Precision (macro):", precision_dnn),
    ("Recall (macro):", recall_dnn),
    ("F1 (macro):", f1_dnn),
    ("Confusion Matrix:\n", cm_dnn),
):
    print(caption, value)

# Use the original class names when the LabelEncoder from the encoding cell exists.
labels = None
if 'label_encoder' in globals():
    labels = label_encoder.classes_.tolist()
print("\nClassification Report:\n",
      classification_report(y_true, y_pred, target_names=labels))

In [None]:

# Heatmap of the DNN confusion matrix (rows = true class, columns = predicted).
# Fall back to seaborn's "auto" tick labels when no class names are available;
# passing None is not a supported value for xticklabels / yticklabels.
tick_labels = labels if labels else "auto"

plt.figure(figsize=(8,6))
sns.heatmap(cm_dnn, annot=True, fmt="d", cmap="Blues",
            xticklabels=tick_labels,
            yticklabels=tick_labels)

plt.title("Confusion Matrix - DNN Model")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()

# model comparison

In [None]:
import matplotlib.pyplot as plt

# One bar per model (7 models including DNN), accuracy shown in percent.
model_names = ['dnn', 'rnn', 'bidirectional rnn', 'gru', 'bidirectional gru', 'lstm', 'bidirectional lstm']
accuracy_scores = [accuracy_dnn, accuracy_rnn, accuracy_brnn, accuracy_gru, accuracy_bgru, accuracy_lstm, accuracy_blstm]
accuracy_scores_percent = [score * 100 for score in accuracy_scores]

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(model_names, accuracy_scores_percent, color='skyblue')
ax.set_xlabel("Models")
ax.set_ylabel("Accuracy (%)")
ax.set_title("Comparison of Model Accuracy")
ax.set_ylim(0, 100)  # y-axis in percentage

# Write each bar's value just above its top edge.
for bar in bars:
    bar_top = bar.get_height()
    bar_center = bar.get_x() + bar.get_width() / 2
    ax.text(bar_center, bar_top + 1, f'{bar_top:.1f}%',
            ha='center', va='bottom')

plt.show()


In [None]:
import matplotlib.pyplot as plt

# Grouped bars: macro precision, recall and F1 side by side for every model.
model_names = ['dnn', 'rnn', 'bidirectional rnn', 'gru', 'bidirectional gru', 'lstm', 'bidirectional lstm']
precision_scores = [precision_dnn, precision_rnn, precision_brnn, precision_gru, precision_bgru, precision_lstm, precision_blstm]
recall_scores    = [recall_dnn,   recall_rnn,   recall_brnn,   recall_gru,   recall_bgru,   recall_lstm,   recall_blstm]
f1_scores        = [f1_dnn,       f1_rnn,       f1_brnn,       f1_gru,       f1_bgru,       f1_lstm,       f1_blstm]

x = range(len(model_names))
width = 0.25

fig, ax1 = plt.subplots(figsize=(12, 6))

# Precision sits one bar-width left of the tick, recall on it, F1 one to the right.
rects1 = ax1.bar([pos - width for pos in x], precision_scores, width, label='Precision')
rects2 = ax1.bar(x, recall_scores, width, label='Recall')
rects3 = ax1.bar([pos + width for pos in x], f1_scores, width, label='F1 Score')

# Axis text, ticks and legend
ax1.set(xlabel='Model Names', ylabel='Scores', title='Precision, Recall, and F1 Comparison')
ax1.set_xticks(x)
ax1.set_xticklabels(model_names)
ax1.legend()

plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
model_names = ['dnn', 'rnn', 'bidirectional rnn', 'gru', 'bidirectional gru', 'lstm', 'bidirectional lstm']
confusion_matrices = [cm_dnn, cm_rnn, cm_brnn, cm_gru, cm_bgru, cm_lstm, cm_blstm]

# 2 rows x 4 columns of panels, flattened so they can be walked in order.
fig, axes = plt.subplots(2, 4, figsize=(22, 10))
axes = axes.flatten()

# One heatmap per model, in the order of model_names.
for panel, name, matrix in zip(axes, model_names, confusion_matrices):
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=panel)
    panel.set_title(f'{name} Confusion Matrix')
    panel.set_xlabel('Predicted')
    panel.set_ylabel('Actual')

# 2x4 gives 8 slots for 7 models: blank out the leftover panel.
for spare_panel in axes[len(model_names):]:
    spare_panel.axis('off')

plt.tight_layout()
plt.show()


## Section 3: cse440project-skipgram-with-7-models



#  Load and Explore Dataset

In [None]:
# ==================== COMPLETE MULTI-GPU SETUP ====================
# RUN THIS FIRST AFTER RESTARTING KERNEL
# ==================================================================
# Builds one distribution strategy, `global_strategy`, for later cells:
# a MirroredStrategy when at least two GPUs are visible, otherwise whatever
# tf.distribute.get_strategy() returns.
# NOTE(review): in this merged notebook the cell sits after cells that already
# used TensorFlow, so it only runs "first" if the kernel is restarted and
# execution starts from here.

import tensorflow as tf
import os

# Status message only: nothing in this cell restarts the kernel.
print("üîÑ Restarting kernel and setting up multi-GPU...")

# Clean up any existing strategies
# (drops the names left in the notebook globals by a previous run of this cell)
if 'strategy' in globals():
    del globals()['strategy']
if 'global_strategy' in globals():
    del globals()['global_strategy']

# Force TensorFlow to use both GPUs
# NOTE(review): these variables are set after `import tensorflow`; presumably
# they only take effect if no GPU has been initialized yet -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

# Clear any existing TensorFlow sessions
tf.keras.backend.clear_session()

# Configure physical GPUs
gpus = tf.config.list_physical_devices('GPU')

# Create SINGLE strategy instance
if len(gpus) >= 2:
    print(f"‚úÖ Found {len(gpus)} GPUs")
    
    # Enable memory growth
    # (only done in this multi-GPU branch; the single-GPU path below does not call it)
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    
    # Create ONLY ONE strategy instance
    global_strategy = tf.distribute.MirroredStrategy()
    print(f"üéØ Created MirroredStrategy with {global_strategy.num_replicas_in_sync} replicas")
    
else:
    # Fewer than two GPUs: fall back to the strategy TensorFlow currently reports.
    print("‚ö†Ô∏è Using single GPU or CPU")
    global_strategy = tf.distribute.get_strategy()

print("‚úÖ Strategy setup complete! Use 'global_strategy' everywhere.")
print(f"üìä Number of replicas: {global_strategy.num_replicas_in_sync}")

# Show GPU status
# (shell magic: needs the nvidia-smi binary on the PATH)
print("\n" + "="*50)
!nvidia-smi --query-gpu=index,name,memory.total --format=csv

# Test with a simple operation
# (a 2x2 matmul inside the strategy scope, as a smoke test)
print("\nüß™ Testing strategy with simple operation...")
with global_strategy.scope():
    test_tensor = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    result = tf.matmul(test_tensor, test_tensor)
    print(f"Test operation successful: {result.numpy()}")

print("üöÄ Multi-GPU setup completed successfully!")

In [None]:
# ==================== ULTIMATE MULTI-CORE CPU SETUP ====================
# PUT THIS IN THE VERY FIRST CELL OF YOUR NOTEBOOK
# ======================================================================

import os
import multiprocessing as mp
import numpy as np
import pandas as pd
from threadpoolctl import threadpool_limits
import threading

print("üöÄ INITIALIZING MULTI-CORE PARALLEL EXECUTION...")

# CPU count as reported by multiprocessing
TOTAL_CORES = mp.cpu_count()
print(f"‚úÖ Found {TOTAL_CORES} CPU cores")

# Publish the core count through the thread-count variables of the common
# numerical back-ends.
# NOTE(review): numpy is imported earlier in this cell -- confirm the back-ends
# still read these variables once they are already loaded.
os.environ.update(dict.fromkeys(
    (
        'OMP_NUM_THREADS',         # OpenMP threads
        'MKL_NUM_THREADS',         # Intel MKL
        'OPENBLAS_NUM_THREADS',    # OpenBLAS
        'VECLIB_MAXIMUM_THREADS',  # macOS Accelerate
        'NUMEXPR_NUM_THREADS',     # NumExpr
        'JOBLIB_NUM_CPUS',         # Joblib
    ),
    str(TOTAL_CORES),
))

print("üìä Environment configured for parallel execution")

# Set thread limits for numerical libraries.
# threadpool_limits() is called directly rather than as a `with` block: the
# context-manager form restores the previous limits as soon as the block
# exits, so a `with` that only prints would leave nothing configured.
try:
    threadpool_limits(limits=TOTAL_CORES, user_api='blas')
    threadpool_limits(limits=TOTAL_CORES, user_api='openmp')
    print("‚úÖ BLAS and OpenMP threads configured")
except Exception as exc:
    # Stay best-effort, but let KeyboardInterrupt/SystemExit through and show
    # what actually went wrong instead of hiding it.
    print("‚ÑπÔ∏è threadpoolctl not available, using environment variables only")
    print(f"   ({type(exc).__name__}: {exc})")

# Global parallel context
class ParallelExecutor:
    """Context manager that only records how many parallel jobs to use.

    It starts no workers and holds no resources; entering returns the object
    itself and leaving does nothing.
    """

    def __init__(self, n_jobs=-1):
        """Store ``n_jobs``, substituting ``TOTAL_CORES`` for the value ``-1``."""
        if n_jobs == -1:
            n_jobs = TOTAL_CORES
        self.n_jobs = n_jobs

    def __enter__(self):
        """Return this executor as the target of the ``with`` statement."""
        return self

    def __exit__(self, *args):
        """Release nothing; returning ``None`` lets exceptions propagate."""
        return None

parallel_executor = ParallelExecutor(n_jobs=-1)

# Initialize parallel libraries
def enable_parallel_pandas():
    """Enable a parallel ``apply`` backend for pandas, if one is installed.

    ``swifter`` is tried first. If it is missing, the install hint is printed
    and ``pandarallel`` is tried as a fallback, initialised with
    ``TOTAL_CORES`` workers and no progress bar.

    Returns:
        bool: True if swifter or pandarallel was enabled, False if neither
        package can be imported.
    """
    try:
        import swifter  # noqa: F401 -- imported only for its side effects
    except ImportError:
        print("‚ÑπÔ∏è Install 'swifter' for pandas parallelization: !pip install swifter")
    else:
        print("‚úÖ Swifter enabled for parallel pandas operations")
        return True

    # Reached only when swifter is unavailable: fall back to pandarallel.
    try:
        from pandarallel import pandarallel
    except ImportError:
        return False

    pandarallel.initialize(nb_workers=TOTAL_CORES, progress_bar=False)
    print("‚úÖ Pandarallel initialized")
    return True

enable_parallel_pandas()

print("üéØ All CPU operations configured for parallel execution!")

# Test parallel execution
print("\nüß™ Testing parallel operations...")

# Test with threadpoolctl to verify numpy uses all cores
try:
    with threadpool_limits(limits=TOTAL_CORES):
        large_array = np.random.rand(1000, 1000)
        result = np.dot(large_array, large_array.T)
        print("‚úÖ NumPy parallel operation test completed")
except:
    large_array = np.random.rand(1000, 1000)
    result = np.dot(large_array, large_array.T)
    print("‚úÖ NumPy operation completed (thread limits not available)")

print("\n‚úÖ Setup complete! All subsequent CPU operations will use all cores!")

# Show current CPU status
print("\n" + "="*60)
print("CURRENT CPU STATUS")
print("="*60)
print(f"Total cores: {TOTAL_CORES}")
print(f"Active threads: {threading.active_count()}")

# Performance tips
print("\n" + "="*60)
print("PERFORMANCE TIPS")
print("="*60)
print("üìã For pandas: df.swifter.apply(func) - uses all cores")
print("üìã For sklearn: set n_jobs=-1")
print("üìã For joblib: Parallel(n_jobs=-1)")
print("üìã Most numpy operations auto-parallelize")

# Verify environment
print("\n" + "="*60)
print("ENVIRONMENT CONFIGURATION")
print("="*60)
for var in ['OMP_NUM_THREADS', 'MKL_NUM_THREADS', 'NUMEXPR_NUM_THREADS']:
    print(f"{var}: {os.environ.get(var, 'Not set')}")

print("\nüî• Ready for maximum CPU performance! All cores will be utilized!")

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, GRU, LSTM, Bidirectional, Embedding, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from gensim.models import Word2Vec
import matplotlib.pyplot as plt
import seaborn as sns
import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import nltk
import time

# Download stopwords if not already downloaded
nltk.download('stopwords')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Set matplotlib style
plt.style.use('ggplot')

In [None]:
# Load your dataset
train_df = pd.read_csv('/kaggle/input/440project/Question Answer Classification Dataset 2 Training.csv')
test_df = pd.read_csv('/kaggle/input/440project/Updated Question Answer Classification DatasetTest.csv')

# Display dataset information
print("Training set shape:", train_df.shape)
print("Testing set shape:", test_df.shape)
print("\nTraining set columns:", train_df.columns.tolist())
print("\nClass distribution in training set:")
print(train_df['Class'].value_counts())
print("\nClass distribution in testing set:")
print(test_df['Class'].value_counts())

# Display sample data
print("\nSample from training data:")
print(train_df.head(3))

In [None]:
# df_train=train_df
# df_test=test_df
# train_df=df_test.sample(50000,replace=True)
# test_df=df_test.sample(20000)
# print(train_df.shape)
# print(test_df.shape)

# Text Preprocessing Functions

In [None]:
import tensorflow as tf
print("GPU available:", tf.config.list_physical_devices('GPU'))


In [None]:
import re
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from multiprocessing import Pool, cpu_count
import nltk

# Download stopwords if not already available
nltk.download('stopwords', quiet=True)

# Optimized text preprocessing function

# Literal two-character escapes (a backslash followed by n, r or t) that are
# present as text in the raw data. `\s` does not match them, so they must be
# turned into real whitespace before non-letters are stripped; otherwise the
# backslash is deleted and the leftover letter is glued to the preceding word.
_ESCAPED_WHITESPACE_RE = re.compile(r'\\[nrt]')
# Everything that is neither an ASCII letter nor whitespace
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')
# Stop-word set and stemmer, built once per process on first use
_TEXT_TOOLS = {}


def _get_text_tools():
    """Return the cached ``(stop_words, stemmer)`` pair, creating it on first call."""
    if not _TEXT_TOOLS:
        _TEXT_TOOLS['stop_words'] = frozenset(stopwords.words('english'))
        _TEXT_TOOLS['stemmer'] = PorterStemmer()
    return _TEXT_TOOLS['stop_words'], _TEXT_TOOLS['stemmer']


def preprocess_text(text):
    """Normalise one raw document into a space-joined string of stemmed tokens.

    Steps: turn literal ``\\n`` / ``\\r`` / ``\\t`` escapes into spaces, delete
    every character that is not an ASCII letter or whitespace, lowercase, split
    on whitespace, drop English stop words, and Porter-stem what remains.

    Args:
        text: the raw document. Anything that is not a ``str`` (``None``, NaN
            floats from pandas, numbers, ...) is treated as missing.

    Returns:
        str: the processed tokens joined by single spaces; ``""`` for missing
        input or when no token survives.
    """
    if not isinstance(text, str):  # NaN / None / other non-text values
        return ""

    text = _ESCAPED_WHITESPACE_RE.sub(' ', text)
    text = _NON_LETTER_RE.sub('', text).lower()

    # str.split() with no argument splits on any run of whitespace and ignores
    # leading/trailing whitespace, so no separate collapsing pass is needed.
    stop_words, stemmer = _get_text_tools()
    tokens = [stemmer.stem(word) for word in text.split() if word not in stop_words]

    return ' '.join(tokens)

# Function to process text in parallel
def preprocess_parallel(texts, n_cores=None):
    """Apply ``preprocess_text`` to every item of ``texts`` in a process pool.

    Args:
        texts: the raw documents handed to ``Pool.map``.
        n_cores: number of worker processes; ``None`` means ``cpu_count()``.

    Returns:
        list: the result of ``Pool.map`` -- one processed string per input.
    """
    worker_count = cpu_count() if n_cores is None else n_cores

    with Pool(worker_count) as worker_pool:
        return worker_pool.map(preprocess_text, texts)

# Apply preprocessing to training data using all cores
print("Preprocessing training text data using all CPU cores...")
print(f"Available CPU cores: {cpu_count()}")

# Process in parallel
train_df['processed_text'] = preprocess_parallel(train_df['QA Text'].tolist())
test_df['processed_text'] = preprocess_parallel(test_df['QA Text'].tolist())

print("Preprocessing completed!")

In [None]:
# Test the simplified preprocessing function
test_texts = [
    "Question Title:\nWhat are the names of the 206 bones?\nQuestion Content:\n\nBest Answer:\\n206 bones of the human body:\\n\\nSKULL - AXIAL SKELETON",
    "can you tell me the university name\\nin which i can do distance mphil foods & nutrition?",
    "Suggestions or Advice on tracking someone\nwho owes you money from a small claims verdict and \"skipped town?\""
]

print("Testing simplified preprocessing function:")
print("=" * 60)
for i, text in enumerate(test_texts):
    print(f"Original {i+1}: {repr(text)}")
    processed = preprocess_text(text)
    print(f"Processed {i+1}: {repr(processed)}")
    print("-" * 60)

In [None]:
# Check preprocessing results
print("\nSample of preprocessed text:")
print("Original:", train_df['QA Text'].iloc[0][:100] + "...")
print("Processed:", train_df['processed_text'].iloc[0][:100] + "...")

# Prepare Data for Skip-gram Training

In [None]:
# Prepare sentences for Word2Vec training
sentences = [text.split() for text in train_df['processed_text'] if text.strip() != '']

# Check sentence statistics
sentence_lengths = [len(sentence) for sentence in sentences]
print(f"Number of sentences: {len(sentences)}")
print(f"Average sentence length: {np.mean(sentence_lengths):.2f}")
print(f"Max sentence length: {max(sentence_lengths)}")
print(f"Min sentence length: {min(sentence_lengths)}")

# Plot sentence length distribution
plt.figure(figsize=(10, 6))
plt.hist(sentence_lengths, bins=50, edgecolor='black')
plt.title('Distribution of Sentence Lengths')
plt.xlabel('Sentence Length')
plt.ylabel('Frequency')
plt.show()

# Train Skip-gram Embeddings (Adjusted)

## v7: multiprocessing

In [None]:
import multiprocessing
from gensim.models import Word2Vec
import time

print("‚ö° Ultra-optimized Word2Vec for 280K rows...")

# Use ALL available CPU cores
cores = multiprocessing.cpu_count()
print(f"Using {cores} CPU cores")

start_time = time.time()

# Initialize the model
model = Word2Vec(
    vector_size=100,      # Smaller = faster
    window=5,             # Smaller window
    min_count=2,
    sg=1,                 # Skip-gram
    workers=cores,        # Use ALL cores
    epochs=3,             # Fewer epochs
    batch_words=100000,   # Larger batches
    alpha=0.025,
    negative=5,           # Fewer negative samples
    sample=1e-4,          # More aggressive subsampling
)

# Build vocabulary FIRST (this is fast)
print("üìö Building vocabulary...")
vocab_start = time.time()
model.build_vocab(sentences)
vocab_time = time.time() - vocab_start
print(f"Vocabulary built in {vocab_time:.2f}s - {len(model.wv.key_to_index):,} words")

# NOW train the model (this is the actual training)
print("üöÄ Training Skip-gram model...")
train_start = time.time()
model.train(
    sentences, 
    total_examples=model.corpus_count,
    epochs=model.epochs,
    compute_loss=True  # Track training progress
)
training_time = time.time() - train_start

total_time = time.time() - start_time
print(f"‚úÖ Training completed in {training_time:.2f}s")
print(f"üìä Total time (vocab + training): {total_time:.2f}s")
# Throughput counts every pass over the corpus, so use the model's own epoch
# count rather than a hard-coded copy of it.
print(f"‚ö° Speed: {len(sentences)*model.epochs/training_time:.0f} sentences/second")

# Check the model actually learned something
print(f"\nüéØ Model trained successfully!")
print(f"Vocabulary size: {len(model.wv.key_to_index):,}")
print(f"Final loss: {model.get_latest_training_loss():.2f}")

# Test with sample words
test_words = ['scienc', 'educ', 'polit', 'question', 'answer']
print("\nüîç Testing embeddings:")
for word in test_words:
    if word in model.wv:
        similar = model.wv.most_similar(word, topn=2)
        print(f"'{word}': {[w[0] for w in similar]}")
    else:
        print(f"'{word}' not in vocabulary")

# Tokenization and Sequence Preparation (Adjusted)

In [None]:
test_df.columns

In [None]:
# Fit the Keras tokenizer on the training text only
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_df['processed_text'])
vocab_size = 1 + len(tokenizer.word_index)  # one more than the number of fitted words
print(f"Tokenizer vocabulary size: {vocab_size}")

# Turn every document into its list of integer word ids
X_train = tokenizer.texts_to_sequences(train_df['processed_text'])
X_test = tokenizer.texts_to_sequences(test_df['processed_text'])

# Token count of each document before padding
train_lengths = list(map(len, X_train))
test_lengths = list(map(len, X_test))

print(f"Average training sequence length: {np.mean(train_lengths):.2f}")
print(f"Max training sequence length: {max(train_lengths)}")
print(f"95th percentile: {np.percentile(train_lengths, 95):.2f}")
print(f"99th percentile: {np.percentile(train_lengths, 99):.2f}")

# Fixed sequence length: the 95th percentile of the training lengths, truncated to int
max_len = int(np.percentile(train_lengths, 95))
print(f"Using sequence length: {max_len} (95th percentile)")

# Pad short sequences and cut long ones, both at the end of the sequence
X_train = pad_sequences(X_train, padding='post', truncating='post', maxlen=max_len)
X_test = pad_sequences(X_test, padding='post', truncating='post', maxlen=max_len)

print(f"Training sequences shape: {X_train.shape}")
print(f"Testing sequences shape: {X_test.shape}")

# Share of training documents that lose tokens to truncation
long_sequences = sum(length > max_len for length in train_lengths)
print(f"Percentage of sequences longer than {max_len}: {(long_sequences/len(train_lengths))*100:.2f}%")

# Create Embedding Matrix (Adjusted)

In [None]:
# Create the embedding matrix: one row per tokenizer index, filled from the
# trained Word2Vec vectors.
# The dimensionality and the frequency cut-off are read from the trained model
# so this cell cannot drift out of sync with the Word2Vec configuration.
embedding_dim = model.wv.vector_size
embedding_matrix = np.zeros((vocab_size, embedding_dim))
found_words = 0
not_found_words = 0
low_freq_words = 0
min_count = model.min_count

for word, i in tokenizer.word_index.items():
    if i >= vocab_size:
        continue

    # Check if word is in our trained embeddings - use 'model' instead of 'skipgram_model'
    if word in model.wv:
        embedding_matrix[i] = model.wv[word]
        found_words += 1
    else:
        # Distinguish words dropped by Word2Vec's min_count filter from words
        # that are missing for some other reason
        word_freq = tokenizer.word_counts[word]
        if word_freq < min_count:
            low_freq_words += 1
        else:
            not_found_words += 1

        # Initialize with random values for unknown words
        embedding_matrix[i] = np.random.normal(scale=0.6, size=(embedding_dim,))

print(f"Words found in Word2Vec vocabulary: {found_words:,}")
print(f"Words not found (but should be): {not_found_words:,}")
print(f"Low frequency words filtered out (<{min_count} occurrences): {low_freq_words:,}")

# Guard the ratio: with an empty vocabulary (or only low-frequency words) the
# denominator is zero.
meaningful_words = found_words + not_found_words
coverage = (found_words / meaningful_words) * 100 if meaningful_words else 0.0
print(f"Coverage of meaningful words: {coverage:.2f}%")

# Prepare labels
# Class names are numbered in order of first appearance in the training set.
label_map = {label: idx for idx, label in enumerate(train_df['Class'].unique())}
reverse_label_map = {idx: label for label, idx in label_map.items()}

y_train = train_df['Class'].map(label_map).values
y_test = test_df['Class'].map(label_map).values
num_classes = len(label_map)

# Series.map leaves NaN wherever a test class has no entry in label_map.
# Report it here instead of letting NaN labels flow silently into evaluation.
n_unmapped_test_labels = int(pd.isna(y_test).sum())
if n_unmapped_test_labels:
    print(f"WARNING: {n_unmapped_test_labels} test rows have a class that does not occur "
          "in the training set; their y_test entries are NaN.")

print(f"\nNumber of classes: {num_classes}")
print("Label mapping:", label_map)

# Check class distribution
print("\nClass distribution in training set:")
for label, idx in label_map.items():
    # Vectorised count; the builtin sum() would loop over the array element by element
    count = int((y_train == idx).sum())
    print(f"  {label}: {count} samples ({count/len(y_train)*100:.2f}%)")

# Model Creation Functions (Adjusted for Larger Sequences)

In [None]:
# # Model creation functions optimized for longer sequences
# # def create_dnn_model():
# #     model = Sequential([
# #         Embedding(vocab_size, embedding_dim, 
# #                  weights=[embedding_matrix], 
# #                  input_length=max_len, 
# #                  trainable=False),
# #         tf.keras.layers.GlobalAveragePooling1D(),  # Better than Flatten for long sequences
# #         Dense(256, activation='relu'),
# #         Dropout(0.6),
# #         Dense(128, activation='relu'),
# #         Dropout(0.5),
# #         Dense(64, activation='relu'),
# #         Dropout(0.4),
# #         Dense(num_classes, activation='softmax')
# #     ])
    
# #     model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
# #                   loss='sparse_categorical_crossentropy', 
# #                   metrics=['accuracy'])
# #     return model
# def create_improved_dnn_model():
#     model = Sequential([
#         Embedding(vocab_size, embedding_dim, 
#                  weights=[embedding_matrix], 
#                  input_length=max_len, 
#                  trainable=True),  # Make trainable for fine-tuning
        
#         Conv1D(128, 5, activation='relu'),  # Add convolutional layer
#         GlobalMaxPooling1D(),
        
#         Dense(512, activation='relu'),
#         BatchNormalization(),
#         Dropout(0.5),
        
#         Dense(256, activation='relu'),
#         BatchNormalization(),
#         Dropout(0.4),
        
#         Dense(128, activation='relu'),
#         Dropout(0.3),
        
#         Dense(num_classes, activation='softmax')
#     ])
    
#     model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),  # Lower LR
#                   loss='sparse_categorical_crossentropy', 
#                   metrics=['accuracy'])
#     return model

# def create_rnn_model(bidirectional=False):
#     model = Sequential([
#         Embedding(vocab_size, embedding_dim, 
#                  weights=[embedding_matrix], 
#                  input_length=max_len, 
#                  trainable=False)
#     ])
    
#     if bidirectional:
#         model.add(Bidirectional(SimpleRNN(128, return_sequences=False, dropout=0.3)))
#     else:
#         model.add(SimpleRNN(128, return_sequences=False, dropout=0.3))
    
#     model.add(Dense(64, activation='relu'))
#     model.add(Dropout(0.4))
#     model.add(Dense(num_classes, activation='softmax'))
    
#     model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
#                   loss='sparse_categorical_crossentropy', 
#                   metrics=['accuracy'])
#     return model

# def create_gru_model(bidirectional=False):
#     model = Sequential([
#         Embedding(vocab_size, embedding_dim, 
#                  weights=[embedding_matrix], 
#                  input_length=max_len, 
#                  trainable=False)
#     ])
    
#     if bidirectional:
#         model.add(Bidirectional(GRU(128, return_sequences=False, dropout=0.3, recurrent_dropout=0.2)))
#     else:
#         model.add(GRU(128, return_sequences=False, dropout=0.3, recurrent_dropout=0.2))
    
#     model.add(Dense(64, activation='relu'))
#     model.add(Dropout(0.4))
#     model.add(Dense(num_classes, activation='softmax'))
    
#     model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
#                   loss='sparse_categorical_crossentropy', 
#                   metrics=['accuracy'])
#     return model

# def create_lstm_model(bidirectional=False):
#     model = Sequential([
#         Embedding(vocab_size, embedding_dim, 
#                  weights=[embedding_matrix], 
#                  input_length=max_len, 
#                  trainable=False)
#     ])
    
#     if bidirectional:
#         model.add(Bidirectional(LSTM(128, return_sequences=False, dropout=0.3, recurrent_dropout=0.2)))
#     else:
#         model.add(LSTM(128, return_sequences=False, dropout=0.3, recurrent_dropout=0.2))
    
#     model.add(Dense(64, activation='relu'))
#     model.add(Dropout(0.4))
#     model.add(Dense(num_classes, activation='softmax'))
    
#     model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
#                   loss='sparse_categorical_crossentropy', 
#                   metrics=['accuracy'])
#     return model

# # Test model creation
# test_model = create_dnn_model()
# print("DNN model summary:")
# test_model.summary()

## Multi-GPU model creation

In [None]:
# # Model creation functions with multi-GPU support
# from tensorflow.keras.models import Sequential

# from tensorflow.keras.layers import Dense, Embedding, Conv1D, GlobalMaxPooling1D, Dropout, BatchNormalization, LSTM, Bidirectional, GRU, SimpleRNN, SpatialDropout1D, MaxPooling1D

# def create_dnn_model():
#     with global_strategy.scope():
#         model = Sequential([
#             Embedding(vocab_size, embedding_dim, 
#                      weights=[embedding_matrix], 
#                      input_length=max_len, 
#                      trainable=True),  # Enable fine-tuning
            
#             Conv1D(128, 5, activation='relu'),
#             GlobalMaxPooling1D(),
            
#             Dense(512, activation='relu'),
#             BatchNormalization(),
#             Dropout(0.5),
            
#             Dense(256, activation='relu'),
#             BatchNormalization(),
#             Dropout(0.4),
            
#             Dense(128, activation='relu'),
#             Dropout(0.3),
            
#             Dense(num_classes, activation='softmax')
#         ])
        
#         model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
#                       loss='sparse_categorical_crossentropy', 
#                       metrics=['accuracy'])
#     return model

# def create_rnn_model(bidirectional=False):
#     with global_strategy.scope():
#         model = Sequential([
#             Embedding(vocab_size, embedding_dim, 
#                      weights=[embedding_matrix], 
#                      input_length=max_len, 
#                      trainable=True)  # Enable fine-tuning
#         ])
        
#         if bidirectional:
#             model.add(Bidirectional(SimpleRNN(256, return_sequences=False, dropout=0.4, recurrent_dropout=0.3)))
#         else:
#             model.add(SimpleRNN(256, return_sequences=False, dropout=0.4, recurrent_dropout=0.3))
        
#         model.add(Dense(128, activation='relu'))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.5))
#         model.add(Dense(num_classes, activation='softmax'))
        
#         model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003), 
#                       loss='sparse_categorical_crossentropy', 
#                       metrics=['accuracy'])
#     return model

# def create_gru_model(bidirectional=False):
#     with global_strategy.scope():
#         model = Sequential([
#             Embedding(vocab_size, embedding_dim, 
#                      weights=[embedding_matrix], 
#                      input_length=max_len, 
#                      trainable=True)  # Enable fine-tuning
#         ])
        
#         if bidirectional:
#             model.add(Bidirectional(GRU(256, return_sequences=False, dropout=0.4, recurrent_dropout=0.3)))
#         else:
#             model.add(GRU(256, return_sequences=False, dropout=0.4, recurrent_dropout=0.3))
        
#         model.add(Dense(128, activation='relu'))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.5))
#         model.add(Dense(num_classes, activation='softmax'))
        
#         model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003), 
#                       loss='sparse_categorical_crossentropy', 
#                       metrics=['accuracy'])
#     return model

# def create_lstm_model(bidirectional=False):
#     with global_strategy.scope():
#         model = Sequential([
#             Embedding(vocab_size, embedding_dim, 
#                      weights=[embedding_matrix], 
#                      input_length=max_len, 
#                      trainable=True)  # Enable fine-tuning
#         ])
        
#         if bidirectional:
#             model.add(Bidirectional(LSTM(256, return_sequences=False, dropout=0.4, recurrent_dropout=0.3)))
#         else:
#             model.add(LSTM(256, return_sequences=False, dropout=0.4, recurrent_dropout=0.3))
        
#         model.add(Dense(128, activation='relu'))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.5))
#         model.add(Dense(num_classes, activation='softmax'))
        
#         model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003), 
#                       loss='sparse_categorical_crossentropy', 
#                       metrics=['accuracy'])
#     return model

# # Test model creation
# print("Testing model creation with multi-GPU...")
# test_model = create_dnn_model()
# print("DNN model summary:")
# test_model.summary()

## Multi-GPU model creation (revised to address underfitting)

In [None]:
# Model creation functions with multi-GPU support (REVISED FOR UNDERFITTING)
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense, Embedding, Conv1D, GlobalMaxPooling1D, Dropout, BatchNormalization, LSTM, Bidirectional, GRU, SimpleRNN, SpatialDropout1D, MaxPooling1D
def create_dnn_model():
    """Build and compile the convolutional text classifier under ``global_strategy``.

    Layer order: trainable embedding initialised from ``embedding_matrix`` ->
    spatial dropout -> Conv1D(256, 5) -> max-pool(2) -> Conv1D(128, 3) ->
    global max-pool -> Dense 512 and Dense 256 (each followed by batch-norm)
    -> Dense 128, with dropout 0.3 / 0.2 / 0.1 -> softmax over ``num_classes``.

    Reads the notebook globals ``vocab_size``, ``embedding_dim``,
    ``embedding_matrix``, ``max_len`` and ``num_classes``.

    Returns:
        The compiled ``Sequential`` model (Adam with lr=0.001, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    with global_strategy.scope():
        # Pretrained word vectors, fine-tuned during training
        embedding_stage = [
            Embedding(
                vocab_size,
                embedding_dim,
                weights=[embedding_matrix],
                input_length=max_len,
                trainable=True,
            ),
            SpatialDropout1D(0.2),
        ]

        # Two convolutions: a wide one, then a narrower one on the pooled output
        conv_stage = [
            Conv1D(256, 5, activation='relu', padding='same'),
            MaxPooling1D(2),
            Conv1D(128, 3, activation='relu', padding='same'),
            GlobalMaxPooling1D(),
        ]

        # Fully connected classifier with progressively lighter dropout
        dense_head = [
            Dense(512, activation='relu'),
            BatchNormalization(),
            Dropout(0.3),
            Dense(256, activation='relu'),
            BatchNormalization(),
            Dropout(0.2),
            Dense(128, activation='relu'),
            Dropout(0.1),
            Dense(num_classes, activation='softmax'),
        ]

        network = Sequential(embedding_stage + conv_stage + dense_head)

        adam = tf.keras.optimizers.Adam(learning_rate=0.001)
        network.compile(
            optimizer=adam,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
    return network

def create_rnn_model(bidirectional=True):
    """Build and compile a two-layer SimpleRNN classifier under ``global_strategy``.

    Args:
        bidirectional: when true (the default) each recurrent layer is wrapped
            in ``Bidirectional``; otherwise the plain layers are used.

    Layer order: trainable embedding initialised from ``embedding_matrix`` ->
    spatial dropout -> SimpleRNN(128, returns sequences) -> SimpleRNN(64) ->
    Dense 128 -> batch-norm -> dropout 0.2 -> softmax over ``num_classes``.

    Returns:
        The compiled ``Sequential`` model (Adam with lr=0.002, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    def directional(recurrent_layer):
        # Wrap the layer only when a bidirectional network was requested
        return Bidirectional(recurrent_layer) if bidirectional else recurrent_layer

    with global_strategy.scope():
        network = Sequential([
            Embedding(
                vocab_size,
                embedding_dim,
                weights=[embedding_matrix],
                input_length=max_len,
                trainable=True,
            ),
            SpatialDropout1D(0.2),
        ])

        # Recurrent stack: the first layer emits the full sequence for the second
        network.add(directional(SimpleRNN(128, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)))
        network.add(directional(SimpleRNN(64, return_sequences=False, dropout=0.1, recurrent_dropout=0.1)))

        # Classification head
        network.add(Dense(128, activation='relu'))
        network.add(BatchNormalization())
        network.add(Dropout(0.2))
        network.add(Dense(num_classes, activation='softmax'))

        adam = tf.keras.optimizers.Adam(learning_rate=0.002)
        network.compile(
            optimizer=adam,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
    return network

def create_gru_model(bidirectional=True):
    """Build and compile a two-layer GRU classifier under ``global_strategy``.

    Args:
        bidirectional: when true (the default) each GRU layer is wrapped in
            ``Bidirectional``; otherwise the plain layers are used.

    Layer order: trainable embedding initialised from ``embedding_matrix`` ->
    spatial dropout -> GRU(256, returns sequences) -> GRU(128) -> Dense 128 ->
    batch-norm -> dropout 0.3 -> softmax over ``num_classes``.

    Returns:
        The compiled ``Sequential`` model (Adam with lr=0.001, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    def directional(recurrent_layer):
        # Wrap the layer only when a bidirectional network was requested
        return Bidirectional(recurrent_layer) if bidirectional else recurrent_layer

    with global_strategy.scope():
        network = Sequential([
            Embedding(
                vocab_size,
                embedding_dim,
                weights=[embedding_matrix],
                input_length=max_len,
                trainable=True,
            ),
            SpatialDropout1D(0.2),
        ])

        # NOTE(review): recurrent_dropout > 0 presumably rules out the fused
        # cuDNN GRU kernel on GPU -- confirm against the Keras GRU docs if
        # training speed matters.
        network.add(directional(GRU(256, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)))
        network.add(directional(GRU(128, return_sequences=False, dropout=0.2, recurrent_dropout=0.2)))

        # Classification head
        network.add(Dense(128, activation='relu'))
        network.add(BatchNormalization())
        network.add(Dropout(0.3))
        network.add(Dense(num_classes, activation='softmax'))

        adam = tf.keras.optimizers.Adam(learning_rate=0.001)
        network.compile(
            optimizer=adam,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
    return network

def create_lstm_model(bidirectional=True):
    """
    Build and compile a two-layer stacked LSTM text classifier.

    Layer order: Embedding (initialised from ``embedding_matrix`` and kept
    trainable) -> SpatialDropout1D(0.2) -> LSTM(256, returning sequences) ->
    LSTM(128) -> Dense(128, relu) -> BatchNormalization -> Dropout(0.3) ->
    Dense(num_classes, softmax).

    Args:
        bidirectional: When True (the default) each LSTM layer is wrapped in
            ``Bidirectional``; pass False for a unidirectional stack.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.001),
        sparse categorical cross-entropy loss and the accuracy metric.

    Reads the notebook-level names ``vocab_size``, ``embedding_dim``,
    ``embedding_matrix``, ``max_len`` and ``num_classes``; all layers and the
    compile step are created inside ``global_strategy.scope()``.
    """
    def recurrent_layer(units, return_sequences):
        # Both LSTM layers share the same input and recurrent dropout rates.
        lstm = LSTM(units, return_sequences=return_sequences,
                    dropout=0.2, recurrent_dropout=0.2)
        return Bidirectional(lstm) if bidirectional else lstm

    with global_strategy.scope():
        model = Sequential([
            Embedding(vocab_size, embedding_dim,
                      weights=[embedding_matrix],
                      input_length=max_len,
                      trainable=True),
            SpatialDropout1D(0.2),
        ])

        # Stacked recurrent part: only the first layer hands the full
        # sequence on; the second collapses it to a single vector.
        for units, keep_sequence in ((256, True), (128, False)):
            model.add(recurrent_layer(units, keep_sequence))

        # Classification head.
        model.add(Dense(128, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.3))
        model.add(Dense(num_classes, activation='softmax'))

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
    return model

# Smoke test: build one bidirectional LSTM and print its layer summary so that
# construction/compile problems surface here rather than in a training cell.
# The instance is kept in the notebook-level name `test_model`.
print("Testing revised model creation with multi-GPU...")
test_model = create_lstm_model(bidirectional=True)
print("Revised LSTM model summary:")
test_model.summary()

## Multiple Attempts at Better Results

In [None]:
# # Model creation functions with multi-GPU support (PRECISION OPTIMIZED)
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Embedding, Conv1D, GlobalMaxPooling1D, Dropout, BatchNormalization, LSTM, Bidirectional, GRU, SimpleRNN, SpatialDropout1D, MaxPooling1D

# def create_dnn_model():
#     """
#     Optimized CNN model with proven architecture.
#     """
#     with global_strategy.scope():
#         model = Sequential([
#             Embedding(vocab_size, embedding_dim, 
#                      weights=[embedding_matrix], 
#                      input_length=max_len, 
#                      trainable=True),  # Keep trainable
            
#             SpatialDropout1D(0.2),
            
#             # Optimal convolutional setup
#             Conv1D(128, 5, activation='relu', padding='same'),
#             MaxPooling1D(2),
#             Conv1D(64, 3, activation='relu', padding='same'),
#             GlobalMaxPooling1D(),
            
#             # Optimal dense layers
#             Dense(128, activation='relu'),
#             BatchNormalization(),
#             Dropout(0.3),
            
#             Dense(64, activation='relu'),
#             Dropout(0.2),
            
#             Dense(num_classes, activation='softmax')
#         ])
        
#         # Optimal learning rate
#         model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
#                       loss='sparse_categorical_crossentropy', 
#                       metrics=['accuracy'])
#     return model

# def create_rnn_model(bidirectional=True):
#     """
#     Optimized RNN model with balanced parameters.
#     """
#     with global_strategy.scope():
#         model = Sequential([
#             Embedding(vocab_size, embedding_dim, 
#                      weights=[embedding_matrix], 
#                      input_length=max_len, 
#                      trainable=True),
            
#             SpatialDropout1D(0.3),
#         ])
        
#         if bidirectional:
#             model.add(Bidirectional(SimpleRNN(64, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)))
#             model.add(Bidirectional(SimpleRNN(32, dropout=0.2, recurrent_dropout=0.2)))
#         else:
#             model.add(SimpleRNN(64, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))
#             model.add(SimpleRNN(32, dropout=0.2, recurrent_dropout=0.2))
        
#         model.add(Dense(64, activation='relu'))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.3))
#         model.add(Dense(num_classes, activation='softmax'))
        
#         model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
#                       loss='sparse_categorical_crossentropy', 
#                       metrics=['accuracy'])
#     return model

# def create_gru_model(bidirectional=True):
#     """
#     Optimized GRU model with conservative settings.
#     """
#     with global_strategy.scope():
#         model = Sequential([
#             Embedding(vocab_size, embedding_dim, 
#                      weights=[embedding_matrix], 
#                      input_length=max_len, 
#                      trainable=True),
            
#             SpatialDropout1D(0.3),
#         ])
        
#         if bidirectional:
#             model.add(Bidirectional(GRU(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.3)))
#             model.add(Bidirectional(GRU(64, dropout=0.3, recurrent_dropout=0.3)))
#         else:
#             model.add(GRU(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.3))
#             model.add(GRU(64, dropout=0.3, recurrent_dropout=0.3))
        
#         model.add(Dense(64, activation='relu'))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.3))
#         model.add(Dense(num_classes, activation='softmax'))
        
#         model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0008), 
#                       loss='sparse_categorical_crossentropy', 
#                       metrics=['accuracy'])
#     return model

# def create_lstm_model(bidirectional=True):
#     """
#     Optimized LSTM model - back to basics with proven settings.
#     """
#     with global_strategy.scope():
#         model = Sequential([
#             Embedding(vocab_size, embedding_dim, 
#                      weights=[embedding_matrix], 
#                      input_length=max_len, 
#                      trainable=True),
            
#             SpatialDropout1D(0.3),
#         ])
        
#         if bidirectional:
#             model.add(Bidirectional(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.3)))
#             model.add(Bidirectional(LSTM(64, dropout=0.3, recurrent_dropout=0.3)))
#         else:
#             model.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.3))
#             model.add(LSTM(64, dropout=0.3, recurrent_dropout=0.3))
        
#         model.add(Dense(64, activation='relu'))
#         model.add(BatchNormalization())
#         model.add(Dropout(0.3))
#         model.add(Dense(num_classes, activation='softmax'))
        
#         model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0007), 
#                       loss='sparse_categorical_crossentropy', 
#                       metrics=['accuracy'])
#     return model

# # Test model creation
# print("Testing optimized model creation with multi-GPU...")
# test_model = create_lstm_model(bidirectional=True)
# print("Optimized LSTM model summary:")
# test_model.summary()

In [None]:
# # Add these callbacks to EVERY model training
# from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# callbacks = [
#     EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
#     ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7)
# ]

# # Train with these settings
# history = model.fit(
#     X_train, y_train,
#     batch_size=32,  # Try 32, 64
#     epochs=50,
#     validation_data=(X_val, y_val),
#     callbacks=callbacks,
#     verbose=1
# )

# Training and Evaluation Function (Adjusted)

## Multi-GPU evaluation

In [None]:
# Training and evaluation function with multi-GPU support
def _model_has_recurrent_layer(model):
    """Return True if any layer of ``model`` (or a layer it wraps) is an RNN, LSTM or GRU."""
    recurrent_markers = ('RNN', 'LSTM', 'GRU')
    for layer in getattr(model, 'layers', []):
        # Wrappers such as Bidirectional hide the recurrent layer; depending on
        # the Keras version it is exposed as `.forward_layer` and/or `.layer`.
        candidates = (
            layer,
            getattr(layer, 'forward_layer', None),
            getattr(layer, 'layer', None),
        )
        for candidate in candidates:
            if candidate is None:
                continue
            if any(marker in type(candidate).__name__ for marker in recurrent_markers):
                return True
    return False


def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, model_name, epochs=5):
    """
    Train ``model`` with class weighting and callbacks, then evaluate and plot.

    Args:
        model: Compiled Keras model to train (modified in place by ``fit``).
        X_train, y_train: Training inputs and integer class labels; the last
            20% is held out by ``validation_split`` for validation.
        X_test, y_test: Held-out inputs and integer labels used for the
            reported metrics.
        model_name: Label used in log lines, plot titles and the checkpoint
            file name ``best_<model_name>.h5``.
        epochs: Maximum number of training epochs (default 5). Note that with
            the default the EarlyStopping patience of 5 can never be reached.

    Returns:
        dict with keys ``model_name``, ``model``, ``accuracy``, ``f1_score``
        (weighted), ``f1_macro``, ``confusion_matrix``,
        ``classification_report``, ``history``, ``training_time`` (seconds)
        and ``epochs_trained``.

    Reads the notebook-level names ``global_strategy``, ``num_classes`` and
    ``reverse_label_map``. Side effects: writes the checkpoint file and shows
    two matplotlib figures.
    """
    # Balanced class weights, keyed by the actual label value rather than by
    # position so the mapping stays correct whatever np.unique returns.
    class_labels = np.unique(y_train)
    class_weights = compute_class_weight('balanced',
                                         classes=class_labels,
                                         y=y_train)
    class_weight_dict = {
        int(label): float(weight)
        for label, weight in zip(class_labels, class_weights)
    }

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=5, restore_best_weights=True, min_delta=0.001
    )

    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6
    )

    # Keep the epoch with the best validation accuracy on disk.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        f'best_{model_name}.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )

    print(f"\nTraining {model_name}...")
    start_time = time.time()

    # Per-replica batch size: recurrent models get the smaller batch. The layer
    # types are inspected directly because str(type(model)) is just the
    # Sequential class name and never mentions RNN/LSTM/GRU.
    num_gpus = global_strategy.num_replicas_in_sync
    base_batch_size = 512 if _model_has_recurrent_layer(model) else 1024
    batch_size = base_batch_size * num_gpus

    print(f"Using batch size: {batch_size} across {num_gpus} GPUs")

    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=[early_stopping, reduce_lr, checkpoint],
        class_weight=class_weight_dict,
        verbose=1,
        shuffle=True
    )

    training_time = time.time() - start_time
    print(f"{model_name} training completed in {training_time:.2f} seconds")

    # Evaluate model
    print("Evaluating model...")
    y_pred = model.predict(X_test, batch_size=128, verbose=1)
    y_pred_classes = np.argmax(y_pred, axis=1)

    # Pin the label set so the matrix and the report always have num_classes
    # rows, matching the tick labels / target names even when a class is
    # absent from both y_test and the predictions.
    class_ids = list(range(num_classes))
    class_names = [reverse_label_map[i] for i in class_ids]

    accuracy = accuracy_score(y_test, y_pred_classes)
    f1 = f1_score(y_test, y_pred_classes, average='weighted')
    f1_macro = f1_score(y_test, y_pred_classes, average='macro')
    cm = confusion_matrix(y_test, y_pred_classes, labels=class_ids)
    cr = classification_report(y_test, y_pred_classes,
                               labels=class_ids, target_names=class_names)

    # Plot training history
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'{model_name} Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'{model_name} Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Row-normalised confusion matrix; rows of classes with no test samples
    # stay at 0 instead of becoming NaN through a division by zero.
    plt.figure(figsize=(10, 8))
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(
        cm.astype('float'), row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    )
    sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title(f'{model_name} Normalized Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()

    print(f"Classification Report for {model_name}:\n{cr}")
    print(f"Macro F1: {f1_macro:.4f}, Weighted F1: {f1:.4f}")

    return {
        'model_name': model_name,
        'model': model,
        'accuracy': accuracy,
        'f1_score': f1,
        'f1_macro': f1_macro,
        'confusion_matrix': cm,
        'classification_report': cr,
        'history': history.history,
        'training_time': training_time,
        'epochs_trained': len(history.history['accuracy'])
    }

#  Run All Neural Network Models

## Multi-GPU training init

In [None]:
# Run all Neural Network models with Skip-gram embeddings
# compute_class_weight is called by train_and_evaluate_model, so it has to be
# imported before any training cell runs.
from sklearn.utils.class_weight import compute_class_weight
print("Training all Neural Network models with Skip-gram embeddings...")
# Per-model result dicts are stored here by the individual training cells,
# keyed by a short model label such as 'DNN' or 'Bidirectional_LSTM'.
results = {}

# # List of all models to train
# models_to_train = [
#     ('DNN', create_dnn_model),
#     # ('SimpleRNN', create_rnn_model),
#     # ('GRU', create_gru_model),
#     # ('LSTM', create_lstm_model),
#     # ('Bidirectional_SimpleRNN', lambda: create_rnn_model(bidirectional=True)),
#     # ('Bidirectional_GRU', lambda: create_gru_model(bidirectional=True)),
#     # ('Bidirectional_LSTM', lambda: create_lstm_model(bidirectional=True))
# ]

# # Train each model sequentially
# for model_name, model_func in models_to_train:
#     print(f"\n{'='*60}")
#     print(f"Training {model_name}...")
#     print(f"{'='*60}")
    
#     try:
#         # Create and train model
#         model = model_func()
#         result = train_and_evaluate_model(
#             model, X_train, y_train, X_test, y_test, f"{model_name}_Skipgram"
#         )
#         results[model_name] = result
        
#         # Clear memory after each model
#         tf.keras.backend.clear_session()
#         import gc
#         gc.collect()
        
#         print(f"✅ {model_name} completed successfully!")
        
#     except Exception as e:
#         print(f"❌ Error training {model_name}: {e}")
#         import traceback
#         traceback.print_exc()

# print(f"\n{'='*60}")
# print("All models training completed!")
# print(f"{'='*60}")

# # Show final GPU status
# print("\nFinal GPU Status:")
# !nvidia-smi

# Individual Model Training (If you want to run them separately)

In [None]:
from sklearn.utils.class_weight import compute_class_weight

In [None]:
# Individual model training cells (run these one by one if needed)

# 1. DNN model: build it, train and evaluate it, then store the metrics under
#    'DNN' and reset the Keras session before the next model is built.
print("Training DNN Model...")
dnn_model = create_dnn_model()
dnn_result = train_and_evaluate_model(
    model=dnn_model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    model_name="DNN_Skipgram",
)
results['DNN'] = dnn_result
tf.keras.backend.clear_session()



In [None]:
# 2. SimpleRNN Model (unidirectional baseline)
# bidirectional=False is passed explicitly so this run is the unidirectional
# stack regardless of the builder's default; the bidirectional variant is
# trained separately and stored under 'Bidirectional_SimpleRNN'.
print("Training SimpleRNN Model...")
rnn_model = create_rnn_model(bidirectional=False)
rnn_result = train_and_evaluate_model(
    rnn_model, X_train, y_train, X_test, y_test, "SimpleRNN_Skipgram"
)
results['SimpleRNN'] = rnn_result
tf.keras.backend.clear_session()

In [None]:
# 3. GRU Model (unidirectional baseline)
# create_gru_model defaults to bidirectional=True, so calling it without
# arguments would train the same architecture as the 'Bidirectional_GRU' run.
# Request the unidirectional stack explicitly.
print("Training GRU Model...")
gru_model = create_gru_model(bidirectional=False)
gru_result = train_and_evaluate_model(
    gru_model, X_train, y_train, X_test, y_test, "GRU_Skipgram"
)
results['GRU'] = gru_result
tf.keras.backend.clear_session()

In [None]:
# 4. LSTM Model (unidirectional baseline)
# create_lstm_model defaults to bidirectional=True, so calling it without
# arguments would train the same architecture as the 'Bidirectional_LSTM' run.
# Request the unidirectional stack explicitly.
print("Training LSTM Model...")
lstm_model = create_lstm_model(bidirectional=False)
lstm_result = train_and_evaluate_model(
    lstm_model, X_train, y_train, X_test, y_test, "LSTM_Skipgram"
)
results['LSTM'] = lstm_result
tf.keras.backend.clear_session()


In [None]:

# 5. Bidirectional SimpleRNN: build, train/evaluate, store the metrics under
#    'Bidirectional_SimpleRNN', then reset the Keras session.
print("Training Bidirectional SimpleRNN Model...")
birnn_model = create_rnn_model(bidirectional=True)
birnn_result = train_and_evaluate_model(
    model=birnn_model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    model_name="Bidirectional_SimpleRNN_Skipgram",
)
results['Bidirectional_SimpleRNN'] = birnn_result
tf.keras.backend.clear_session()


In [None]:

# 6. Bidirectional GRU: build, train/evaluate, store the metrics under
#    'Bidirectional_GRU', then reset the Keras session.
print("Training Bidirectional GRU Model...")
bigru_model = create_gru_model(bidirectional=True)
bigru_result = train_and_evaluate_model(
    model=bigru_model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    model_name="Bidirectional_GRU_Skipgram",
)
results['Bidirectional_GRU'] = bigru_result
tf.keras.backend.clear_session()


In [None]:

# 7. Bidirectional LSTM: build, train/evaluate, store the metrics under
#    'Bidirectional_LSTM', then reset the Keras session.
print("Training Bidirectional LSTM Model...")
bilstm_model = create_lstm_model(bidirectional=True)
bilstm_result = train_and_evaluate_model(
    model=bilstm_model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    model_name="Bidirectional_LSTM_Skipgram",
)
results['Bidirectional_LSTM'] = bilstm_result
tf.keras.backend.clear_session()

# Results Comparison and Analysis

In [None]:
pip install -U kaleido

#### Stored Data

In [None]:
def _stored_result(accuracy, f1, seconds, parameters,
                   train_acc, train_loss, val_acc, val_loss):
    """Assemble one recorded result entry in the layout the comparison cells read.

    ``f1`` fills both ``f1_score`` (weighted) and ``f1_macro``: every recorded
    run lists the same value for the two. ``epochs_trained`` is 5 for every
    recorded run. The four history arguments are lists of per-epoch values.
    """
    return {
        "accuracy": accuracy,
        "f1_score": f1,
        "f1_macro": f1,
        "training_time": seconds,
        "epochs_trained": 5,
        "parameters": parameters,
        "history": {
            "accuracy": train_acc,
            "loss": train_loss,
            "val_accuracy": val_acc,
            "val_loss": val_loss,
        },
    }


# Recorded metrics of earlier training runs. Assigning this REPLACES whatever
# the training cells stored in `results`. Most histories hold a single value
# per metric; only Bidirectional_LSTM keeps all five epochs.
results = {
    "DNN": _stored_result(
        0.6989, 0.689, 108.7344, 61103222,
        [0.7290], [0.8604], [0.6926], [0.9724],
    ),
    "SimpleRNN": _stored_result(
        0.675, 0.667, 471.7932, 60759926,
        [0.6532], [1.1061], [0.6606], [1.1244],
    ),
    "GRU": _stored_result(
        0.711, 0.705, 1048.23, 61719542,
        [0.7478], [0.8072], [0.7004], [0.9628],
    ),
    "LSTM": _stored_result(
        0.716, 0.711, 1187.5069, 62064118,
        [0.7329], [0.8679], [0.7090], [0.9224],
    ),
    "Bidirectional_SimpleRNN": _stored_result(
        0.660, 0.657, 469.4036, 60759926,
        [0.6711], [1.0578], [0.6536], [1.1420],
    ),
    "Bidirectional_GRU": _stored_result(
        0.719, 0.713, 1055.4283, 61719542,
        [0.7420], [0.8144], [0.6974], [0.9788],
    ),
    "Bidirectional_LSTM": _stored_result(
        0.716, 0.711, 1146.4614, 62064118,
        [0.1149, 0.6300, 0.6848, 0.7046, 0.7245],
        [2.7309, 1.1602, 1.0188, 0.9470, 0.8873],
        [0.6384, 0.6745, 0.6950, 0.7017, 0.7068],
        [1.5884, 1.1917, 0.9716, 0.9341, 0.9208],
    ),
}


## VISUALIZATION: 1 - INTERACTIVE

In [None]:
# Flatten the per-model result dicts into one comparison table
# (one row per model, sorted by test accuracy, best first).
print("Creating comprehensive results comparison...")
comparison_data = []

for model_name, result in results.items():
    history = result['history']
    # Parameter count comes from the stored entry; 0 when it was not recorded.
    params = result.get('parameters', 0)

    minutes = result['training_time'] / 60
    final_train_acc = history['accuracy'][-1]
    # Validation curves are optional; fall back to 0 when they are missing.
    has_val_acc = 'val_accuracy' in history
    final_val_acc = history['val_accuracy'][-1] if has_val_acc else 0

    row = {
        'Model': model_name,
        'Accuracy': result['accuracy'],
        'F1_Score_Weighted': result.get('f1_score', 0),
        # Macro F1 falls back to the weighted score when it was not stored.
        'F1_Score_Macro': result.get('f1_macro', result.get('f1_score', 0)),
        'Training_Time_Seconds': result['training_time'],
        'Training_Time_Minutes': minutes,
        'Epochs_Trained': result.get('epochs_trained', len(history['accuracy'])),
        'Final_Train_Accuracy': final_train_acc,
        'Final_Val_Accuracy': final_val_acc,
        'Final_Train_Loss': history['loss'][-1],
        'Final_Val_Loss': history['val_loss'][-1] if 'val_loss' in history else 0,
        'Best_Val_Accuracy': max(history['val_accuracy']) if has_val_acc else 0,
        'Parameters': params,
        'Parameters_Millions': params / 1e6,
        # Test accuracy divided by training minutes.
        'Efficiency_Score': result['accuracy'] / minutes,
        # Final training accuracy minus final validation accuracy.
        'Overfitting_Gap': final_train_acc - final_val_acc,
    }
    comparison_data.append(row)

comparison_df = (
    pd.DataFrame(comparison_data)
    .sort_values('Accuracy', ascending=False)
)

separator = "=" * 80
print("\n" + separator)
print("COMPREHENSIVE RESULTS COMPARISON")
print(separator)
print(comparison_df.round(4).to_string(index=False))

# Styled summary table: a seven-column subset of comparison_df with short,
# human-readable headers.
styled_df = comparison_df[['Model', 'Accuracy', 'F1_Score_Weighted', 'F1_Score_Macro',
                          'Training_Time_Minutes', 'Parameters_Millions', 'Efficiency_Score']].copy()
styled_df.columns = ['Model', 'Accuracy', 'F1 (Weighted)', 'F1 (Macro)',
                    'Time (Min)', 'Params (M)', 'Efficiency']


def highlight_max(s):
    """Return per-cell CSS for column ``s``: light green on the maximum, '' elsewhere."""
    is_max = s == s.max()
    return ['background-color: lightgreen' if v else '' for v in is_max]


def highlight_min(s):
    """Return per-cell CSS for column ``s``: light coral on the minimum, '' elsewhere."""
    is_min = s == s.min()
    return ['background-color: lightcoral' if v else '' for v in is_min]


# Maxima are highlighted for the quality columns, minima for the cost columns.
styled_display = styled_df.style\
    .format({
        'Accuracy': '{:.3f}',
        'F1 (Weighted)': '{:.3f}',
        'F1 (Macro)': '{:.3f}',
        'Time (Min)': '{:.1f}',
        'Params (M)': '{:.2f}',
        'Efficiency': '{:.4f}'
    })\
    .apply(highlight_max, subset=['Accuracy', 'F1 (Weighted)', 'F1 (Macro)', 'Efficiency'])\
    .apply(highlight_min, subset=['Time (Min)', 'Params (M)'])\
    .set_properties(**{'text-align': 'center'})\
    .set_table_styles([{
        'selector': 'th',
        'props': [('background-color', '#40466e'), ('color', 'white'), ('font-weight', 'bold')]
    }])

# The header previously printed a mis-encoded byte sequence instead of the emoji.
print("\n📊 STYLED COMPARISON TABLE:")
display(styled_display)

# Create interactive visualizations with Plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

# Set Plotly template
pio.templates.default = "plotly_white"


def _metric_bar(column, name, hover_label, value_format, decimals, colors, unit=''):
    """Bar trace of one comparison_df column per model, labelled with rounded values."""
    values = comparison_df[column]
    return go.Bar(
        x=comparison_df['Model'],
        y=values,
        marker_color=colors,
        text=values.round(decimals),
        textposition='auto',
        name=name,
        hovertemplate=(
            f'<b>%{{x}}</b><br>{hover_label}: %{{y:{value_format}}}{unit}<extra></extra>'
        ),
    )


def _panel_colorbar(title, x, y):
    """Short colorbar placed in the right margin, vertically centred on its subplot row."""
    return dict(title=title, x=x, y=y, len=0.25, thickness=12)


# Bubble sizing for the two accuracy scatter panels. The parameter count (in
# millions) is mapped to marker AREA with the largest bubble capped at
# MAX_BUBBLE_PX; using the raw value times 10 as a pixel diameter produced
# markers hundreds of pixels wide for models with tens of millions of
# parameters. Falls back to a fixed size when no parameter counts are stored.
MAX_BUBBLE_PX = 40
_param_millions = comparison_df['Parameters_Millions']
_largest_model = _param_millions.max()
if _largest_model > 0:
    _bubble_size = dict(
        size=_param_millions,
        sizemode='area',
        sizeref=2.0 * _largest_model / (MAX_BUBBLE_PX ** 2),
        sizemin=4,
    )
else:
    _bubble_size = dict(size=12)

# Create a comprehensive dashboard with subplots
fig = make_subplots(
    rows=3, cols=3,
    subplot_titles=(
        'Model Accuracy Comparison', 'F1 Score Comparison',
        'Training vs Validation Accuracy', 'Training Time Comparison',
        'Model Size (Parameters)', 'Overfitting Analysis',
        'Training Efficiency', 'Accuracy vs Training Time',
        'Accuracy vs Efficiency'
    ),
    specs=[
        [{"type": "bar"}, {"type": "bar"}, {"type": "scatter"}],
        [{"type": "bar"}, {"type": "bar"}, {"type": "bar"}],
        [{"type": "bar"}, {"type": "scatter"}, {"type": "scatter"}]
    ],
    vertical_spacing=0.08,
    horizontal_spacing=0.08
)

# Update layout. The right margin is reserved for the three colorbars and the
# legend is moved below the grid so neither covers the plots or each other.
fig.update_layout(
    title_text='COMPREHENSIVE MODEL COMPARISON (Skip-gram Embeddings)',
    title_font_size=20,
    title_font_color='darkblue',
    title_x=0.5,
    height=1200,
    width=1400,
    showlegend=True,
    template='plotly_white',
    margin=dict(r=180),
    legend=dict(orientation='h', x=0.5, xanchor='center', y=-0.08, yanchor='top'),
)

# 1. Accuracy comparison
fig.add_trace(
    _metric_bar('Accuracy', 'Accuracy', 'Accuracy', '.3f', 3,
                px.colors.qualitative.Set3),
    row=1, col=1
)

# 2. F1 Scores comparison (weighted and macro side by side)
for f1_column, f1_name, f1_color in (
    ('F1_Score_Weighted', 'Weighted F1', 'lightblue'),
    ('F1_Score_Macro', 'Macro F1', 'lightcoral'),
):
    fig.add_trace(
        go.Bar(
            x=comparison_df['Model'],
            y=comparison_df[f1_column],
            name=f1_name,
            marker_color=f1_color,
            hovertemplate=f'<b>%{{x}}</b><br>{f1_name}: %{{y:.3f}}<extra></extra>'
        ),
        row=1, col=2
    )

# 3. Training vs Validation Accuracy
fig.add_trace(
    go.Scatter(
        x=comparison_df['Final_Train_Accuracy'],
        y=comparison_df['Final_Val_Accuracy'],
        mode='markers+text',
        text=comparison_df['Model'],
        textposition='top center',
        marker=dict(
            size=15,
            color=comparison_df['Accuracy'],
            colorscale='Viridis',
            showscale=True,
            colorbar=_panel_colorbar("Accuracy", x=1.02, y=0.86)
        ),
        name='Train vs Val',
        hovertemplate='<b>%{text}</b><br>Train Accuracy: %{x:.3f}<br>Val Accuracy: %{y:.3f}<extra></extra>'
    ),
    row=1, col=3
)

# Reference diagonal: points below it have train accuracy above validation accuracy.
fig.add_shape(
    type="line", line=dict(dash='dash', color='grey'),
    x0=0, y0=0, x1=1, y1=1,
    row=1, col=3
)

# 4. Training time comparison
fig.add_trace(
    _metric_bar('Training_Time_Minutes', 'Training Time (min)', 'Training Time', '.1f', 1,
                px.colors.qualitative.Pastel, unit=' minutes'),
    row=2, col=1
)

# 5. Parameters comparison
fig.add_trace(
    _metric_bar('Parameters_Millions', 'Parameters (M)', 'Parameters', '.2f', 2,
                px.colors.qualitative.Set2, unit='M'),
    row=2, col=2
)

# 6. Overfitting analysis (green when validation is at least as good as training)
fig.add_trace(
    _metric_bar('Overfitting_Gap', 'Overfitting Gap', 'Overfitting Gap', '.3f', 3,
                ['green' if gap <= 0 else 'red' for gap in comparison_df['Overfitting_Gap']]),
    row=2, col=3
)

# 7. Efficiency score
fig.add_trace(
    _metric_bar('Efficiency_Score', 'Efficiency', 'Efficiency', '.4f', 4,
                px.colors.sequential.Viridis),
    row=3, col=1
)

# 8. Accuracy vs Training Time scatter (bubble area = parameter count)
fig.add_trace(
    go.Scatter(
        x=comparison_df['Training_Time_Minutes'],
        y=comparison_df['Accuracy'],
        mode='markers',
        text=comparison_df['Model'],
        marker=dict(
            color=comparison_df['Efficiency_Score'],
            colorscale='Plasma',
            showscale=True,
            colorbar=_panel_colorbar("Efficiency", x=1.02, y=0.14),
            **_bubble_size
        ),
        name='Accuracy vs Time',
        hovertemplate='<b>%{text}</b><br>Accuracy: %{y:.3f}<br>Training Time: %{x:.1f} min<extra></extra>'
    ),
    row=3, col=2
)

# 9. Accuracy vs Efficiency scatter (bubble area = parameter count)
fig.add_trace(
    go.Scatter(
        x=comparison_df['Efficiency_Score'],
        y=comparison_df['Accuracy'],
        mode='markers',
        text=comparison_df['Model'],
        marker=dict(
            color=comparison_df['Training_Time_Minutes'],
            colorscale='Viridis',
            showscale=True,
            colorbar=_panel_colorbar("Training Time (min)", x=1.10, y=0.14),
            **_bubble_size
        ),
        name='Accuracy vs Efficiency',
        hovertemplate='<b>%{text}</b><br>Accuracy: %{y:.3f}<br>Efficiency: %{x:.4f}<extra></extra>'
    ),
    row=3, col=3
)

# Update axes properties: slanted model names on every bar panel ...
for _row, _col in ((1, 1), (1, 2), (2, 1), (2, 2), (2, 3), (3, 1)):
    fig.update_xaxes(tickangle=45, row=_row, col=_col)

# ... and axis titles keyed by (row, col).
_y_titles = {
    (1, 1): "Accuracy",
    (1, 2): "F1 Score",
    (1, 3): "Validation Accuracy",
    (2, 1): "Time (Minutes)",
    (2, 2): "Parameters (Millions)",
    (2, 3): "Overfitting Gap",
    (3, 1): "Efficiency Score",
    (3, 2): "Accuracy",
    (3, 3): "Accuracy",
}
_x_titles = {
    (1, 3): "Training Accuracy",
    (3, 2): "Training Time (Minutes)",
    (3, 3): "Efficiency Score",
}
for (_row, _col), _title in _y_titles.items():
    fig.update_yaxes(title_text=_title, row=_row, col=_col)
for (_row, _col), _title in _x_titles.items():
    fig.update_xaxes(title_text=_title, row=_row, col=_col)

# Show the interactive plot
fig.show()

# Create a radar chart for comprehensive comparison
categories = ['Accuracy', 'F1_Score_Weighted', 'Efficiency_Score', 'Training_Time_Minutes', 'Parameters_Millions']
categories_norm = [f'{cat}_norm' for cat in categories]

# Min-max normalise every metric to 0-1 across models. A metric that is
# identical for all models (for example parameter counts that all defaulted
# to 0) has a zero range; it is plotted as 0 instead of dividing by zero and
# filling the chart with NaN.
radar_df = comparison_df.copy()
for cat in categories:
    lowest = radar_df[cat].min()
    value_range = radar_df[cat].max() - lowest
    if value_range > 0:
        radar_df[f'{cat}_norm'] = (radar_df[cat] - lowest) / value_range
    else:
        radar_df[f'{cat}_norm'] = 0.0

# Axis labels are the same for every model; repeat the first one so each
# polygon closes on itself.
radar_axis_labels = [cat.replace('_', ' ').title() for cat in categories]
radar_axis_labels_closed = radar_axis_labels + radar_axis_labels[:1]

# Create radar chart (one filled polygon per model)
fig_radar = go.Figure()

for _, row in radar_df.iterrows():
    values = row[categories_norm].tolist()
    values += values[:1]  # Close the circle

    fig_radar.add_trace(go.Scatterpolar(
        r=values,
        theta=radar_axis_labels_closed,
        fill='toself',
        name=row['Model'],
        hovertemplate='<b>%{theta}</b>: %{r:.2f}<extra></extra>'
    ))

fig_radar.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=[0, 1]
        )),
    showlegend=True,
    title='Comprehensive Model Comparison (Radar Chart)',
    title_x=0.5,
    height=600,
    width=800
)

fig_radar.show()

# Create training history comparison plot: per-epoch training accuracy (solid)
# and validation accuracy (dashed) for every model.
fig_history = go.Figure()

for model_name, result in results.items():
    history = result['history']
    # 'lines+markers' rather than 'lines': a history holding a single value
    # has no segment to draw and would otherwise be invisible.
    fig_history.add_trace(go.Scatter(
        x=list(range(1, len(history['accuracy']) + 1)),
        y=history['accuracy'],
        mode='lines+markers',
        name=f'{model_name} (Train)',
        hovertemplate='Epoch: %{x}<br>Accuracy: %{y:.3f}<extra></extra>'
    ))

    if 'val_accuracy' in history:
        fig_history.add_trace(go.Scatter(
            x=list(range(1, len(history['val_accuracy']) + 1)),
            y=history['val_accuracy'],
            mode='lines+markers',
            name=f'{model_name} (Validation)',
            line=dict(dash='dash'),
            hovertemplate='Epoch: %{x}<br>Accuracy: %{y:.3f}<extra></extra>'
        ))

fig_history.update_layout(
    title='Training History Comparison',
    xaxis_title='Epochs',
    yaxis_title='Accuracy',
    hovermode='closest',
    height=600,
    width=1000
)

fig_history.show()

# Detailed analysis and recommendations.
# The emoji/bullet prefixes below were stored as mis-encoded byte sequences
# and printed as garbage; they are restored to the intended characters.
print("\n" + "="*80)
print("DETAILED ANALYSIS AND RECOMMENDATIONS")
print("="*80)

# Rows of interest, selected by index label from comparison_df.
best_model = comparison_df.loc[comparison_df['Accuracy'].idxmax()]
worst_model = comparison_df.loc[comparison_df['Accuracy'].idxmin()]
most_efficient = comparison_df.loc[comparison_df['Efficiency_Score'].idxmax()]
lightest_model = comparison_df.loc[comparison_df['Parameters_Millions'].idxmin()]

print(f"\n🏆 BEST OVERALL MODEL: {best_model['Model']}")
print(f"   📈 Accuracy: {best_model['Accuracy']:.4f}")
print(f"   🎯 F1-Score (Weighted): {best_model['F1_Score_Weighted']:.4f}")
print(f"   ⏱️  Training Time: {best_model['Training_Time_Minutes']:.1f} minutes")
print(f"   🧮 Parameters: {best_model['Parameters_Millions']:.2f}M")

print(f"\n⚡ MOST EFFICIENT MODEL: {most_efficient['Model']}")
print(f"   🚀 Efficiency Score: {most_efficient['Efficiency_Score']:.4f} (Accuracy per minute)")
print(f"   📈 Accuracy: {most_efficient['Accuracy']:.4f}")
print(f"   ⏱️  Training Time: {most_efficient['Training_Time_Minutes']:.1f} minutes")

print(f"\n📉 WORST PERFORMING MODEL: {worst_model['Model']}")
print(f"   📈 Accuracy: {worst_model['Accuracy']:.4f}")
print(f"   ⚠️  Potential issues: Overfitting gap: {worst_model['Overfitting_Gap']:.4f}")

print(f"\n🏋️  LIGHTEST MODEL: {lightest_model['Model']}")
print(f"   🧮 Parameters: {lightest_model['Parameters_Millions']:.2f}M")
print(f"   📈 Accuracy: {lightest_model['Accuracy']:.4f}")

# Additional insights
print(f"\n💡 KEY INSIGHTS:")
print(f"   • Accuracy range: {comparison_df['Accuracy'].min():.4f} - {comparison_df['Accuracy'].max():.4f}")
print(f"   • Training time range: {comparison_df['Training_Time_Minutes'].min():.1f} - {comparison_df['Training_Time_Minutes'].max():.1f} minutes")
print(f"   • Average overfitting gap: {comparison_df['Overfitting_Gap'].mean():.4f}")

# Persist the comparison table; the status message below announces this file,
# so it has to be written here.
comparison_df.to_csv('skipgram_nn_results_comparison.csv', index=False)

# Save all results to a single HTML file. UTF-8 is requested explicitly so the
# output does not depend on the platform's default encoding.
with open('model_comparison_report.html', 'w', encoding='utf-8') as f:
    f.write("""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Model Comparison Report</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 40px; }
            h1 { color: #2c3e50; text-align: center; }
            h2 { color: #3498db; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
            .summary { background-color: #f8f9fa; padding: 20px; border-radius: 5px; margin-bottom: 20px; }
            .insight { background-color: #e8f4f8; padding: 10px; border-left: 4px solid #3498db; margin: 10px 0; }
        </style>
    </head>
    <body>
        <h1>Comprehensive Model Comparison Report</h1>
    """)

    # Add summary
    f.write("<h2>Summary</h2>")
    f.write("<div class='summary'>")
    f.write(f"<p><b>Best Model:</b> {best_model['Model']} (Accuracy: {best_model['Accuracy']:.4f}, Params: {best_model['Parameters_Millions']:.2f}M)</p>")
    f.write(f"<p><b>Most Efficient:</b> {most_efficient['Model']} (Efficiency: {most_efficient['Efficiency_Score']:.4f})</p>")
    f.write(f"<p><b>Lightest Model:</b> {lightest_model['Model']} ({lightest_model['Parameters_Millions']:.2f}M params)</p>")
    f.write("</div>")

    # Add key insights
    f.write("<h2>Key Insights</h2>")
    f.write("<div class='insight'>")
    f.write(f"<p>Accuracy range: {comparison_df['Accuracy'].min():.4f} - {comparison_df['Accuracy'].max():.4f}</p>")
    f.write(f"<p>Training time range: {comparison_df['Training_Time_Minutes'].min():.1f} - {comparison_df['Training_Time_Minutes'].max():.1f} minutes</p>")
    f.write(f"<p>Average overfitting gap: {comparison_df['Overfitting_Gap'].mean():.4f}</p>")
    f.write("</div>")

    # Embed the interactive figures so the report really contains the
    # visualizations. The first figure emits the Plotly.js <script> tag (CDN
    # build matching the installed plotly version); the others reuse it. This
    # replaces the hard-coded 'plotly-latest' script, which loaded a library
    # without any figure to render.
    f.write("<h2>Interactive Charts</h2>")
    for position, report_figure in enumerate((fig, fig_radar, fig_history)):
        f.write(report_figure.to_html(
            full_html=False,
            include_plotlyjs='cdn' if position == 0 else False,
        ))

    f.write("</body></html>")

print("\n✅ Report generated and saved as 'model_comparison_report.html'")
print("✅ CSV comparison saved as 'skipgram_nn_results_comparison.csv'")
print("✅ Visualizations saved as interactive plots (open in notebook or browser)")


## VISUALIZATION: 2 - FINALIZED

In [None]:
import pandas as pd
import numpy as np

# --- Build comparison dataframe from results dictionary ---
# Every field of a `results` entry is read with a default, so a model that is
# missing a metric still produces a row (filled with zeros) instead of raising.

def _last_or_zero(curve):
    """Return the final value of a per-epoch metric list, or 0 when the list is empty."""
    return curve[-1] if len(curve) else 0


comparison_data = []

for model_name, result in results.items():
    history = result.get('history', {})
    train_acc_curve = history.get('accuracy', [])
    val_acc_curve = history.get('val_accuracy', [])
    final_train_acc = _last_or_zero(train_acc_curve)
    final_val_acc = _last_or_zero(val_acc_curve)

    accuracy = result.get('accuracy', 0)
    training_seconds = result.get('training_time', 0)
    parameter_count = result.get('parameters', 0)

    # Accuracy per minute. A missing time keeps the earlier 1-second fallback;
    # a recorded time of 0 now scores 0.0 instead of raising ZeroDivisionError.
    efficiency_seconds = result.get('training_time', 1)
    efficiency = accuracy / (efficiency_seconds / 60) if efficiency_seconds else 0.0

    comparison_data.append({
        'Model': model_name,
        'Accuracy': accuracy,
        'F1_Score_Weighted': result.get('f1_score', 0),
        'F1_Score_Macro': result.get('f1_macro', 0),
        'Training_Time_Seconds': training_seconds,
        'Training_Time_Minutes': training_seconds / 60,
        'Epochs_Trained': result.get('epochs_trained', len(train_acc_curve)),
        'Final_Train_Accuracy': final_train_acc,
        'Final_Val_Accuracy': final_val_acc,
        'Final_Train_Loss': _last_or_zero(history.get('loss', [])),
        'Final_Val_Loss': _last_or_zero(history.get('val_loss', [])),
        # default=0 covers a history whose val_accuracy list exists but is empty
        'Best_Val_Accuracy': max(val_acc_curve, default=0),
        'Parameters': parameter_count,
        'Parameters_Millions': parameter_count / 1e6,
        'Efficiency_Score': efficiency,
        # Positive gap = training accuracy ended above validation accuracy
        'Overfitting_Gap': final_train_acc - final_val_acc
    })

comparison_df = pd.DataFrame(comparison_data)

# Sort by accuracy (best model first); the original index labels are kept
comparison_df = comparison_df.sort_values('Accuracy', ascending=False)

# Fix column display name if needed
comparison_df['Model'] = comparison_df['Model'].replace({'Bidirectional_LSTM_Skipgram': 'Bidirectional_LSTM'})

# Display rounded table
print(comparison_df.round(4).to_string(index=False))



# Presentation copy of the comparison table: keep only the headline metrics
# and give each column a short, human-readable header.
summary_headers = {
    'Model': 'Model',
    'Accuracy': 'Accuracy',
    'F1_Score_Weighted': 'F1 (Weighted)',
    'F1_Score_Macro': 'F1 (Macro)',
    'Training_Time_Minutes': 'Time (Min)',
    'Parameters_Millions': 'Params (M)',
    'Efficiency_Score': 'Efficiency',
}
styled_df = comparison_df[list(summary_headers)].rename(columns=summary_headers)

# Apply conditional formatting
def highlight_max(s):
    """Column-wise Styler callback that shades every cell equal to the column maximum.

    Args:
        s: pandas Series holding one column of the table being styled.

    Returns:
        A list with one CSS string per cell: a light-green background for
        cells equal to ``s.max()`` and an empty string for all others.
    """
    peak = s.max()
    return ['background-color: lightgreen' if cell == peak else '' for cell in s]

def highlight_min(s):
    """Column-wise Styler callback that shades every cell equal to the column minimum.

    Args:
        s: pandas Series holding one column of the table being styled.

    Returns:
        A list with one CSS string per cell: a light-coral background for
        cells equal to ``s.min()`` and an empty string for all others.
    """
    floor = s.min()
    return ['background-color: lightcoral' if cell == floor else '' for cell in s]

# Build the styled view of the summary table:
#   * fixed decimal places per column,
#   * highlight_max marks the largest value of each score-like column,
#   * highlight_min marks the smallest value of the time / size columns,
#   * centred cells and a dark header row.
styled_display = (
    styled_df.style
    .format({
        'Accuracy': '{:.3f}',
        'F1 (Weighted)': '{:.3f}',
        'F1 (Macro)': '{:.3f}',
        'Time (Min)': '{:.1f}',
        'Params (M)': '{:.2f}',
        'Efficiency': '{:.4f}'
    })
    .apply(highlight_max, subset=['Accuracy', 'F1 (Weighted)', 'F1 (Macro)', 'Efficiency'])
    .apply(highlight_min, subset=['Time (Min)', 'Params (M)'])
    .set_properties(**{'text-align': 'center'})
    .set_table_styles([{
        'selector': 'th',
        'props': [('background-color', '#40466e'), ('color', 'white'), ('font-weight', 'bold')]
    }])
)

# The heading emoji had been stored as mis-decoded UTF-8 bytes; restored to U+1F4CA.
print("\n📊 STYLED COMPARISON TABLE:")
display(styled_display)

# Save detailed results (full table) and the trimmed summary table
comparison_df.to_csv('skipgram_nn_detailed_results.csv', index=False)
styled_df.to_csv('skipgram_nn_summary_results.csv', index=False)

# Enhanced visual comparison with Plotly and Seaborn
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns

# Set seaborn style
sns.set_style("whitegrid")
sns.set_palette("husl")

# Create a comprehensive dashboard with Plotly.
# subplot_titles are listed row by row, so the nine titles line up with the
# 3x3 `specs` grid below. The emoji prefixes had been stored as mis-decoded
# UTF-8 bytes and rendered as garbage; they are restored to the intended characters.
fig = make_subplots(
    rows=3, cols=3,
    subplot_titles=(
        '📈 Model Accuracy Comparison', '🎯 F1 Score Comparison',
        '🔄 Training vs Validation Accuracy', '⏰ Training Time Comparison',
        '🧮 Model Size (Parameters)', '⚖️ Overfitting Analysis',
        '🚀 Training Efficiency', '⏱️ Accuracy vs Training Time',
        '📊 Comprehensive Model Comparison (Radar)'
    ),
    specs=[
        [{"type": "bar"}, {"type": "bar"}, {"type": "scatter"}],
        [{"type": "bar"}, {"type": "bar"}, {"type": "bar"}],
        # the last cell is polar because it hosts the Scatterpolar radar traces
        [{"type": "bar"}, {"type": "scatter"}, {"type": "polar"}]
    ],
    vertical_spacing=0.08,
    horizontal_spacing=0.08
)

# Update layout: centred title, fixed canvas size, shared legend
fig.update_layout(
    title_text='COMPREHENSIVE MODEL COMPARISON (Skip-gram Embeddings)',
    title_font_size=20,
    title_font_color='darkblue',
    title_x=0.5,
    height=1200,
    width=1400,
    showlegend=True,
    template='plotly_white'
)

# 1. Accuracy comparison: one bar per model, with the rounded value printed on the bar
accuracy_bars = go.Bar(
    name='Accuracy',
    x=comparison_df['Model'],
    y=comparison_df['Accuracy'],
    text=comparison_df['Accuracy'].round(3),
    textposition='auto',
    marker_color=px.colors.qualitative.Set3,
)
fig.add_trace(accuracy_bars, row=1, col=1)

# 2. F1 Scores comparison: weighted and macro F1 are two traces on the same subplot
f1_traces = (
    ('F1_Score_Weighted', 'Weighted F1', 'lightblue'),
    ('F1_Score_Macro', 'Macro F1', 'lightcoral'),
)
for f1_column, f1_label, f1_color in f1_traces:
    fig.add_trace(
        go.Bar(
            x=comparison_df['Model'],
            y=comparison_df[f1_column],
            name=f1_label,
            marker_color=f1_color,
        ),
        row=1, col=2,
    )

# 3. Training vs Validation Accuracy: one labelled point per model,
#    coloured by its overall accuracy (colour bar shown)
train_val_marker = {
    'size': 15,
    'color': comparison_df['Accuracy'],
    'colorscale': 'Viridis',
    'showscale': True,
    'colorbar': {'title': "Accuracy"},
}
train_val_points = go.Scatter(
    name='Train vs Val',
    x=comparison_df['Final_Train_Accuracy'],
    y=comparison_df['Final_Val_Accuracy'],
    mode='markers+text',
    text=comparison_df['Model'],
    textposition='top center',
    marker=train_val_marker,
)
fig.add_trace(train_val_points, row=1, col=3)

# Add reference line: the y = x diagonal, where validation accuracy equals
# training accuracy (points below it have the higher training accuracy)
fig.add_shape(
    type="line",
    x0=0, y0=0, x1=1, y1=1,
    line={'dash': 'dash', 'color': 'grey'},
    row=1, col=3,
)

# Panels 4-7 are all "one bar per model with the rounded value on the bar";
# they differ only in column, legend name, rounding, colours and grid cell.

# Panel 6 colours: green when validation matched or beat training accuracy
# (gap <= 0), red otherwise
gap_colors = ['green' if gap <= 0 else 'red' for gap in comparison_df['Overfitting_Gap']]

bar_panels = [
    # (column, legend name, label decimals, marker colours, (row, col))
    ('Training_Time_Minutes', 'Training Time (min)', 1, px.colors.qualitative.Pastel, (2, 1)),  # 4. Training time comparison
    ('Parameters_Millions', 'Parameters (M)', 2, px.colors.qualitative.Set2, (2, 2)),           # 5. Parameters comparison
    ('Overfitting_Gap', 'Overfitting Gap', 3, gap_colors, (2, 3)),                              # 6. Overfitting analysis
    ('Efficiency_Score', 'Efficiency', 4, px.colors.sequential.Viridis, (3, 1)),                # 7. Efficiency score
]

for panel_column, panel_name, panel_decimals, panel_colors, (panel_row, panel_col) in bar_panels:
    fig.add_trace(
        go.Bar(
            x=comparison_df['Model'],
            y=comparison_df[panel_column],
            marker_color=panel_colors,
            text=comparison_df[panel_column].round(panel_decimals),
            textposition='auto',
            name=panel_name,
        ),
        row=panel_row, col=panel_col,
    )

# 8. Accuracy vs Training Time scatter: marker area follows the parameter
#    count, marker colour follows the efficiency score (colour bar shown)
accuracy_time_marker = {
    'size': comparison_df['Parameters_Millions'] * 5,  # Scale by parameter count
    'color': comparison_df['Efficiency_Score'],
    'colorscale': 'Plasma',
    'showscale': True,
    'colorbar': {'title': "Efficiency"},
}
accuracy_time_points = go.Scatter(
    name='Accuracy vs Time',
    x=comparison_df['Training_Time_Minutes'],
    y=comparison_df['Accuracy'],
    mode='markers+text',
    text=comparison_df['Model'],
    textposition='top center',
    marker=accuracy_time_marker,
)
fig.add_trace(accuracy_time_points, row=3, col=2)

# 9. Radar chart for comprehensive comparison
# NOTE(review): Parameters_Millions is drawn "larger = further out" like the
# three score metrics, so heavier models look stronger on that axis — confirm
# this is the intended reading.
categories = ['Accuracy', 'F1_Score_Weighted', 'Efficiency_Score', 'Parameters_Millions']
categories_norm = [f'{cat}_norm' for cat in categories]

# Min-max normalise each metric to 0-1 so all four share one radial axis
radar_df = comparison_df.copy()
for cat in categories:
    cat_min = radar_df[cat].min()
    cat_span = radar_df[cat].max() - cat_min
    if cat_span == 0:
        # Every model ties on this metric (or there is a single model): the
        # plain formula would be 0/0 = NaN and the vertex would vanish.
        radar_df[f'{cat}_norm'] = 0.0
    else:
        radar_df[f'{cat}_norm'] = (radar_df[cat] - cat_min) / cat_span

# Axis labels are the same for every model; the first is repeated at the end
# so each polygon closes on its starting vertex.
radar_labels = [cat.replace('_', ' ').title() for cat in categories]
radar_labels.append(radar_labels[0])

# Plot radar chart: one filled polygon per model
for i, row in radar_df.iterrows():
    values = row[categories_norm].tolist()
    values += values[:1]  # Close the circle

    fig.add_trace(
        go.Scatterpolar(
            r=values,
            theta=radar_labels,
            fill='toself',
            name=row['Model'],
            showlegend=True
        ),
        row=3, col=3
    )

# Update axes properties
# Slant the model-name tick labels on each of the six bar subplots
for tick_row, tick_col in ((1, 1), (1, 2), (2, 1), (2, 2), (2, 3), (3, 1)):
    fig.update_xaxes(tickangle=45, row=tick_row, col=tick_col)

# Y-axis titles, keyed by (row, col) of the subplot
y_axis_titles = {
    (1, 1): "Accuracy",
    (1, 2): "F1 Score",
    (1, 3): "Validation Accuracy",
    (2, 1): "Time (Minutes)",
    (2, 2): "Parameters (Millions)",
    (2, 3): "Overfitting Gap",
    (3, 1): "Efficiency Score",
    (3, 2): "Accuracy",
}
for (title_row, title_col), axis_title in y_axis_titles.items():
    fig.update_yaxes(title_text=axis_title, row=title_row, col=title_col)

# Only the two scatter subplots need an explicit x-axis title
x_axis_titles = {
    (1, 3): "Training Accuracy",
    (3, 2): "Training Time (Minutes)",
}
for (title_row, title_col), axis_title in x_axis_titles.items():
    fig.update_xaxes(title_text=axis_title, row=title_row, col=title_col)

# Save the interactive plot as a standalone HTML file
fig.write_html("skipgram_models_interactive_dashboard.html")

# Also create a static version with matplotlib for compatibility
# NOTE(review): the panel titles in this static figure carry mis-decoded emoji
# bytes; they are reproduced unchanged here — decide whether to drop them or
# configure an emoji-capable font.
static_fig = plt.figure(figsize=(20, 16))
static_fig.suptitle('COMPREHENSIVE MODEL COMPARISON (Skip-gram Embeddings)',
                    fontsize=16, fontweight='bold', y=0.98)

# Create subplot grid (3 rows x 3 columns, one cell per panel)
gs = plt.GridSpec(3, 3)

# 1. Accuracy comparison: one bar per model, value written just above the bar
ax1 = plt.subplot(gs[0, 0])
accuracy_values = comparison_df['Accuracy']
model_palette = sns.color_palette("husl", len(comparison_df))
bars = ax1.bar(comparison_df['Model'], accuracy_values, color=model_palette, alpha=0.8)
ax1.set_title('üìà Model Accuracy Comparison', fontweight='bold', pad=20)
ax1.set_ylabel('Accuracy')
ax1.tick_params(axis='x', rotation=70)
for bar, acc in zip(bars, accuracy_values):
    label_x = bar.get_x() + bar.get_width() / 2
    label_y = bar.get_height() + 0.01
    ax1.text(label_x, label_y, f'{acc:.3f}', ha='center', fontweight='bold')

# 2. F1 Scores comparison: grouped bars, weighted F1 left of each tick and macro F1 right
ax2 = plt.subplot(gs[0, 1])
width = 0.35
x = np.arange(len(comparison_df))
f1_groups = (
    # (side of the tick, column, legend label, colour)
    (-1, 'F1_Score_Weighted', 'Weighted F1', 'skyblue'),
    (+1, 'F1_Score_Macro', 'Macro F1', 'lightcoral'),
)
for f1_side, f1_column, f1_label, f1_color in f1_groups:
    ax2.bar(x + f1_side * width / 2, comparison_df[f1_column], width,
            label=f1_label, alpha=0.8, color=f1_color)
ax2.set_title('üéØ F1 Score Comparison', fontweight='bold', pad=20)
ax2.set_ylabel('F1 Score')
ax2.set_xticks(x)
ax2.set_xticklabels(comparison_df['Model'], rotation=70)
ax2.legend()

# 3. Training vs Validation Accuracy: one point per model, coloured by overall accuracy
ax3 = plt.subplot(gs[0, 2])
final_train = comparison_df['Final_Train_Accuracy']
final_val = comparison_df['Final_Val_Accuracy']
scatter = ax3.scatter(final_train, final_val,
                      c=comparison_df['Accuracy'], cmap='viridis', s=100, alpha=0.8)
# Label each point with its model name, nudged 5pt up and right
for txt, train_acc, val_acc in zip(comparison_df['Model'], final_train, final_val):
    ax3.annotate(txt, (train_acc, val_acc),
                 xytext=(5, 5), textcoords='offset points', fontsize=3)
# Dashed y = x diagonal: validation accuracy equal to training accuracy
ax3.plot([0, 1], [0, 1], 'k--', alpha=0.3)
ax3.set_xlabel('Training Accuracy')
ax3.set_ylabel('Validation Accuracy')
ax3.set_title('üîÑ Training vs Validation Accuracy', fontweight='bold', pad=20)
plt.colorbar(scatter, ax=ax3, label='Accuracy')

# Panels 4-7 share one layout: a bar per model, a bold title, a y-label,
# slanted model names and each value written at "bar height + offset".
def _labelled_bar_panel(ax, column, bar_colors, title, ylabel, label_offset, label_format):
    """Draw `comparison_df[column]` as one bar per model on `ax` and label every bar.

    Args:
        ax: matplotlib Axes to draw on.
        column: name of the comparison_df column to plot.
        bar_colors: colour(s) passed to ``ax.bar``.
        title: panel title.
        ylabel: y-axis label.
        label_offset: amount added to the bar height to place the value label.
        label_format: ``str.format`` template applied to each value.

    Returns:
        The bar container returned by ``ax.bar``.
    """
    panel_values = comparison_df[column]
    drawn = ax.bar(comparison_df['Model'], panel_values, color=bar_colors, alpha=0.8)
    ax.set_title(title, fontweight='bold', pad=20)
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=70)
    for rect, panel_value in zip(drawn, panel_values):
        ax.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + label_offset,
                label_format.format(panel_value), ha='center')
    return drawn


# 4. Training time comparison
ax4 = plt.subplot(gs[1, 0])
bars = _labelled_bar_panel(
    ax4, 'Training_Time_Minutes', sns.color_palette("husl", len(comparison_df)),
    '‚è∞ Training Time Comparison', 'Time (Minutes)', 0.1, '{:.1f}m')

# 5. Parameters comparison
ax5 = plt.subplot(gs[1, 1])
bars = _labelled_bar_panel(
    ax5, 'Parameters_Millions', sns.color_palette("husl", len(comparison_df)),
    'üßÆ Model Size (Parameters)', 'Parameters (Millions)', 0.01, '{:.2f}M')

# 6. Overfitting analysis: green when the train - val gap is <= 0, red otherwise
ax6 = plt.subplot(gs[1, 2])
colors = ['green' if gap <= 0 else 'red' for gap in comparison_df['Overfitting_Gap']]
bars = _labelled_bar_panel(
    ax6, 'Overfitting_Gap', colors,
    '‚öñÔ∏è Overfitting Analysis (Train - Val Accuracy Gap)', 'Accuracy Gap', 0.001, '{:.3f}')
# Zero line separating positive from negative gaps
ax6.axhline(y=0, color='red', linestyle='--', alpha=0.7)

# 7. Efficiency score
ax7 = plt.subplot(gs[2, 0])
bars = _labelled_bar_panel(
    ax7, 'Efficiency_Score', sns.color_palette("husl", len(comparison_df)),
    'üöÄ Training Efficiency (Accuracy per Minute)', 'Efficiency Score', 0.001, '{:.4f}')

# 8. Accuracy vs Training Time scatter: marker area follows the parameter
#    count, marker colour follows the efficiency score
ax8 = plt.subplot(gs[2, 1])
minutes_trained = comparison_df['Training_Time_Minutes']
model_accuracy = comparison_df['Accuracy']
scatter = ax8.scatter(minutes_trained, model_accuracy,
                      c=comparison_df['Efficiency_Score'], cmap='plasma',
                      s=comparison_df['Parameters_Millions'] * 50, alpha=0.8)
# Label each point with its model name, nudged 5pt up and right
for txt, minutes, acc in zip(comparison_df['Model'], minutes_trained, model_accuracy):
    ax8.annotate(txt, (minutes, acc),
                 xytext=(5, 5), textcoords='offset points', fontsize=3)
ax8.set_xlabel('Training Time (Minutes)')
ax8.set_ylabel('Accuracy')
ax8.set_title('‚è±Ô∏è Accuracy vs Training Time', fontweight='bold', pad=20)
plt.colorbar(scatter, ax=ax8, label='Efficiency Score')

# 9. Radar chart
# NOTE(review): Parameters_Millions is drawn "larger = further out" like the
# three score metrics, so heavier models look stronger on that axis — confirm
# this is the intended reading.
ax9 = plt.subplot(gs[2, 2], polar=True)
categories = ['Accuracy', 'F1_Score_Weighted', 'Efficiency_Score', 'Parameters_Millions']
categories_norm = [f'{cat}_norm' for cat in categories]

# Min-max normalise each metric to 0-1 so all four share one radial axis
radar_df = comparison_df.copy()
for cat in categories:
    cat_min = radar_df[cat].min()
    cat_span = radar_df[cat].max() - cat_min
    if cat_span == 0:
        # Every model ties on this metric (or there is a single model): the
        # plain formula would be 0/0 = NaN and the vertex would not be drawn.
        radar_df[f'{cat}_norm'] = 0.0
    else:
        radar_df[f'{cat}_norm'] = (radar_df[cat] - cat_min) / cat_span

# Plot radar chart: one evenly spaced angle per metric
angles = [n / float(len(categories)) * 2 * np.pi for n in range(len(categories))]
angles += angles[:1]  # Close the circle

colors = sns.color_palette("husl", len(radar_df))
for i, (idx, row) in enumerate(radar_df.iterrows()):
    values = row[categories_norm].tolist()
    values += values[:1]  # Close the circle
    ax9.plot(angles, values, 'o-', linewidth=2, label=row['Model'], color=colors[i])
    ax9.fill(angles, values, alpha=0.1, color=colors[i])

# The duplicated closing angle is dropped so each metric gets exactly one tick
ax9.set_xticks(angles[:-1])
ax9.set_xticklabels([cat.replace('_', ' ').title() for cat in categories])
ax9.set_title('üìä Comprehensive Model Comparison (Radar)', fontweight='bold', pad=20)
# Legend is anchored outside the polar axes so it does not cover the polygons
ax9.legend(bbox_to_anchor=(1.3, 1), loc='upper left')

plt.tight_layout()
plt.savefig('skipgram_models_comprehensive_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# Detailed analysis and recommendations
# All emoji / bullet prefixes below had been stored as mis-decoded UTF-8 bytes
# and printed as garbage; they are restored to the intended characters.
print("\n" + "="*80)
print("DETAILED ANALYSIS AND RECOMMENDATIONS")
print("="*80)

# idxmax()/idxmin() return index labels, so .loc selects the matching row
# whatever order the rows are in.
best_model = comparison_df.loc[comparison_df['Accuracy'].idxmax()]
worst_model = comparison_df.loc[comparison_df['Accuracy'].idxmin()]
most_efficient = comparison_df.loc[comparison_df['Efficiency_Score'].idxmax()]
lightest_model = comparison_df.loc[comparison_df['Parameters_Millions'].idxmin()]

print(f"\n🏆 BEST OVERALL MODEL: {best_model['Model']}")
print(f"   📈 Accuracy: {best_model['Accuracy']:.4f}")
print(f"   🎯 F1-Score (Weighted): {best_model['F1_Score_Weighted']:.4f}")
print(f"   ⏱️  Training Time: {best_model['Training_Time_Minutes']:.1f} minutes")
print(f"   🧮 Parameters: {best_model['Parameters_Millions']:.2f}M")

print(f"\n⚡ MOST EFFICIENT MODEL: {most_efficient['Model']}")
print(f"   🚀 Efficiency Score: {most_efficient['Efficiency_Score']:.4f} (Accuracy per minute)")
print(f"   📈 Accuracy: {most_efficient['Accuracy']:.4f}")
print(f"   ⏱️  Training Time: {most_efficient['Training_Time_Minutes']:.1f} minutes")

print(f"\n📉 WORST PERFORMING MODEL: {worst_model['Model']}")
print(f"   📈 Accuracy: {worst_model['Accuracy']:.4f}")
print(f"   ⚠️  Potential issues: Overfitting gap: {worst_model['Overfitting_Gap']:.4f}")

print(f"\n🏋️  LIGHTEST MODEL: {lightest_model['Model']}")
print(f"   🧮 Parameters: {lightest_model['Parameters_Millions']:.2f}M")
print(f"   📈 Accuracy: {lightest_model['Accuracy']:.4f}")

# Additional insights
print(f"\n💡 KEY INSIGHTS:")
print(f"   • Accuracy range: {comparison_df['Accuracy'].min():.4f} - {comparison_df['Accuracy'].max():.4f}")
print(f"   • Training time range: {comparison_df['Training_Time_Minutes'].min():.1f} - {comparison_df['Training_Time_Minutes'].max():.1f} minutes")
print(f"   • Average overfitting gap: {comparison_df['Overfitting_Gap'].mean():.4f}")

# Save summary report. The encoding is pinned so the file is written the same
# way on every platform, whatever characters the model names contain.
with open('model_comparison_summary.txt', 'w', encoding='utf-8') as f:
    f.write("MODEL COMPARISON SUMMARY REPORT\n")
    f.write("="*50 + "\n\n")
    f.write(f"Best Model: {best_model['Model']}\n")
    f.write(f"Accuracy: {best_model['Accuracy']:.4f}\n")
    f.write(f"Worst Model: {worst_model['Model']}\n")
    f.write(f"Accuracy: {worst_model['Accuracy']:.4f}\n\n")
    f.write("Detailed Results:\n")
    f.write(comparison_df.round(4).to_string())

print(f"\n✅ Results saved to:")
print("   - skipgram_nn_detailed_results.csv")
print("   - skipgram_nn_summary_results.csv") 
print("   - skipgram_models_comprehensive_comparison.png")
print("   - skipgram_models_interactive_dashboard.html")
print("   - model_comparison_summary.txt")

## VISUALIZATION: 3

In [None]:
# Create comprehensive results comparison
# One row per trained model. Unlike a .get()-based build, the core metrics are
# indexed directly here, so a missing key raises KeyError.
print("Creating results comparison...")
comparison_data = []

for model_name, result in results.items():
    history = result['history']
    # Validation accuracy is optional in the history; fall back to 0 when absent
    final_val_accuracy = history['val_accuracy'][-1] if 'val_accuracy' in history else 0
    entry = {
        'Model': model_name,
        'Accuracy': result['accuracy'],
        'F1_Score': result['f1_score'],
        'Training_Time_Seconds': result['training_time'],
        # Prefer the recorded epoch count, else the length of the accuracy curve
        'Epochs_Trained': result.get('epochs_trained', len(history['accuracy'])),
        'Final_Train_Accuracy': history['accuracy'][-1],
        'Final_Val_Accuracy': final_val_accuracy,
        'Parameters': result.get('parameters', 0),   # use saved parameter count
    }
    comparison_data.append(entry)

comparison_df = pd.DataFrame(comparison_data)
print("Results Comparison Table:")
print(comparison_df.round(4))

# Save results
comparison_df.to_csv('skipgram_nn_results_comparison.csv', index=False)

# Visual comparison: a 2x2 grid of bar charts, one bar per model in each panel
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

vis3_panels = [
    # (grid cell, values, title, y-label, label offset above bar, label template)
    ((0, 0), comparison_df['Accuracy'],                                      # Accuracy comparison
     'Model Accuracy Comparison (Skip-gram)', 'Accuracy', 0.01, '{:.3f}'),
    ((0, 1), comparison_df['F1_Score'],                                      # F1-Score comparison
     'Model F1-Score Comparison (Skip-gram)', 'F1-Score', 0.01, '{:.3f}'),
    ((1, 0), comparison_df['Training_Time_Seconds'],                         # Training time comparison
     'Training Time Comparison (Seconds)', 'Time (Seconds)', 5, '{:.0f}s'),
    ((1, 1), comparison_df['Parameters'] / 1e6,                              # Parameters comparison (in millions)
     'Number of Parameters (Millions)', 'Parameters (M)', 0.1, '{:.2f}M'),
]

for vis3_cell, vis3_values, vis3_title, vis3_ylabel, vis3_offset, vis3_template in vis3_panels:
    vis3_ax = axes[vis3_cell]
    vis3_ax.bar(comparison_df['Model'], vis3_values)
    vis3_ax.set_title(vis3_title)
    vis3_ax.set_ylabel(vis3_ylabel)
    vis3_ax.tick_params(axis='x', rotation=45, labelsize=8)
    # Bars sit at x = 0, 1, 2, ... so the enumerate index is the label's x position
    for vis3_slot, vis3_value in enumerate(vis3_values):
        vis3_ax.text(vis3_slot, vis3_value + vis3_offset,
                     vis3_template.format(vis3_value), ha='center')

plt.tight_layout()
plt.savefig('skipgram_models_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# Find best and worst models (rows with the highest / lowest accuracy)
best_model = comparison_df.loc[comparison_df['Accuracy'].idxmax()]
worst_model = comparison_df.loc[comparison_df['Accuracy'].idxmin()]

# Both summaries print the same three metrics, so they share one loop.
# The heading emoji had been stored as mis-decoded UTF-8 bytes and printed as
# garbage; they are restored to the intended characters.
for summary_heading, summary_row in (
    ("\n🏆 BEST MODEL", best_model),
    ("\n📉 WORST MODEL", worst_model),
):
    print(f"{summary_heading}: {summary_row['Model']}")
    print(f"   Accuracy: {summary_row['Accuracy']:.4f}")
    print(f"   F1-Score: {summary_row['F1_Score']:.4f}")
    print(f"   Training Time: {summary_row['Training_Time_Seconds']:.2f} seconds")


# Reference Code for Analysis - Good to Keep

In [None]:
# Reference only: take the training-history plot code from this cell.

# ------------------------------------------------------------------

# # Create comprehensive results comparison with enhanced visualizations
# print("Creating comprehensive results comparison...")
# comparison_data = []

# for model_name, result in results.items():
#     history = result['history']
#     comparison_data.append({
#         'Model': model_name,
#         'Accuracy': result['accuracy'],
#         'F1_Score_Weighted': result['f1_score'],
#         'F1_Score_Macro': result['f1_macro'],
#         'Training_Time_Seconds': result['training_time'],
#         'Training_Time_Minutes': result['training_time'] / 60,
#         'Epochs_Trained': result.get('epochs_trained', len(history['accuracy'])),
#         'Final_Train_Accuracy': history['accuracy'][-1],
#         'Final_Val_Accuracy': history['val_accuracy'][-1] if 'val_accuracy' in history else 0,
#         'Final_Train_Loss': history['loss'][-1],
#         'Final_Val_Loss': history['val_loss'][-1] if 'val_loss' in history else 0,
#         'Best_Val_Accuracy': max(history['val_accuracy']) if 'val_accuracy' in history else 0,
#         'Parameters': result['model'].count_params(),
#         'Parameters_Millions': result['model'].count_params() / 1e6,
#         'Efficiency_Score': result['accuracy'] / (result['training_time'] / 60),  # Accuracy per minute
#         'Overfitting_Gap': history['accuracy'][-1] - (history['val_accuracy'][-1] if 'val_accuracy' in history else 0)
#     })

# comparison_df = pd.DataFrame(comparison_data)

# # Sort by accuracy for better visualization
# comparison_df = comparison_df.sort_values('Accuracy', ascending=False)

# print("\n" + "="*80)
# print("COMPREHENSIVE RESULTS COMPARISON")
# print("="*80)
# print(comparison_df.round(4).to_string(index=False))

# # Create styled table for better readability
# styled_df = comparison_df[['Model', 'Accuracy', 'F1_Score_Weighted', 'F1_Score_Macro', 
#                           'Training_Time_Minutes', 'Parameters_Millions', 'Efficiency_Score']].copy()
# styled_df.columns = ['Model', 'Accuracy', 'F1 (Weighted)', 'F1 (Macro)', 
#                     'Time (Min)', 'Params (M)', 'Efficiency']

# # Apply conditional formatting
# def highlight_max(s):
#     is_max = s == s.max()
#     return ['background-color: lightgreen' if v else '' for v in is_max]

# def highlight_min(s):
#     is_min = s == s.min()
#     return ['background-color: lightcoral' if v else '' for v in is_min]

# styled_display = styled_df.style\
#     .format({
#         'Accuracy': '{:.3f}',
#         'F1 (Weighted)': '{:.3f}', 
#         'F1 (Macro)': '{:.3f}',
#         'Time (Min)': '{:.1f}',
#         'Params (M)': '{:.2f}',
#         'Efficiency': '{:.4f}'
#     })\
#     .apply(highlight_max, subset=['Accuracy', 'F1 (Weighted)', 'F1 (Macro)', 'Efficiency'])\
#     .apply(highlight_min, subset=['Time (Min)', 'Params (M)'])\
#     .set_properties(**{'text-align': 'center'})\
#     .set_table_styles([{
#         'selector': 'th',
#         'props': [('background-color', '#40466e'), ('color', 'white'), ('font-weight', 'bold')]
#     }])

# print("\n📊 STYLED COMPARISON TABLE:")
# display(styled_display)

# # Create interactive visualizations with Plotly
# import plotly.express as px
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots
# import plotly.io as pio

# # Set Plotly template
# pio.templates.default = "plotly_white"

# # Create a comprehensive dashboard with subplots
# fig = make_subplots(
#     rows=3, cols=3,
#     subplot_titles=(
#         'Model Accuracy Comparison', 'F1 Score Comparison', 
#         'Training vs Validation Accuracy', 'Training Time Comparison',
#         'Model Size (Parameters)', 'Overfitting Analysis',
#         'Training Efficiency', 'Accuracy vs Training Time', 
#         'Accuracy vs Efficiency'
#     ),
#     specs=[
#         [{"type": "bar"}, {"type": "bar"}, {"type": "scatter"}],
#         [{"type": "bar"}, {"type": "bar"}, {"type": "bar"}],
#         [{"type": "bar"}, {"type": "scatter"}, {"type": "scatter"}]
#     ],
#     vertical_spacing=0.08,
#     horizontal_spacing=0.08
# )

# # Update layout
# fig.update_layout(
#     title_text='COMPREHENSIVE MODEL COMPARISON (Skip-gram Embeddings)',
#     title_font_size=20,
#     title_font_color='darkblue',
#     title_x=0.5,
#     height=1200,
#     width=1400,
#     showlegend=True,
#     template='plotly_white'
# )

# # 1. Accuracy comparison
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Accuracy'],
#         marker_color=px.colors.qualitative.Set3,
#         text=comparison_df['Accuracy'].round(3),
#         textposition='auto',
#         name='Accuracy',
#         hovertemplate='<b>%{x}</b><br>Accuracy: %{y:.3f}<extra></extra>'
#     ),
#     row=1, col=1
# )

# # 2. F1 Scores comparison
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['F1_Score_Weighted'],
#         name='Weighted F1',
#         marker_color='lightblue',
#         hovertemplate='<b>%{x}</b><br>Weighted F1: %{y:.3f}<extra></extra>'
#     ),
#     row=1, col=2
# )

# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['F1_Score_Macro'],
#         name='Macro F1',
#         marker_color='lightcoral',
#         hovertemplate='<b>%{x}</b><br>Macro F1: %{y:.3f}<extra></extra>'
#     ),
#     row=1, col=2
# )

# # 3. Training vs Validation Accuracy
# fig.add_trace(
#     go.Scatter(
#         x=comparison_df['Final_Train_Accuracy'],
#         y=comparison_df['Final_Val_Accuracy'],
#         mode='markers+text',
#         text=comparison_df['Model'],
#         textposition='top center',
#         marker=dict(
#             size=15,
#             color=comparison_df['Accuracy'],
#             colorscale='Viridis',
#             showscale=True,
#             colorbar=dict(title="Accuracy")
#         ),
#         name='Train vs Val',
#         hovertemplate='<b>%{text}</b><br>Train Accuracy: %{x:.3f}<br>Val Accuracy: %{y:.3f}<extra></extra>'
#     ),
#     row=1, col=3
# )

# # Add reference line
# fig.add_shape(
#     type="line", line=dict(dash='dash', color='grey'),
#     x0=0, y0=0, x1=1, y1=1,
#     row=1, col=3
# )

# # 4. Training time comparison
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Training_Time_Minutes'],
#         marker_color=px.colors.qualitative.Pastel,
#         text=comparison_df['Training_Time_Minutes'].round(1),
#         textposition='auto',
#         name='Training Time (min)',
#         hovertemplate='<b>%{x}</b><br>Training Time: %{y:.1f} minutes<extra></extra>'
#     ),
#     row=2, col=1
# )

# # 5. Parameters comparison
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Parameters_Millions'],
#         marker_color=px.colors.qualitative.Set2,
#         text=comparison_df['Parameters_Millions'].round(2),
#         textposition='auto',
#         name='Parameters (M)',
#         hovertemplate='<b>%{x}</b><br>Parameters: %{y:.2f}M<extra></extra>'
#     ),
#     row=2, col=2
# )

# # 6. Overfitting analysis
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Overfitting_Gap'],
#         marker_color=['green' if x <= 0 else 'red' for x in comparison_df['Overfitting_Gap']],
#         text=comparison_df['Overfitting_Gap'].round(3),
#         textposition='auto',
#         name='Overfitting Gap',
#         hovertemplate='<b>%{x}</b><br>Overfitting Gap: %{y:.3f}<extra></extra>'
#     ),
#     row=2, col=3
# )

# # 7. Efficiency score
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Efficiency_Score'],
#         marker_color=px.colors.sequential.Viridis,
#         text=comparison_df['Efficiency_Score'].round(4),
#         textposition='auto',
#         name='Efficiency',
#         hovertemplate='<b>%{x}</b><br>Efficiency: %{y:.4f}<extra></extra>'
#     ),
#     row=3, col=1
# )

# # 8. Accuracy vs Training Time scatter
# fig.add_trace(
#     go.Scatter(
#         x=comparison_df['Training_Time_Minutes'],
#         y=comparison_df['Accuracy'],
#         mode='markers',
#         text=comparison_df['Model'],
#         marker=dict(
#             size=comparison_df['Parameters_Millions']*10,
#             color=comparison_df['Efficiency_Score'],
#             colorscale='Plasma',
#             showscale=True,
#             colorbar=dict(title="Efficiency")
#         ),
#         name='Accuracy vs Time',
#         hovertemplate='<b>%{text}</b><br>Accuracy: %{y:.3f}<br>Training Time: %{x:.1f} min<extra></extra>'
#     ),
#     row=3, col=2
# )

# # 9. Accuracy vs Efficiency scatter
# fig.add_trace(
#     go.Scatter(
#         x=comparison_df['Efficiency_Score'],
#         y=comparison_df['Accuracy'],
#         mode='markers',
#         text=comparison_df['Model'],
#         marker=dict(
#             size=comparison_df['Parameters_Millions']*10,
#             color=comparison_df['Training_Time_Minutes'],
#             colorscale='Viridis',
#             showscale=True,
#             colorbar=dict(title="Training Time (min)")
#         ),
#         name='Accuracy vs Efficiency',
#         hovertemplate='<b>%{text}</b><br>Accuracy: %{y:.3f}<br>Efficiency: %{x:.4f}<extra></extra>'
#     ),
#     row=3, col=3
# )

# # Update axes properties
# fig.update_xaxes(tickangle=45, row=1, col=1)
# fig.update_xaxes(tickangle=45, row=1, col=2)
# fig.update_xaxes(tickangle=45, row=2, col=1)
# fig.update_xaxes(tickangle=45, row=2, col=2)
# fig.update_xaxes(tickangle=45, row=2, col=3)
# fig.update_xaxes(tickangle=45, row=3, col=1)

# fig.update_yaxes(title_text="Accuracy", row=1, col=1)
# fig.update_yaxes(title_text="F1 Score", row=1, col=2)
# fig.update_yaxes(title_text="Validation Accuracy", row=1, col=3)
# fig.update_xaxes(title_text="Training Accuracy", row=1, col=3)
# fig.update_yaxes(title_text="Time (Minutes)", row=2, col=1)
# fig.update_yaxes(title_text="Parameters (Millions)", row=2, col=2)
# fig.update_yaxes(title_text="Overfitting Gap", row=2, col=3)
# fig.update_yaxes(title_text="Efficiency Score", row=3, col=1)
# fig.update_yaxes(title_text="Accuracy", row=3, col=2)
# fig.update_xaxes(title_text="Training Time (Minutes)", row=3, col=2)
# fig.update_yaxes(title_text="Accuracy", row=3, col=3)
# fig.update_xaxes(title_text="Efficiency Score", row=3, col=3)

# # Show the interactive plot
# fig.show()

# # Create a radar chart for comprehensive comparison
# categories = ['Accuracy', 'F1_Score_Weighted', 'Efficiency_Score', 'Training_Time_Minutes', 'Parameters_Millions']
# categories_norm = [f'{cat}_norm' for cat in categories]

# # Normalize values for radar chart (0-1)
# radar_df = comparison_df.copy()
# for cat in categories:
#     radar_df[f'{cat}_norm'] = (radar_df[cat] - radar_df[cat].min()) / (radar_df[cat].max() - radar_df[cat].min())

# # Create radar chart
# fig_radar = go.Figure()

# for i, row in radar_df.iterrows():
#     values = row[categories_norm].tolist()
#     values += values[:1]  # Close the circle
    
#     fig_radar.add_trace(go.Scatterpolar(
#         r=values,
#         theta=[cat.replace('_', ' ').title() for cat in categories] + 
#               [categories[0].replace('_', ' ').title()],
#         fill='toself',
#         name=row['Model'],
#         hovertemplate='<b>%{theta}</b>: %{r:.2f}<extra></extra>'
#     ))

# fig_radar.update_layout(
#     polar=dict(
#         radialaxis=dict(
#             visible=True,
#             range=[0, 1]
#         )),
#     showlegend=True,
#     title='Comprehensive Model Comparison (Radar Chart)',
#     title_x=0.5,
#     height=600,
#     width=800
# )

# fig_radar.show()

# # Create training history comparison plot
# fig_history = go.Figure()

# for model_name, result in results.items():
#     history = result['history']
#     fig_history.add_trace(go.Scatter(
#         x=list(range(1, len(history['accuracy']) + 1)),
#         y=history['accuracy'],
#         mode='lines',
#         name=f'{model_name} (Train)',
#         hovertemplate='Epoch: %{x}<br>Accuracy: %{y:.3f}<extra></extra>'
#     ))
    
#     if 'val_accuracy' in history:
#         fig_history.add_trace(go.Scatter(
#             x=list(range(1, len(history['val_accuracy']) + 1)),
#             y=history['val_accuracy'],
#             mode='lines',
#             name=f'{model_name} (Validation)',
#             line=dict(dash='dash'),
#             hovertemplate='Epoch: %{x}<br>Accuracy: %{y:.3f}<extra></extra>'
#         ))

# fig_history.update_layout(
#     title='Training History Comparison',
#     xaxis_title='Epochs',
#     yaxis_title='Accuracy',
#     hovermode='closest',
#     height=600,
#     width=1000
# )

# fig_history.show()

# # Detailed analysis and recommendations
# print("\n" + "="*80)
# print("DETAILED ANALYSIS AND RECOMMENDATIONS")
# print("="*80)

# best_model = comparison_df.loc[comparison_df['Accuracy'].idxmax()]
# worst_model = comparison_df.loc[comparison_df['Accuracy'].idxmin()]
# most_efficient = comparison_df.loc[comparison_df['Efficiency_Score'].idxmax()]
# lightest_model = comparison_df.loc[comparison_df['Parameters_Millions'].idxmin()]

# print(f"\n🏆 BEST OVERALL MODEL: {best_model['Model']}")
# print(f"   📈 Accuracy: {best_model['Accuracy']:.4f}")
# print(f"   🎯 F1-Score (Weighted): {best_model['F1_Score_Weighted']:.4f}")
# print(f"   ⏱️  Training Time: {best_model['Training_Time_Minutes']:.1f} minutes")
# print(f"   🧮 Parameters: {best_model['Parameters_Millions']:.2f}M")

# print(f"\n⚡ MOST EFFICIENT MODEL: {most_efficient['Model']}")
# print(f"   🚀 Efficiency Score: {most_efficient['Efficiency_Score']:.4f} (Accuracy per minute)")
# print(f"   📈 Accuracy: {most_efficient['Accuracy']:.4f}")
# print(f"   ⏱️  Training Time: {most_efficient['Training_Time_Minutes']:.1f} minutes")

# print(f"\n📉 WORST PERFORMING MODEL: {worst_model['Model']}")
# print(f"   📈 Accuracy: {worst_model['Accuracy']:.4f}")
# print(f"   ⚠️  Potential issues: Overfitting gap: {worst_model['Overfitting_Gap']:.4f}")

# print(f"\n🏋️  LIGHTEST MODEL: {lightest_model['Model']}")
# print(f"   🧮 Parameters: {lightest_model['Parameters_Millions']:.2f}M")
# print(f"   📈 Accuracy: {lightest_model['Accuracy']:.4f}")

# # Additional insights
# print(f"\n💡 KEY INSIGHTS:")
# print(f"   • Accuracy range: {comparison_df['Accuracy'].min():.4f} - {comparison_df['Accuracy'].max():.4f}")
# print(f"   • Training time range: {comparison_df['Training_Time_Minutes'].min():.1f} - {comparison_df['Training_Time_Minutes'].max():.1f} minutes")
# print(f"   • Average overfitting gap: {comparison_df['Overfitting_Gap'].mean():.4f}")

# # Save all results to a single HTML file
# with open('model_comparison_report.html', 'w') as f:
#     f.write("""
#     <!DOCTYPE html>
#     <html>
#     <head>
#         <title>Model Comparison Report</title>
#         <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
#         <style>
#             body { font-family: Arial, sans-serif; margin: 40px; }
#             h1 { color: #2c3e50; text-align: center; }
#             h2 { color: #3498db; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
#             .summary { background-color: #f8f9fa; padding: 20px; border-radius: 5px; margin-bottom: 20px; }
#             .insight { background-color: #e8f4f8; padding: 15px; border-left: 5px solid #3498db; margin: 10px 0; }
#             table { width: 100%; border-collapse: collapse; margin: 20px 0; }
#             th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }
#             th { background-color: #3498db; color: white; }
#             tr:hover { background-color: #f5f5f5; }
#         </style>
#     </head>
#     <body>
#         <h1>Model Comparison Report</h1>
#         <div class="summary">
#             <h2>Executive Summary</h2>
#             <p><strong>Best Model:</strong> """ + best_model['Model'] + """ (Accuracy: """ + f"{best_model['Accuracy']:.4f}" + """)</p>
#             <p><strong>Most Efficient Model:</strong> """ + most_efficient['Model'] + """ (Efficiency: """ + f"{most_efficient['Efficiency_Score']:.4f}" + """)</p>
#             <p><strong>Lightest Model:</strong> """ + lightest_model['Model'] + """ (Parameters: """ + f"{lightest_model['Parameters_Millions']:.2f}M" + """)</p>
#         </div>
        
#         <h2>Detailed Results</h2>
#         """ + comparison_df.to_html(index=False) + """
        
#         <div class="insight">
#             <h3>Key Insights</h3>
#             <p>• Accuracy range: """ + f"{comparison_df['Accuracy'].min():.4f} - {comparison_df['Accuracy'].max():.4f}" + """</p>
#             <p>• Training time range: """ + f"{comparison_df['Training_Time_Minutes'].min():.1f} - {comparison_df['Training_Time_Minutes'].max():.1f} minutes" + """</p>
#             <p>• Average overfitting gap: """ + f"{comparison_df['Overfitting_Gap'].mean():.4f}" + """</p>
#         </div>
        
#         <h2>Interactive Visualizations</h2>
#         <p>The interactive visualizations are displayed above in your Python environment. To save them, use the camera icon in the Plotly toolbar.</p>
#     </body>
#     </html>
#     """)

# # Save all data to a single Excel file
# with pd.ExcelWriter('model_comparison_results.xlsx') as writer:
#     comparison_df.to_excel(writer, sheet_name='Detailed Results', index=False)
#     styled_df.to_excel(writer, sheet_name='Summary Results', index=False)
    
#     # Add training history for each model
#     for model_name, result in results.items():
#         history_df = pd.DataFrame(result['history'])
#         history_df.to_excel(writer, sheet_name=f'{model_name}_History', index=False)

# print(f"\n✅ All results saved to:")
# print("   - model_comparison_report.html (comprehensive report)")
# print("   - model_comparison_results.xlsx (detailed data)")

In [None]:
# # Create comprehensive results comparison with enhanced visualizations
# print("Creating comprehensive results comparison...")
# comparison_data = []

# for model_name, result in results.items():
#     history = result['history']
#     comparison_data.append({
#         'Model': model_name,
#         'Accuracy': result['accuracy'],
#         'F1_Score_Weighted': result['f1_score'],
#         'F1_Score_Macro': result['f1_macro'],
#         'Training_Time_Seconds': result['training_time'],
#         'Training_Time_Minutes': result['training_time'] / 60,
#         'Epochs_Trained': result.get('epochs_trained', len(history['accuracy'])),
#         'Final_Train_Accuracy': history['accuracy'][-1],
#         'Final_Val_Accuracy': history['val_accuracy'][-1] if 'val_accuracy' in history else 0,
#         'Final_Train_Loss': history['loss'][-1],
#         'Final_Val_Loss': history['val_loss'][-1] if 'val_loss' in history else 0,
#         'Best_Val_Accuracy': max(history['val_accuracy']) if 'val_accuracy' in history else 0,
#         'Parameters': result['model'].count_params(),
#         'Parameters_Millions': result['model'].count_params() / 1e6,
#         'Efficiency_Score': result['accuracy'] / (result['training_time'] / 60),  # Accuracy per minute
#         'Overfitting_Gap': history['accuracy'][-1] - (history['val_accuracy'][-1] if 'val_accuracy' in history else 0)
#     })

# comparison_df = pd.DataFrame(comparison_data)

# # Sort by accuracy for better visualization
# comparison_df = comparison_df.sort_values('Accuracy', ascending=False)

# print("\n" + "="*80)
# print("COMPREHENSIVE RESULTS COMPARISON")
# print("="*80)
# print(comparison_df.round(4).to_string(index=False))

# # Create styled table for better readability
# styled_df = comparison_df[['Model', 'Accuracy', 'F1_Score_Weighted', 'F1_Score_Macro', 
#                           'Training_Time_Minutes', 'Parameters_Millions', 'Efficiency_Score']].copy()
# styled_df.columns = ['Model', 'Accuracy', 'F1 (Weighted)', 'F1 (Macro)', 
#                     'Time (Min)', 'Params (M)', 'Efficiency']

# # Apply conditional formatting
# def highlight_max(s):
#     is_max = s == s.max()
#     return ['background-color: lightgreen' if v else '' for v in is_max]

# def highlight_min(s):
#     is_min = s == s.min()
#     return ['background-color: lightcoral' if v else '' for v in is_min]

# styled_display = styled_df.style\
#     .format({
#         'Accuracy': '{:.3f}',
#         'F1 (Weighted)': '{:.3f}', 
#         'F1 (Macro)': '{:.3f}',
#         'Time (Min)': '{:.1f}',
#         'Params (M)': '{:.2f}',
#         'Efficiency': '{:.4f}'
#     })\
#     .apply(highlight_max, subset=['Accuracy', 'F1 (Weighted)', 'F1 (Macro)', 'Efficiency'])\
#     .apply(highlight_min, subset=['Time (Min)', 'Params (M)'])\
#     .set_properties(**{'text-align': 'center'})\
#     .set_table_styles([{
#         'selector': 'th',
#         'props': [('background-color', '#40466e'), ('color', 'white'), ('font-weight', 'bold')]
#     }])

# print("\n📊 STYLED COMPARISON TABLE:")
# display(styled_display)

# # Save detailed results
# comparison_df.to_csv('skipgram_nn_detailed_results.csv', index=False)
# styled_df.to_csv('skipgram_nn_summary_results.csv', index=False)

# # Enhanced visual comparison with Plotly and Seaborn
# import plotly.express as px
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots
# import seaborn as sns

# # Set seaborn style
# sns.set_style("whitegrid")
# sns.set_palette("husl")

# # Create a comprehensive dashboard with Plotly
# fig = make_subplots(
#     rows=3, cols=3,
#     subplot_titles=(
#         '📈 Model Accuracy Comparison', '🎯 F1 Score Comparison',
#         '🔄 Training vs Validation Accuracy', '⏰ Training Time Comparison',
#         '🧮 Model Size (Parameters)', '⚖️ Overfitting Analysis',
#         '🚀 Training Efficiency', '⏱️ Accuracy vs Training Time',
#         '📊 Comprehensive Model Comparison (Radar)'
#     ),
#     specs=[
#         [{"type": "bar"}, {"type": "bar"}, {"type": "scatter"}],
#         [{"type": "bar"}, {"type": "bar"}, {"type": "bar"}],
#         [{"type": "bar"}, {"type": "scatter"}, {"type": "polar"}]
#     ],
#     vertical_spacing=0.08,
#     horizontal_spacing=0.08
# )

# # Update layout
# fig.update_layout(
#     title_text='COMPREHENSIVE MODEL COMPARISON (Skip-gram Embeddings)',
#     title_font_size=20,
#     title_font_color='darkblue',
#     title_x=0.5,
#     height=1200,
#     width=1400,
#     showlegend=True,
#     template='plotly_white'
# )

# # 1. Accuracy comparison
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Accuracy'],
#         marker_color=px.colors.qualitative.Set3,
#         text=comparison_df['Accuracy'].round(3),
#         textposition='auto',
#         name='Accuracy'
#     ),
#     row=1, col=1
# )

# # 2. F1 Scores comparison
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['F1_Score_Weighted'],
#         name='Weighted F1',
#         marker_color='lightblue'
#     ),
#     row=1, col=2
# )

# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['F1_Score_Macro'],
#         name='Macro F1',
#         marker_color='lightcoral'
#     ),
#     row=1, col=2
# )

# # 3. Training vs Validation Accuracy
# fig.add_trace(
#     go.Scatter(
#         x=comparison_df['Final_Train_Accuracy'],
#         y=comparison_df['Final_Val_Accuracy'],
#         mode='markers+text',
#         text=comparison_df['Model'],
#         textposition='top center',
#         marker=dict(
#             size=15,
#             color=comparison_df['Accuracy'],
#             colorscale='Viridis',
#             showscale=True,
#             colorbar=dict(title="Accuracy")
#         ),
#         name='Train vs Val'
#     ),
#     row=1, col=3
# )

# # Add reference line
# fig.add_shape(
#     type="line", line=dict(dash='dash', color='grey'),
#     x0=0, y0=0, x1=1, y1=1,
#     row=1, col=3
# )

# # 4. Training time comparison
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Training_Time_Minutes'],
#         marker_color=px.colors.qualitative.Pastel,
#         text=comparison_df['Training_Time_Minutes'].round(1),
#         textposition='auto',
#         name='Training Time (min)'
#     ),
#     row=2, col=1
# )

# # 5. Parameters comparison
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Parameters_Millions'],
#         marker_color=px.colors.qualitative.Set2,
#         text=comparison_df['Parameters_Millions'].round(2),
#         textposition='auto',
#         name='Parameters (M)'
#     ),
#     row=2, col=2
# )

# # 6. Overfitting analysis
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Overfitting_Gap'],
#         marker_color=['green' if x <= 0 else 'red' for x in comparison_df['Overfitting_Gap']],
#         text=comparison_df['Overfitting_Gap'].round(3),
#         textposition='auto',
#         name='Overfitting Gap'
#     ),
#     row=2, col=3
# )

# # 7. Efficiency score
# fig.add_trace(
#     go.Bar(
#         x=comparison_df['Model'],
#         y=comparison_df['Efficiency_Score'],
#         marker_color=px.colors.sequential.Viridis,
#         text=comparison_df['Efficiency_Score'].round(4),
#         textposition='auto',
#         name='Efficiency'
#     ),
#     row=3, col=1
# )

# # 8. Accuracy vs Training Time scatter
# fig.add_trace(
#     go.Scatter(
#         x=comparison_df['Training_Time_Minutes'],
#         y=comparison_df['Accuracy'],
#         mode='markers+text',
#         text=comparison_df['Model'],
#         textposition='top center',
#         marker=dict(
#             size=comparison_df['Parameters_Millions']*5,  # Scale by parameter count
#             color=comparison_df['Efficiency_Score'],
#             colorscale='Plasma',
#             showscale=True,
#             colorbar=dict(title="Efficiency")
#         ),
#         name='Accuracy vs Time'
#     ),
#     row=3, col=2
# )

# # 9. Radar chart for comprehensive comparison
# categories = ['Accuracy', 'F1_Score_Weighted', 'Efficiency_Score', 'Parameters_Millions']
# categories_norm = [f'{cat}_norm' for cat in categories]

# # Normalize values for radar chart (0-1)
# radar_df = comparison_df.copy()
# for cat in categories:
#     radar_df[f'{cat}_norm'] = (radar_df[cat] - radar_df[cat].min()) / (radar_df[cat].max() - radar_df[cat].min())

# # Plot radar chart
# for i, row in radar_df.iterrows():
#     values = row[categories_norm].tolist()
#     values += values[:1]  # Close the circle
    
#     fig.add_trace(
#         go.Scatterpolar(
#             r=values,
#             theta=[cat.replace('_', ' ').title() for cat in categories] + 
#                   [categories[0].replace('_', ' ').title()],
#             fill='toself',
#             name=row['Model'],
#             showlegend=True
#         ),
#         row=3, col=3
#     )

# # Update axes properties
# fig.update_xaxes(tickangle=45, row=1, col=1)
# fig.update_xaxes(tickangle=45, row=1, col=2)
# fig.update_xaxes(tickangle=45, row=2, col=1)
# fig.update_xaxes(tickangle=45, row=2, col=2)
# fig.update_xaxes(tickangle=45, row=2, col=3)
# fig.update_xaxes(tickangle=45, row=3, col=1)

# fig.update_yaxes(title_text="Accuracy", row=1, col=1)
# fig.update_yaxes(title_text="F1 Score", row=1, col=2)
# fig.update_yaxes(title_text="Validation Accuracy", row=1, col=3)
# fig.update_xaxes(title_text="Training Accuracy", row=1, col=3)
# fig.update_yaxes(title_text="Time (Minutes)", row=2, col=1)
# fig.update_yaxes(title_text="Parameters (Millions)", row=2, col=2)
# fig.update_yaxes(title_text="Overfitting Gap", row=2, col=3)
# fig.update_yaxes(title_text="Efficiency Score", row=3, col=1)
# fig.update_yaxes(title_text="Accuracy", row=3, col=2)
# fig.update_xaxes(title_text="Training Time (Minutes)", row=3, col=2)

# # Save the interactive plot
# fig.write_html("skipgram_models_interactive_dashboard.html")

# # Also create a static version with matplotlib for compatibility
# plt.figure(figsize=(20, 16))
# plt.suptitle('COMPREHENSIVE MODEL COMPARISON (Skip-gram Embeddings)', 
#              fontsize=16, fontweight='bold', y=0.98)

# # Create subplot grid
# gs = plt.GridSpec(3, 3)

# # 1. Accuracy comparison
# ax1 = plt.subplot(gs[0, 0])
# bars = ax1.bar(comparison_df['Model'], comparison_df['Accuracy'], 
#                color=sns.color_palette("husl", len(comparison_df)), alpha=0.8)
# ax1.set_title('📈 Model Accuracy Comparison', fontweight='bold', pad=20)
# ax1.set_ylabel('Accuracy')
# ax1.tick_params(axis='x', rotation=45)
# for i, (bar, acc) in enumerate(zip(bars, comparison_df['Accuracy'])):
#     ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, 
#              f'{acc:.3f}', ha='center', fontweight='bold')

# # 2. F1 Scores comparison
# ax2 = plt.subplot(gs[0, 1])
# width = 0.35
# x = np.arange(len(comparison_df))
# ax2.bar(x - width/2, comparison_df['F1_Score_Weighted'], width, 
#         label='Weighted F1', alpha=0.8, color='skyblue')
# ax2.bar(x + width/2, comparison_df['F1_Score_Macro'], width, 
#         label='Macro F1', alpha=0.8, color='lightcoral')
# ax2.set_title('🎯 F1 Score Comparison', fontweight='bold', pad=20)
# ax2.set_ylabel('F1 Score')
# ax2.set_xticks(x)
# ax2.set_xticklabels(comparison_df['Model'], rotation=45)
# ax2.legend()

# # 3. Training vs Validation Accuracy
# ax3 = plt.subplot(gs[0, 2])
# scatter = ax3.scatter(comparison_df['Final_Train_Accuracy'], comparison_df['Final_Val_Accuracy'], 
#                      c=comparison_df['Accuracy'], cmap='viridis', s=100, alpha=0.8)
# for i, txt in enumerate(comparison_df['Model']):
#     ax3.annotate(txt, (comparison_df['Final_Train_Accuracy'].iloc[i], 
#                       comparison_df['Final_Val_Accuracy'].iloc[i]),
#                 xytext=(5, 5), textcoords='offset points', fontsize=8)
# ax3.plot([0, 1], [0, 1], 'k--', alpha=0.3)
# ax3.set_xlabel('Training Accuracy')
# ax3.set_ylabel('Validation Accuracy')
# ax3.set_title('🔄 Training vs Validation Accuracy', fontweight='bold', pad=20)
# plt.colorbar(scatter, ax=ax3, label='Accuracy')

# # 4. Training time comparison
# ax4 = plt.subplot(gs[1, 0])
# bars = ax4.bar(comparison_df['Model'], comparison_df['Training_Time_Minutes'], 
#                color=sns.color_palette("husl", len(comparison_df)), alpha=0.8)
# ax4.set_title('⏰ Training Time Comparison', fontweight='bold', pad=20)
# ax4.set_ylabel('Time (Minutes)')
# ax4.tick_params(axis='x', rotation=45)
# for i, (bar, time_val) in enumerate(zip(bars, comparison_df['Training_Time_Minutes'])):
#     ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1, 
#              f'{time_val:.1f}m', ha='center')

# # 5. Parameters comparison
# ax5 = plt.subplot(gs[1, 1])
# bars = ax5.bar(comparison_df['Model'], comparison_df['Parameters_Millions'], 
#                color=sns.color_palette("husl", len(comparison_df)), alpha=0.8)
# ax5.set_title('🧮 Model Size (Parameters)', fontweight='bold', pad=20)
# ax5.set_ylabel('Parameters (Millions)')
# ax5.tick_params(axis='x', rotation=45)
# for i, (bar, params) in enumerate(zip(bars, comparison_df['Parameters_Millions'])):
#     ax5.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, 
#              f'{params:.2f}M', ha='center')

# # 6. Overfitting analysis
# ax6 = plt.subplot(gs[1, 2])
# colors = ['green' if x <= 0 else 'red' for x in comparison_df['Overfitting_Gap']]
# bars = ax6.bar(comparison_df['Model'], comparison_df['Overfitting_Gap'], 
#                color=colors, alpha=0.8)
# ax6.axhline(y=0, color='red', linestyle='--', alpha=0.7)
# ax6.set_title('⚖️ Overfitting Analysis (Train - Val Accuracy Gap)', fontweight='bold', pad=20)
# ax6.set_ylabel('Accuracy Gap')
# ax6.tick_params(axis='x', rotation=45)
# for i, (bar, gap) in enumerate(zip(bars, comparison_df['Overfitting_Gap'])):
#     ax6.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001, 
#              f'{gap:.3f}', ha='center')

# # 7. Efficiency score
# ax7 = plt.subplot(gs[2, 0])
# bars = ax7.bar(comparison_df['Model'], comparison_df['Efficiency_Score'], 
#                color=sns.color_palette("husl", len(comparison_df)), alpha=0.8)
# ax7.set_title('🚀 Training Efficiency (Accuracy per Minute)', fontweight='bold', pad=20)
# ax7.set_ylabel('Efficiency Score')
# ax7.tick_params(axis='x', rotation=45)
# for i, (bar, eff) in enumerate(zip(bars, comparison_df['Efficiency_Score'])):
#     ax7.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001, 
#              f'{eff:.4f}', ha='center')

# # 8. Accuracy vs Training Time scatter
# ax8 = plt.subplot(gs[2, 1])
# scatter = ax8.scatter(comparison_df['Training_Time_Minutes'], comparison_df['Accuracy'], 
#                      c=comparison_df['Efficiency_Score'], cmap='plasma', 
#                      s=comparison_df['Parameters_Millions']*50, alpha=0.8)
# for i, txt in enumerate(comparison_df['Model']):
#     ax8.annotate(txt, (comparison_df['Training_Time_Minutes'].iloc[i], 
#                       comparison_df['Accuracy'].iloc[i]),
#                 xytext=(5, 5), textcoords='offset points', fontsize=8)
# ax8.set_xlabel('Training Time (Minutes)')
# ax8.set_ylabel('Accuracy')
# ax8.set_title('⏱️ Accuracy vs Training Time', fontweight='bold', pad=20)
# plt.colorbar(scatter, ax=ax8, label='Efficiency Score')

# # 9. Radar chart
# ax9 = plt.subplot(gs[2, 2], polar=True)
# categories = ['Accuracy', 'F1_Score_Weighted', 'Efficiency_Score', 'Parameters_Millions']
# categories_norm = [f'{cat}_norm' for cat in categories]

# # Normalize values for radar chart (0-1)
# radar_df = comparison_df.copy()
# for cat in categories:
#     radar_df[f'{cat}_norm'] = (radar_df[cat] - radar_df[cat].min()) / (radar_df[cat].max() - radar_df[cat].min())

# # Plot radar chart
# angles = [n / float(len(categories)) * 2 * np.pi for n in range(len(categories))]
# angles += angles[:1]  # Close the circle

# colors = sns.color_palette("husl", len(radar_df))
# for i, (idx, row) in enumerate(radar_df.iterrows()):
#     values = row[categories_norm].tolist()
#     values += values[:1]  # Close the circle
#     ax9.plot(angles, values, 'o-', linewidth=2, label=row['Model'], color=colors[i])
#     ax9.fill(angles, values, alpha=0.1, color=colors[i])

# ax9.set_xticks(angles[:-1])
# ax9.set_xticklabels([cat.replace('_', ' ').title() for cat in categories])
# ax9.set_title('📊 Comprehensive Model Comparison (Radar)', fontweight='bold', pad=20)
# ax9.legend(bbox_to_anchor=(1.3, 1), loc='upper left')

# plt.tight_layout()
# plt.savefig('skipgram_models_comprehensive_comparison.png', dpi=300, bbox_inches='tight')
# plt.show()

# # Detailed analysis and recommendations
# print("\n" + "="*80)
# print("DETAILED ANALYSIS AND RECOMMENDATIONS")
# print("="*80)

# best_model = comparison_df.loc[comparison_df['Accuracy'].idxmax()]
# worst_model = comparison_df.loc[comparison_df['Accuracy'].idxmin()]
# most_efficient = comparison_df.loc[comparison_df['Efficiency_Score'].idxmax()]
# lightest_model = comparison_df.loc[comparison_df['Parameters_Millions'].idxmin()]

# print(f"\n🏆 BEST OVERALL MODEL: {best_model['Model']}")
# print(f"   📈 Accuracy: {best_model['Accuracy']:.4f}")
# print(f"   🎯 F1-Score (Weighted): {best_model['F1_Score_Weighted']:.4f}")
# print(f"   ⏱️  Training Time: {best_model['Training_Time_Minutes']:.1f} minutes")
# print(f"   🧮 Parameters: {best_model['Parameters_Millions']:.2f}M")

# print(f"\n⚡ MOST EFFICIENT MODEL: {most_efficient['Model']}")
# print(f"   🚀 Efficiency Score: {most_efficient['Efficiency_Score']:.4f} (Accuracy per minute)")
# print(f"   📈 Accuracy: {most_efficient['Accuracy']:.4f}")
# print(f"   ⏱️  Training Time: {most_efficient['Training_Time_Minutes']:.1f} minutes")

# print(f"\n📉 WORST PERFORMING MODEL: {worst_model['Model']}")
# print(f"   📈 Accuracy: {worst_model['Accuracy']:.4f}")
# print(f"   ⚠️  Potential issues: Overfitting gap: {worst_model['Overfitting_Gap']:.4f}")

# print(f"\n🏋️  LIGHTEST MODEL: {lightest_model['Model']}")
# print(f"   🧮 Parameters: {lightest_model['Parameters_Millions']:.2f}M")
# print(f"   📈 Accuracy: {lightest_model['Accuracy']:.4f}")

# # Additional insights
# print(f"\n💡 KEY INSIGHTS:")
# print(f"   • Accuracy range: {comparison_df['Accuracy'].min():.4f} - {comparison_df['Accuracy'].max():.4f}")
# print(f"   • Training time range: {comparison_df['Training_Time_Minutes'].min():.1f} - {comparison_df['Training_Time_Minutes'].max():.1f} minutes")
# print(f"   • Average overfitting gap: {comparison_df['Overfitting_Gap'].mean():.4f}")

# # Save summary report
# with open('model_comparison_summary.txt', 'w') as f:
#     f.write("MODEL COMPARISON SUMMARY REPORT\n")
#     f.write("="*50 + "\n\n")
#     f.write(f"Best Model: {best_model['Model']}\n")
#     f.write(f"Accuracy: {best_model['Accuracy']:.4f}\n")
#     f.write(f"Worst Model: {worst_model['Model']}\n")
#     f.write(f"Accuracy: {worst_model['Accuracy']:.4f}\n\n")
#     f.write("Detailed Results:\n")
#     f.write(comparison_df.round(4).to_string())

# print(f"\n✅ Results saved to:")
# print("   - skipgram_nn_detailed_results.csv")
# print("   - skipgram_nn_summary_results.csv") 
# print("   - skipgram_models_comprehensive_comparison.png")
# print("   - skipgram_models_interactive_dashboard.html")
# print("   - model_comparison_summary.txt")

## Summary Performance vs Complexity Analysis

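This section creates a summary chart of the key trade-offs between model performance and computational complexity, for use in the LaTeX report. The accuracy and complexity values it plots are hard-coded in the code cell below rather than computed from the earlier result tables.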

In [None]:
# Create summary performance vs complexity chart for LaTeX report
import matplotlib.pyplot as plt
import numpy as np

# Key models with their accuracy and a relative complexity score (higher means
# more complex). These numbers are hard-coded here; they are not read from the
# result tables computed in other cells.
# NOTE(review): presumably transcribed from the experimental results -- confirm
# they still match before regenerating the report figure.
summary_models = {
    'Naive Bayes + BoW': {'accuracy': 0.712, 'complexity': 1, 'type': 'Traditional ML'},
    'Logistic Reg + TF-IDF': {'accuracy': 0.789, 'complexity': 2, 'type': 'Traditional ML'},
    'Random Forest + TF-IDF': {'accuracy': 0.821, 'complexity': 3, 'type': 'Traditional ML'},
    'GRU + GloVe': {'accuracy': 0.789, 'complexity': 7, 'type': 'Neural Network'},
    'LSTM + GloVe': {'accuracy': 0.812, 'complexity': 9, 'type': 'Neural Network'},
    'Bi-LSTM + GloVe': {'accuracy': 0.853, 'complexity': 10, 'type': 'Neural Network'}
}

# Horizontal accuracy bands as (lower, upper, colour, legend label). The first
# lower bound and the last upper bound also define the visible y-range.
performance_zones = [
    (0.70, 0.75, 'red', 'Low Performance'),
    (0.75, 0.82, 'yellow', 'Medium Performance'),
    (0.82, 0.86, 'green', 'High Performance'),
]


def _plot_model_group(ax, points, color, marker, legend_label, box_color):
    """Scatter one family of models and attach a boxed name label to each point.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    points : list of (complexity, accuracy, model_name) tuples.
    color, marker : colour and shape of the scatter markers.
    legend_label : legend entry used for the whole family.
    box_color : background colour of the per-model label boxes.
    """
    if not points:
        # zip(*[]) cannot be unpacked into three names, so skip empty families.
        return
    xs, ys, names = zip(*points)
    ax.scatter(xs, ys, s=150, c=color, alpha=0.7, label=legend_label, marker=marker)
    for x_val, y_val, name in zip(xs, ys, names):
        ax.annotate(name, (x_val, y_val),
                    xytext=(10, 5), textcoords='offset points', fontsize=10,
                    bbox=dict(boxstyle='round,pad=0.3', facecolor=box_color, alpha=0.7))


# Create the figure
fig, ax = plt.subplots(1, 1, figsize=(12, 8))

# Split the models by family; anything not tagged 'Traditional ML' is plotted
# with the neural networks.
traditional_ml = [
    (data['complexity'], data['accuracy'], model)
    for model, data in summary_models.items()
    if data['type'] == 'Traditional ML'
]
neural_networks = [
    (data['complexity'], data['accuracy'], model)
    for model, data in summary_models.items()
    if data['type'] != 'Traditional ML'
]

_plot_model_group(ax, traditional_ml, 'blue', 'o', 'Traditional ML', 'lightblue')
_plot_model_group(ax, neural_networks, 'orange', 's', 'Neural Networks', 'lightyellow')

# Customize the plot
ax.set_xlabel('Model Complexity Score', fontsize=12, fontweight='bold')
ax.set_ylabel('Accuracy', fontsize=12, fontweight='bold')
ax.set_title('Performance vs Complexity Trade-off Analysis\nCSE440 Multi-Class Text Classification',
             fontsize=14, fontweight='bold', pad=20)

# Add grid
ax.grid(True, alpha=0.3, linestyle='--')

# Set axis limits with some padding
ax.set_xlim(0, 11)
ax.set_ylim(performance_zones[0][0], performance_zones[-1][1])

# Add performance zones
for zone_low, zone_high, zone_color, zone_label in performance_zones:
    ax.axhspan(zone_low, zone_high, alpha=0.1, color=zone_color, label=zone_label)

# Build the legend only after every labelled artist exists: a legend collects
# the labelled artists present at call time, so creating it before the zones
# would leave the three performance bands out of it.
ax.legend(loc='lower right', fontsize=11, framealpha=0.9)

# Add text annotations for key insights
ax.text(0.5, 0.84, 'Neural Networks:\nHigher performance,\nHigher complexity',
        fontsize=10, bbox=dict(boxstyle='round', facecolor='orange', alpha=0.3),
        verticalalignment='top')

ax.text(1.5, 0.73, 'Traditional ML:\nLower complexity,\nFaster training',
        fontsize=10, bbox=dict(boxstyle='round', facecolor='blue', alpha=0.3),
        verticalalignment='bottom')

# Make the plot look professional
plt.tight_layout()

# Save the chart for LaTeX report (before plt.show(), which may clear the figure)
plt.savefig('summary_performance_insights.png', dpi=300, bbox_inches='tight',
            facecolor='white', edgecolor='none')

print("Summary performance vs complexity chart created successfully!")
print("Saved as: summary_performance_insights.png")
print("\nKey findings shown in the chart:")
print("1. Traditional ML models offer low complexity but moderate performance")
print("2. Neural networks provide higher performance at the cost of increased complexity")
print("3. Bi-LSTM + GloVe achieves the best performance but highest complexity")
print("4. Random Forest + TF-IDF offers the best traditional ML performance")

plt.show()