<a href="https://colab.research.google.com/github/Jnanasagara/machine-learning-lab/blob/main/lab9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive so files under /content/drive/MyDrive can be read by
# later cells. `google.colab` is only available on a Colab runtime, so this
# cell is expected to fail on a local Jupyter kernel.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from lime.lime_tabular import LimeTabularExplainer
from sklearn.pipeline import make_pipeline

# ----------------- Load dataset -----------------
# Everything a reader may need to adapt for their environment lives here.
DATA_PATH = '/content/drive/MyDrive/ml-stuttering-events-dataset/cleaned-sep28k.csv'
TARGET_COLUMN = "Stuttering"
INDEX_ARTIFACT_COLUMN = "Unnamed: 0"

df = pd.read_csv(DATA_PATH)

# Discard the leftover unnamed index column if the CSV carries one;
# errors="ignore" makes this a no-op when the column is absent.
df = df.drop(columns=[INDEX_ARTIFACT_COLUMN], errors="ignore")

# Features are every column except the target.
# NOTE(review): the saved run of this notebook reports 1.0 for accuracy,
# precision, recall and F1, and the LIME output attributes the prediction
# almost entirely to NoStutteredWords. That pattern usually means the target
# can be derived from annotation columns still present in X (NoStutteredWords,
# WordRep, SoundRep, Block, Prolongation, Interjection). Confirm how
# "Stuttering" was constructed and drop those columns from X if so.
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# Hold out 20% for testing; stratify on y so both splits keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# ----------------- A1 & A2: Stacking Classifier and Pipeline -----------------
# One seed shared by every stochastic estimator below.
SEED = 42

# Level-0 learners. Logistic regression and the SVC each carry their own
# StandardScaler; the random forest is used without one.
scaled_logreg = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=2000, solver="lbfgs", random_state=SEED),
)
random_forest = RandomForestClassifier(n_estimators=100, random_state=SEED)
scaled_svc = make_pipeline(
    StandardScaler(),
    # probability=True gives the SVC a predict_proba method.
    SVC(probability=True, random_state=SEED),
)
base_estimators = [
    ("lr", scaled_logreg),
    ("rf", random_forest),
    ("svc", scaled_svc),
]

# Level-1 learner (final estimator) that combines the base predictions.
meta_model = LogisticRegression(max_iter=2000, solver="lbfgs", random_state=SEED)

# cv=5: five-fold cross-validation is used to produce the base-estimator
# predictions the meta-model is trained on.
stacking_classifier = StackingClassifier(
    estimators=base_estimators,
    final_estimator=meta_model,
    cv=5,
)

# Full pipeline: scale, then stack. The "lr" and "svc" branches standardise
# again internally, so for them the outer scaler is redundant but harmless.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("stacking", stacking_classifier),
    ]
)

# Fit on the training split, then predict the held-out split.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

print("A1 & A2: Pipeline with Stacking Classifier built and fitted successfully.\n")

# Score the held-out predictions. Each scorer is called as
# scorer(y_true, y_pred) with its default arguments.
# NOTE(review): the saved output shows 1.0 for all four metrics; treat that
# as a prompt to check the features for target leakage, not as a result.
scorers = (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
)
metrics = {label: scorer(y_test, y_pred) for label, scorer in scorers}

print("Model Performance Metrics:")
# Single-row frame so the four metrics print side by side.
print(pd.DataFrame([metrics]))

# ----------------- A3: LIME Explanation -----------------
print("\nA3: LIME Explanation for a single test instance:")

feature_names = X_train.columns.tolist()
# Display names, listed in the same order as pipeline.classes_
# (i.e. the column order of predict_proba).
class_names = ["No Stuttering", "Stuttering"]


def predict_proba_with_names(rows):
    """Return class probabilities for a 2-D array of perturbed samples.

    LIME passes plain NumPy arrays to the prediction function, but the
    pipeline was fitted on a DataFrame. Re-attaching the training column
    names lets scikit-learn validate the feature layout and avoids the
    "X does not have valid feature names" warning on every call.

    Parameters
    ----------
    rows : array-like of shape (n_samples, n_features)
        Samples whose columns are ordered like ``feature_names``.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_classes)
        Output of ``pipeline.predict_proba``.
    """
    return pipeline.predict_proba(pd.DataFrame(rows, columns=feature_names))


# Initialize the LIME explainer. random_state pins the perturbation sampling
# so the explanation is reproducible across runs.
explainer = LimeTabularExplainer(
    training_data=X_train.values,
    feature_names=feature_names,
    class_names=class_names,
    mode="classification",
    random_state=42,
)

# Pick the first instance from the test set for explanation
instance_to_explain = X_test.iloc[0]

# Explain the instance's prediction
explanation = explainer.explain_instance(
    data_row=instance_to_explain.values,
    predict_fn=predict_proba_with_names,
    num_features=8,
)

# Predict on a one-row DataFrame (double brackets keep the column names)
# instead of a reshaped bare array.
predicted_class = pipeline.predict(X_test.iloc[[0]])[0]

# Translate the predicted label into its position in classes_ before looking
# up the display name, so the lookup does not rely on labels being 0 and 1.
predicted_class_index = list(pipeline.classes_).index(predicted_class)
predicted_class_name = class_names[predicted_class_index]

print("\nPredicted class:", predicted_class_name)
print("Explanation as a list of features and weights:")
# label=1 is LIME's default: the weights are contributions toward the class
# at index 1 ("Stuttering"); negative weights push toward "No Stuttering".
print(explanation.as_list(label=1))




A1 & A2: Pipeline with Stacking Classifier built and fitted successfully.

Model Performance Metrics:
   Accuracy  Precision  Recall  F1 Score
0       1.0        1.0     1.0       1.0

A3: LIME Explanation for a single test instance:





Predicted class: No Stuttering
Explanation as a list of features and weights:
[('2.00 < NoStutteredWords <= 3.00', -0.8631881250112811), ('WordRep <= 0.00', -0.11182983257101221), ('SoundRep <= 0.00', -0.09991735176597963), ('Block <= 0.00', -0.06221757582412078), ('Prolongation <= 0.00', -0.05730593893970203), ('Interjection <= 0.00', -0.010966064480371887), ('7 > -1.22', -0.007942090422270568), ('1 <= 50.75', -0.004246118574535176)]


