<a href="https://colab.research.google.com/github/2303A52487/Explainable-AI/blob/main/Explainable_AI_Assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

QUESTION - 01

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error


# Question 1: fit a one-feature linear regression of books sold on Google Ads
# spend, express each prediction as baseline + SHAP contribution, and report
# residuals and fit metrics.

# 1. Dataset

data = {
    "GoogleAds_(₹1000s)": [1, 2, 3, 1, 2],
    "BooksSold": [100, 130, 160, 110, 140]
}
df = pd.DataFrame(data)


# 2. Baseline value (mean of the target; the reference point for SHAP values)

baseline = df["BooksSold"].mean()

# 3. Linear Regression

X = df[["GoogleAds_(₹1000s)"]]
y = df["BooksSold"]

model = LinearRegression()
model.fit(X, y)

intercept = model.intercept_
coef = model.coef_[0]


# 4. Predictions & SHAP values

# Keep the unrounded predictions for scoring; the rounded copy stored in the
# table is for display only.
raw_predictions = model.predict(X)

df["Predicted_BooksSold"] = raw_predictions.round(4)
df["Baseline"] = round(baseline, 4)
# With a single feature, the SHAP value is the prediction's offset from baseline.
df["SHAP"] = (df["Predicted_BooksSold"] - baseline).round(4)
df["Baseline_plus_SHAP"] = (df["Baseline"] + df["SHAP"]).round(4)

# 5. Residuals & Over/Under

# Residual is Actual - Predicted: positive means the model predicted too low.
df["Residual_(Actual-Predicted)"] = (df["BooksSold"] - df["Predicted_BooksSold"]).round(4)
df["Over_Under"] = df["Residual_(Actual-Predicted)"].apply(
    lambda r: "Underprediction (model too low)" if r > 0
    else ("Overprediction (model too high)" if r < 0 else "Exact")
)


# 6. Model performance

# Score the raw predictions so the metrics do not include rounding error.
r2 = r2_score(y, raw_predictions)
mse = mean_squared_error(y, raw_predictions)
mae = mean_absolute_error(y, raw_predictions)

# 7. Output results

print("=== Linear Regression Model ===")
print(f"Predicted_BooksSold = {intercept:.4f} + {coef:.4f} × GoogleAds_(₹1000s)")
print(f"Intercept: {intercept:.4f}")
print(f"Coefficient: {coef:.4f} (books per ₹1000 Google Ads)")
print("\n=== Baseline ===")
print(f"Baseline (mean BooksSold): {baseline:.4f}")

print("\n=== Model Performance ===")
print(f"R-squared: {r2:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")

print("\n=== Detailed Table ===")
print(df.to_string(index=False))

QUESTION - 02

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression


# Question 2: fit a two-feature linear regression of Sales on Footfall and
# Promotions, then decompose each prediction into baseline + per-feature
# SHAP-like contributions.

# 1. Dataset

data = {
    "Footfall": [100, 80, 120, 90, 60],
    "Promotions": [1, 0, 1, 0, 1],
    "Sales": [1500, 1000, 1700, 1100, 900]
}
df = pd.DataFrame(data)


# 2. Inputs & Target

X = df[["Footfall", "Promotions"]].astype(float)
y = df["Sales"].astype(float)

# ------------------------------
# 3. Fit Multiple Linear Regression
# ------------------------------
model = LinearRegression(fit_intercept=True)
model.fit(X, y)
coefs = model.coef_
intercept = model.intercept_

# 4. Baseline (mean of all Sales)

baseline = y.mean()


# 5. SHAP-like Contributions

# For a linear model, each feature's contribution is its coefficient times
# the feature's deviation from its mean.
means_x = X.mean()
shap_vals = (X - means_x) * coefs


# 6. Predictions

preds = model.predict(X)
sum_shap = shap_vals.sum(axis=1)
preds_from_baseline_shap = baseline + sum_shap


# 7. Create Results Table

results = df.copy()
results["Predicted_Sales"] = preds.round(2)
results["Baseline"] = baseline
results["SHAP_Footfall"] = shap_vals["Footfall"].round(2)
results["SHAP_Promotions"] = shap_vals["Promotions"].round(2)
results["Sum_SHAP"] = sum_shap.round(2)
results["Baseline+Sum_SHAP"] = preds_from_baseline_shap.round(2)
# Residual is Actual - Predicted (the convention used in Question 1):
# positive means the model predicted too low.
results["Residual"] = (results["Sales"] - results["Predicted_Sales"]).round(2)
# The label compares the displayed (rounded) prediction against the actual value.
results["Over_Under"] = results.apply(
    lambda r: "Overprediction" if r["Predicted_Sales"] > r["Sales"]
              else ("Underprediction" if r["Predicted_Sales"] < r["Sales"] else "Exact"),
    axis=1
)


# 8. Display Results

print("\n=== MODEL SUMMARY ===")
print(f"Intercept: {intercept:.4f}")
print(f"Coef_Footfall: {coefs[0]:.4f}")
print(f"Coef_Promotions: {coefs[1]:.4f}")
print(f"Baseline (mean Sales): {baseline:.2f}")
print("\nVerification that predictions match baseline + SHAP sum:",
      np.allclose(preds, preds_from_baseline_shap))

print("\n=== RESULTS TABLE ===")
print(results)

QUESTION - 03

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


# Question 3: fit a multiple linear regression on the scikit-learn diabetes
# dataset and decompose each test prediction into the training baseline plus
# per-feature SHAP-like contributions.

# 1. Load Diabetes Dataset

data = load_diabetes()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name="disease_progression")

# Split into train & test sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Fit Multiple Linear Regression

model = LinearRegression(fit_intercept=True)
model.fit(X_train, y_train)

# Model parameters (coefficients labelled by feature name so they align
# column-wise with the feature frames below)
coefs = pd.Series(model.coef_, index=X.columns)
intercept = model.intercept_

# 3. Baseline (mean of target in training data)

baseline = y_train.mean()


# 4. Compute SHAP-like contributions (linear model method)

# Contribution = coefficient * (feature value - training mean of that feature).
means_train = X_train.mean()
shap_vals_test = (X_test - means_train) * coefs

# Predictions from model
preds_test = model.predict(X_test)

# Sum of SHAP contributions per test row
sum_shap_test = shap_vals_test.sum(axis=1)

# Predictions from baseline + SHAP sum
preds_from_baseline_shap = baseline + sum_shap_test


# 5. Verify decomposition

assert np.allclose(preds_test, preds_from_baseline_shap), "SHAP decomposition failed!"


# 6. Create Results Table

results = X_test.copy()
results["Actual"] = y_test.values
results["Predicted"] = preds_test.round(2)
results["Baseline"] = baseline
for col in shap_vals_test.columns:
    results[f"SHAP_{col}"] = shap_vals_test[col].round(2)
results["Sum_SHAP"] = sum_shap_test.round(2)
results["Baseline+Sum_SHAP"] = preds_from_baseline_shap.round(2)
# Residual is Actual - Predicted (the convention used in Question 1):
# negative means the model predicted too high, positive means too low.
results["Residual"] = (results["Actual"] - results["Predicted"]).round(2)
results["Over_Under"] = results["Residual"].apply(
    lambda r: "Overprediction" if r < 0 else ("Underprediction" if r > 0 else "Exact")
)


# 7. Output Model Summary & Results

print("\n=== MODEL SUMMARY ===")
print(f"Intercept: {intercept:.4f}")
print("Coefficients:")
print(coefs)
print(f"\nBaseline (mean of training target): {baseline:.2f}")
print("Verification that predictions match baseline + SHAP sum:",
      np.allclose(preds_test, preds_from_baseline_shap))

print("\n=== SAMPLE RESULTS (first 5 patients) ===")
print(results.head(5))

QUESTION - 04

In [None]:
# pip install pandas scikit-learn shap

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import shap

# Question 4: one-hot encode a small student dataset, fit a linear regression
# on it, and check that SHAP values from shap.LinearExplainer add up to the
# model's predictions.

# 1. Example dataset (replace with your actual data)
data = {
    'study_time': [2, 3, 1, 4, 2, 3],
    'parent_edu': ['high', 'low', 'medium', 'high', 'low', 'medium'],
    'absences': [4, 10, 2, 0, 6, 1],
    'G3': [15, 10, 12, 18, 11, 14]
}
df = pd.DataFrame(data)

# 2. Features & target
X = df.drop(columns=['G3'])
y = df['G3']

# 3. Preprocess & split
cat_cols = X.select_dtypes(include=['object']).columns
pre = ColumnTransformer(
    [('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), cat_cols)],
    remainder='passthrough',
    # Always return a dense array, so the explainer and the row sums below get
    # a plain ndarray however sparse the one-hot columns are.
    sparse_threshold=0,
)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

# 4. Train model
# The encoder is fitted on the training rows only and then applied to the test rows.
model = LinearRegression()
X_train_enc = pre.fit_transform(X_train)
X_test_enc = pre.transform(X_test)
model.fit(X_train_enc, y_train)

# 5. Baseline & SHAP
baseline = y_train.mean()
explainer = shap.LinearExplainer(model, X_train_enc)
shap_values = explainer.shap_values(X_test_enc)

# SHAP values are additive relative to the explainer's expected value (the
# model output at the background-data mean). Use that as the base value for
# the additivity check; for this least-squares fit with an intercept it is
# expected to coincide with the mean training target printed as "Baseline".
shap_base_value = explainer.expected_value

# 6. Check formula
preds = model.predict(X_test_enc)
print("Baseline:", baseline)
print("Predictions:", preds)
print("Baseline + sum(SHAP):", shap_base_value + shap_values.sum(axis=1))