# In [None]:

# Khush Libaas – Final FAIR Jupyter Notebook (Simulated Version)

# --- SECTION 1: Setup & Simulated DBRepo Access
# Persistent identifier (PID) URLs on the test.dbrepo.tuwien.ac.at instance,
# one per data split (presumably train / validation / test, going by the
# names -- confirm against the repository).
# NOTE: nothing in the visible code dereferences these URLs; the data is read
# from local CSV files instead, so they act only as provenance references.
TRAIN_PID = "https://test.dbrepo.tuwien.ac.at/pid/9b858643-5ff0-495a-bcef-8c2e77d85713"
VALID_PID = "https://test.dbrepo.tuwien.ac.at/pid/0a1a4634-38cc-486a-9e4c-746819d350b4"
TEST_PID  = "https://test.dbrepo.tuwien.ac.at/pid/2e5a8c3f-9072-4d6d-ae5d-4d0342127cba"

import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import json

# Simulated file loading from DBRepo PIDs: the three splits are read from
# local CSV files in the working directory, in train / valid / test order.
train_df, valid_df, test_df = map(
    pd.read_csv,
    ("khush_libaas_train.csv", "khush_libaas_valid.csv", "khush_libaas_test.csv"),
)

# --- SECTION 2: Data Preparation
from sklearn.preprocessing import LabelEncoder

# Input columns and the label column; every one of them is label-encoded below.
features = [
    'brand', 'price', 'size', 'color',
    'gender', 'style', 'fabric_type', 'region_popularity',
]
target = 'event_type'

# One fitted encoder per column, kept so codes can be mapped back to labels.
encoder_dict = {}
splits = (train_df, valid_df, test_df)

for column in [*features, target]:
    # Fit on the values of all three splits together so that every value
    # occurring in any split has a code when it is transformed below.
    encoder = LabelEncoder().fit(pd.concat([frame[column] for frame in splits]))
    # Overwrite the raw column with its integer codes, in place.
    for frame in splits:
        frame[column] = encoder.transform(frame[column])
    encoder_dict[column] = encoder

# Feature matrix and label vector for each split.
X_train, X_valid, X_test = (frame[features] for frame in splits)
y_train, y_valid, y_test = (frame[target] for frame in splits)

# --- SECTION 3: Model Training
# The 3-nearest-neighbour classifier is fitted on the training and validation
# rows together (training rows first).
fit_features = pd.concat([X_train, X_valid])
fit_labels = pd.concat([y_train, y_valid])
model = KNeighborsClassifier(n_neighbors=3).fit(fit_features, fit_labels)

# --- SECTION 4: Evaluation & Output Saving
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Key the per-class metrics by the original event names instead of the opaque
# integer codes produced by the label encoder, so the saved JSON can be read
# without the encoder at hand. Only classes that occur in the test labels or
# in the predictions are listed, which is the set the report covers by default.
present_codes = sorted(set(y_test) | set(y_pred))
class_names = [
    str(name) for name in encoder_dict[target].inverse_transform(present_codes)
]
report = classification_report(
    y_test,
    y_pred,
    labels=present_codes,
    target_names=class_names,
    output_dict=True,
)

# Save evaluation metrics
with open("evaluation_metrics.json", "w", encoding="utf-8") as f:
    json.dump(
        {
            "accuracy": float(acc),
            "classification_report": report,
        },
        f,
        indent=4,
    )

# Plot confusion matrix
# By default confusion_matrix() orders rows/columns by the sorted labels that
# occur in y_test or y_pred; recover those labels so the axes can show the
# original event names instead of bare positions.
cm_codes = sorted(set(y_test) | set(y_pred))
if len(cm_codes) == cm.shape[0]:
    cm_names = [
        str(name) for name in encoder_dict[target].inverse_transform(cm_codes)
    ]
else:
    # The matrix does not match the labels seen here (e.g. it was built with
    # an explicit label list), so keep seaborn's default tick labels.
    cm_names = "auto"

plt.figure(figsize=(8,6))
# fmt="d" prints the integer counts in full; the default ".2g" format switches
# to scientific notation once a cell reaches three digits.
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=cm_names,
    yticklabels=cm_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
# Tight bounding box so that long class names are not clipped in the image.
plt.savefig("confusion_matrix.png", bbox_inches="tight")
plt.close()

# Plot feature importance (simulated via frequency)
# NOTE: this is not a model-derived importance. For every integer code it
# counts how many times that code appears in X_train across all feature
# columns combined, and the totals are plotted in descending order.
importance = X_train.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False)
plt.figure(figsize=(8,6))
# The codes are integers; passed as-is, both axes are numeric and seaborn picks
# the frequencies as the categorical axis, contradicting the axis labels below.
# Converting the codes to strings makes them the categorical axis: one
# horizontal bar per code, with its frequency as the bar length.
sns.barplot(x=importance.values, y=importance.index.astype(str))
plt.title("Feature Importance (Simulated)")
plt.xlabel("Frequency")
plt.ylabel("Encoded Feature Value")
plt.savefig("feature_importance_chart.png")
plt.close()

# Generate Top 5 recommendations (simulated)
# Score = highest class probability the model assigns to each test row.
test_df['score'] = model.predict_proba(X_test).max(axis=1)
test_df['event_prediction'] = encoder_dict[target].inverse_transform(y_pred)

# With 3 neighbours the scores take only a few distinct values, so ties are the
# norm; a stable sort keeps tied rows in their original file order instead of
# an arbitrary one.
recommendations = (
    test_df.sort_values(by='score', ascending=False, kind='mergesort')
    .head(5)
    .copy()
)
# test_df holds label-encoded integers at this point. Decode every encoded
# column on the exported copy so the CSV shows the original values; test_df
# itself is left encoded.
for encoded_col, col_encoder in encoder_dict.items():
    if encoded_col in recommendations.columns:
        recommendations[encoded_col] = col_encoder.inverse_transform(
            recommendations[encoded_col]
        )
recommendations.to_csv("recommendations.csv", index=False)

# --- SECTION 5: Model Saving
with open("khush_libaas_model.pkl", "wb") as f:
    pickle.dump(model, f)

# The model was fitted on label-encoded integers, so it can only be fed (and
# its predictions decoded) with the very encoders fitted above. Persist them
# next to the model; the model file itself is unchanged.
with open("khush_libaas_encoders.pkl", "wb") as f:
    pickle.dump(encoder_dict, f)

# --- SECTION 6: CodeMeta Metadata
# Description of this notebook written out as codemeta.json.
code_meta = {
    # CodeMeta's own JSON-LD context; the bare schema.org context does not
    # identify the document as CodeMeta.
    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
    "@type": "SoftwareSourceCode",
    "name": "Khush Libaas Recommender Notebook",
    "codeRepository": "https://github.com/MohsinKhalid08/Khush-Libaas-FAIR",
    "programmingLanguage": "Python",
    # Licence given as its SPDX URL rather than a free-text name.
    "license": "https://spdx.org/licenses/MIT",
    "author": {
        "@type": "Person",
        "name": "Mohsin Khalid",
        # Affiliation is an organization entity, not a plain string.
        "affiliation": {
            "@type": "Organization",
            "name": "TU Wien",
        },
    },
}
with open("codemeta.json", "w", encoding="utf-8") as f:
    json.dump(code_meta, f, indent=4)

print("Notebook complete. Outputs saved. FAIR compliance simulated.")
