In [None]:
# Neural Network for real-world prediction (binary) + performance analysis
# Uses 'dataset.csv' (assumed to be uploaded to the Colab working directory)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras import layers, models

# --- 1) Load data ---
# Path is relative to the current working directory (the uploaded CSV).
DATA_PATH = 'dataset.csv'
df = pd.read_csv(DATA_PATH)
# Expected layout, per the original notes (not validated here):
#   - label column 'purchased' holding 0/1
#   - remaining columns are numeric or categorical (e.g. education_level)

# --- 2) Preprocess ---
# Split the frame into the label vector and the predictor columns.
y = df['purchased'].values
feature_frame = df.drop(columns=['purchased'])

# One-hot encode the categorical predictors; drop_first=True omits the first
# level of each category so the dummy columns are not perfectly collinear.
X = pd.get_dummies(feature_frame, drop_first=True)

# Hold out 20% for testing. Stratifying on y keeps the class ratio the same
# in both partitions, and random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardize every column (the one-hot dummies included). The statistics are
# learned from the training partition only, so nothing leaks from the test set.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# --- 3) Build simple MLP (inline hyperparameters) ---
# Drop any Keras state left over from a previous run of this cell.
tf.keras.backend.clear_session()

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling repeat from run to run. The train/test split is already
# pinned with random_state=42; without this the reported metrics would still
# change on every execution.
tf.keras.utils.set_random_seed(42)

# Two ReLU hidden layers followed by a single sigmoid unit, which outputs the
# predicted probability of the positive class.
model = models.Sequential([
    layers.Input(shape=(X_train.shape[1],)),  # one input per preprocessed column
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')   # binary output
])

# Binary cross-entropy pairs with the sigmoid output; accuracy is tracked
# per epoch for monitoring only.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- 4) Train (minimal) ---
# Fit for a fixed, small number of epochs. Keras carves the validation set off
# the training arrays (10% of the rows) and reports val_loss / val_accuracy
# on it after each epoch; `history` keeps the per-epoch metric values.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    batch_size=32,
    epochs=10,
    verbose=1,
)

# --- 5) Predict and analyze performance ---
# Flatten the model output to a 1-D vector of positive-class probabilities,
# then threshold at 0.5 to obtain hard 0/1 labels.
y_prob = model.predict(X_test).ravel()
is_positive = y_prob >= 0.5
y_pred = is_positive.astype(int)

# Threshold-based metrics; zero_division=0 reports 0 instead of warning when
# a denominator is empty (e.g. no positive predictions).
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, zero_division=0)
rec = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)

# ROC-AUC is only computed when both classes are present in the test labels;
# otherwise NaN is reported.
if len(np.unique(y_test)) > 1:
    roc = roc_auc_score(y_test, y_prob)
else:
    roc = float('nan')

print("---- Performance on test set ----")
# Labels are left-padded to a common width so the colons line up.
for label, value in (
    ("Accuracy", acc),
    ("Precision", prec),
    ("Recall", rec),
    ("F1-score", f1),
    ("ROC-AUC", roc),
):
    print(f"{label:<9}: {value:.4f}")

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# --- 6) (Optional) Quick look at training curve final values ---
# history.history maps each metric name to its list of per-epoch values;
# the last entry is the value after the final epoch.
final_train_loss = history.history['loss'][-1]
final_val_loss = history.history['val_loss'][-1]
print(f"\nLast epoch — train loss: {final_train_loss:.4f}, val_loss: {final_val_loss:.4f}")


Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 115ms/step - accuracy: 0.5412 - loss: 0.6886 - val_accuracy: 0.2500 - val_loss: 0.8255
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.5482 - loss: 0.7008 - val_accuracy: 0.3750 - val_loss: 0.8125
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.5286 - loss: 0.6987 - val_accuracy: 0.3750 - val_loss: 0.8082
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.5660 - loss: 0.6884 - val_accuracy: 0.3750 - val_loss: 0.8026
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.5816 - loss: 0.6865 - val_accuracy: 0.3750 - val_loss: 0.7978
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.5425 - loss: 0.6952 - val_accuracy: 0.3750 - val_loss: 0.7948
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━