<a href="https://colab.research.google.com/github/CodeGeek2003/Selected-Topics-IS-Phase1/blob/main/final_phase.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Import Libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from torch.utils.data import DataLoader, TensorDataset

# Seed PyTorch's global random number generator once, up front, so repeated
# runs of the notebook start from the same RNG state. The call is the last
# expression in the cell, so the Generator it returns is echoed as the output.
torch.manual_seed(42)


<torch._C.Generator at 0x7ec13ca69990>

In [None]:
# Read the raw customer table and discard the columns that are not used as
# model inputs. The drops are kept as two separate steps: first the named
# demographic/identifier columns, then the auto-named "Unnamed: 11" column
# (presumably an empty trailing column in the CSV -- verify against the file).
df = (
    pd.read_csv("customer_data.csv")
    .drop(columns=["id", "education", "region", "gender", "age"])
    .drop(columns=["Unnamed: 11"])
)

# Preview the remaining columns.
df.head()


Unnamed: 0,income,purchase_frequency,purchase_amount,product_category,promotion_usage,satisfaction_score,loyalty_status
0,40682,frequent,18249,Books,0,6,Gold
1,15317,rare,4557,Clothing,1,6,Regular
2,38849,rare,11822,Clothing,0,6,Silver
3,11568,frequent,4098,Food,0,7,Regular
4,46952,occasional,19685,Clothing,1,5,Regular


In [None]:
# 2. Preprocess Dataset
#
# Operates on the `df` that the previous cell loaded and trimmed. The encoding
# below overwrites columns of `df` in place, so re-run the loading cell before
# re-running this one; otherwise the encoders are fit on already-encoded
# integer codes instead of the original category names.

# Encode categorical *feature* columns as integer codes.
# The target column (loyalty_status) is intentionally NOT listed here: it is
# encoded exactly once, further down, by `label_encoder`. Encoding it in this
# loop as well would leave `label_encoder` fitted on integer codes, so
# `label_encoder.classes_` / `inverse_transform` could no longer recover the
# class names.
cat_cols = ["product_category", "purchase_frequency"]
feature_encoders = {}  # column name -> fitted LabelEncoder, kept for decoding
for col in cat_cols:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col].astype(str))
    feature_encoders[col] = encoder
# NOTE(review): integer codes impose an order on the categories; for a nominal
# column such as product_category a one-hot encoding may be more appropriate.

# Encode target label. LabelEncoder numbers the classes in sorted order of
# their names, e.g. Gold=0, Regular=1, Silver=2.
label_encoder = LabelEncoder()
df["loyalty_status"] = label_encoder.fit_transform(df["loyalty_status"].astype(str))

# Separate features and target
X = df.drop("loyalty_status", axis=1)
y = df["loyalty_status"]

# Standardise every feature column to zero mean / unit variance.
# NOTE: this scaler is fit on ALL rows. Any evaluation that holds rows out
# (e.g. cross-validation) should refit a scaler on its own training split
# rather than reuse these statistics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to PyTorch tensors: float32 inputs, int64 class indices
# (the dtype nn.CrossEntropyLoss expects for targets).
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y.values, dtype=torch.long)

print("✅ Data preprocessing complete")


✅ Data preprocessing complete


In [None]:
# Preview `df` again: the preprocessing cell overwrote the categorical columns
# in place, so they now hold integer codes instead of strings.
df.head()

Unnamed: 0,income,purchase_frequency,purchase_amount,product_category,promotion_usage,satisfaction_score,loyalty_status
0,40682,0,18249,1,0,6,0
1,15317,2,4557,2,1,6,1
2,38849,2,11822,2,0,6,2
3,11568,0,4098,4,0,7,1
4,46952,1,19685,2,1,5,1


In [None]:
# 3. Define ANN Model

class ANNModel(nn.Module):
    """Fully connected classifier with two ReLU-activated hidden layers.

    Args:
        input_size: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        output_size: Number of output units (one per class).

    The forward pass returns the raw output of the last linear layer; no
    softmax is applied inside the model.
    """

    def __init__(self, input_size, hidden1, hidden2, output_size):
        super().__init__()
        # First hidden stage: input -> hidden1
        self.fc1 = nn.Linear(input_size, hidden1)
        self.relu1 = nn.ReLU()
        # Second hidden stage: hidden1 -> hidden2
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.relu2 = nn.ReLU()
        # Output projection: hidden2 -> output_size
        self.output = nn.Linear(hidden2, output_size)

    def forward(self, x):
        """Map a batch of feature rows to one score per output unit."""
        hidden = self.fc1(x)
        hidden = self.relu1(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu2(hidden)
        logits = self.output(hidden)
        return logits


In [None]:
# 4. Cross Validation and Training
#
# For every fold: standardise the features using statistics from that fold's
# training rows only, train a fresh ANNModel, then score it on the held-out
# rows. Per-fold accuracy and weighted F1 are collected in `accuracies` and
# `f1_scores` for the summary cell.

# Training hyperparameters, named so they are easy to find and tune.
N_SPLITS = 5
BATCH_SIZE = 16
N_EPOCHS = 50
LEARNING_RATE = 0.01

kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
accuracies = []
f1_scores = []

# Unscaled feature matrix. The scaler is fit inside the loop instead of
# reusing a globally scaled tensor: fitting it on all rows would let the
# validation rows influence the mean/variance used to transform the
# training data (data leakage).
features_raw = X.to_numpy(dtype=np.float64)
n_classes = len(label_encoder.classes_)

for fold, (train_idx, val_idx) in enumerate(kf.split(features_raw), start=1):
    print(f"\n----- Fold {fold} -----")

    # Fit the scaler on the training split only, then apply it to both splits.
    fold_scaler = StandardScaler().fit(features_raw[train_idx])
    X_train = torch.tensor(fold_scaler.transform(features_raw[train_idx]), dtype=torch.float32)
    X_val = torch.tensor(fold_scaler.transform(features_raw[val_idx]), dtype=torch.float32)
    y_train, y_val = y_tensor[train_idx], y_tensor[val_idx]

    train_data = TensorDataset(X_train, y_train)
    val_data = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=BATCH_SIZE)

    # A new model and optimizer per fold so no weights carry over between folds.
    model = ANNModel(input_size=X_train.shape[1], hidden1=32, hidden2=16, output_size=n_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Training loop
    for epoch in range(N_EPOCHS):
        model.train()
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluation: predicted class = index of the largest output per row.
    model.eval()
    y_pred, y_true = [], []
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            predicted = model(batch_x).argmax(dim=1)
            y_pred.extend(predicted.tolist())
            y_true.extend(batch_y.tolist())

    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    accuracies.append(acc)
    f1_scores.append(f1)

    # NOTE(review): an accuracy near 0.60 paired with a weighted F1 near 0.45
    # is what a model that always predicts the majority class would score --
    # if you see that pattern, inspect the distribution of y_pred.
    print(f"Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")



----- Fold 1 -----
Accuracy: 0.6008, F1 Score: 0.4510

----- Fold 2 -----
Accuracy: 0.5967, F1 Score: 0.4459

----- Fold 3 -----
Accuracy: 0.6008, F1 Score: 0.4510

----- Fold 4 -----
Accuracy: 0.6073, F1 Score: 0.4590

----- Fold 5 -----
Accuracy: 0.6012, F1 Score: 0.4515


In [None]:
# 5. Summary Report
#
# Average the per-fold metrics gathered in `accuracies` and `f1_scores`
# by the cross-validation cell and print them to four decimal places.
mean_accuracy = np.mean(accuracies)
mean_f1 = np.mean(f1_scores)

print("\n===== Cross-Validation Summary =====")
print(f"Mean Accuracy: {mean_accuracy:.4f}")
print(f"Mean F1 Score: {mean_f1:.4f}")



===== Cross-Validation Summary =====
Mean Accuracy: 0.6014
Mean F1 Score: 0.4517
