# 1.3



In [67]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [68]:
class PreProcess:
    """Load the customer table and turn it into PyTorch data loaders.

    Intended call order: construct, optionally call the plotting helpers,
    then ``normalize()`` followed by ``split_data()``.
    """

    def __init__(self, csv_path="./teleCust1000t.csv"):
        """Read the dataset into ``self.dataSet``.

        Args:
            csv_path: Path of the CSV file to load. The default is the
                location that used to be hard-coded here.
        """
        self.dataSet = pd.read_csv(csv_path)

    def plot_heatmap(self):
        """Show an annotated heatmap of the pairwise column correlations."""
        corr_matrix = self.dataSet.corr()
        plt.figure(figsize=(8, 6))
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f")
        plt.title('Correlation Matrix Heatmap')
        plt.show()

    def plot_histogram(self, columns=("ed", "tenure")):
        """Show one histogram (with KDE overlay) per requested column.

        Args:
            columns: Names of the ``self.dataSet`` columns to plot. Defaults
                to the two columns that were plotted originally.
        """
        for column in columns:
            plt.figure(figsize=(8, 6))
            sns.histplot(self.dataSet[column], kde=True, bins=30, color='blue')
            plt.title(f"Histogram of {column}")
            plt.xlabel(column)
            plt.ylabel("Frequency")
            plt.grid(True)
            plt.show()

    def normalize(self):
        """Separate features from the ``custcat`` target and prepare both.

        Sets:
            self.y: Labels shifted so that the smallest label becomes 0.
            self.X_raw: Unscaled feature matrix; this is what
                ``split_data()`` consumes.
            self.X: Features min-max scaled using the whole dataset. Kept for
                backward compatibility and exploration only; it is NOT used
                for the train/val/test loaders because its statistics include
                the held-out rows.
        """
        features = self.dataSet.drop(columns=['custcat']).values
        labels = self.dataSet['custcat'].values
        # Adjust labels to be zero-indexed
        self.y = labels - labels.min()

        self.X_raw = features
        self.X = MinMaxScaler().fit_transform(features)

    def split_data(self, test_size=0.2, val_size=0.2, batch_size=16, random_state=93):
        """Split into train/val/test, scale, and wrap the parts in DataLoaders.

        The scaler is fitted on the training portion only and then applied to
        the validation and test portions, so no held-out statistics leak into
        training. Scaled validation/test values may therefore fall slightly
        outside [0, 1].

        Args:
            test_size: Fraction of all rows held out for testing.
            val_size: Fraction of the remaining (non-test) rows used for
                validation.
            batch_size: Batch size of every returned loader.
            random_state: Seed passed to both ``train_test_split`` calls.

        Returns:
            dict with keys ``"input size"``, ``"classes"``, ``"train"``,
            ``"test"`` and ``"val"``.

        Raises:
            RuntimeError: If ``normalize()`` has not been called yet.
        """
        if not hasattr(self, "X_raw") or not hasattr(self, "y"):
            raise RuntimeError("normalize() must be called before split_data()")

        X_rest, X_test, y_rest, y_test = train_test_split(
            self.X_raw, self.y, test_size=test_size, random_state=random_state
        )
        X_train, X_val, y_train, y_val = train_test_split(
            X_rest, y_rest, test_size=val_size, random_state=random_state
        )

        # Fit on the training rows only; reuse those statistics elsewhere.
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        X_test = scaler.transform(X_test)
        self.scaler = scaler

        def make_loader(features, labels, shuffle):
            # Wrap one split as float features / integer labels for batching.
            dataset = TensorDataset(
                torch.tensor(features, dtype=torch.float32),
                torch.tensor(labels, dtype=torch.long),
            )
            return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

        train_loader = make_loader(X_train, y_train, shuffle=True)
        val_loader = make_loader(X_val, y_val, shuffle=False)
        test_loader = make_loader(X_test, y_test, shuffle=False)

        # Number of input features and classes. Labels are zero-indexed, so
        # max + 1 covers every label even if a class is absent from the
        # training split.
        input_size = X_train.shape[1]
        num_classes = int(self.y.max()) + 1

        return {"input size": input_size, "classes": num_classes, "train": train_loader, "test": test_loader, "val": val_loader}

In [69]:
# Load the CSV and show the exploratory plots before any preprocessing.
preprocess = PreProcess()
preprocess.plot_heatmap()
preprocess.plot_histogram()
# normalize() has to run first: split_data() reads the arrays it stores on
# the instance.
preprocess.normalize()
# Dict holding the feature count, class count and train/val/test loaders.
data = preprocess.split_data()

In [None]:
# Expose the prepared pieces as module-level names; create_train_evaluate()
# reads them as globals.
input_size, num_classes = data["input size"], data["classes"]
train_loader, val_loader, test_loader = data["train"], data["val"], data["test"]

In [None]:
# Model 1: Single hidden layer with optional Batch Normalization and Dropout
class Model1(nn.Module):
    """Classifier with a single 80-unit hidden layer.

    Pipeline: Linear -> [BatchNorm1d] -> ReLU -> [Dropout] -> Linear, where
    the bracketed stages are present only when enabled. The forward pass
    returns raw class scores; no softmax is applied here.

    Args:
        input_size: Number of input features.
        num_classes: Number of output scores.
        use_batch_norm: Insert batch normalization before the activation.
        dropout_prob: Dropout probability; values <= 0 disable dropout.
    """

    def __init__(self, input_size, num_classes, use_batch_norm=False, dropout_prob=0.0):
        super().__init__()
        self.use_batch_norm = use_batch_norm
        self.hidden = nn.Linear(input_size, 80)

        # Optional stages are stored as None when switched off.
        if use_batch_norm:
            self.batch_norm_hidden = nn.BatchNorm1d(80)
        else:
            self.batch_norm_hidden = None

        if dropout_prob > 0:
            self.dropout = nn.Dropout(dropout_prob)
        else:
            self.dropout = None

        self.output = nn.Linear(80, num_classes)

    def forward(self, x):
        """Map a batch of feature rows to one score per class."""
        features = self.hidden(x)
        if self.use_batch_norm:
            features = self.batch_norm_hidden(features)
        features = torch.relu(features)
        if self.dropout is not None:
            features = self.dropout(features)
        return self.output(features)

# Model 2: Two hidden layers with optional Batch Normalization and Dropout
class Model2(nn.Module):
    """Classifier with two hidden layers of 100 and 50 units.

    Each hidden stage is Linear -> [BatchNorm1d] -> ReLU -> [Dropout], with
    the bracketed parts present only when enabled, followed by a final
    Linear layer that returns raw class scores.

    Args:
        input_size: Number of input features.
        num_classes: Number of output scores.
        use_batch_norm: Insert batch normalization before each activation.
        dropout_prob: Dropout probability used after both hidden layers;
            values <= 0 disable dropout.
    """

    def __init__(self, input_size, num_classes, use_batch_norm=False, dropout_prob=0.0):
        super().__init__()
        self.use_batch_norm = use_batch_norm
        wants_dropout = dropout_prob > 0

        # First hidden stage; optional parts are None when switched off.
        self.hidden1 = nn.Linear(input_size, 100)
        if use_batch_norm:
            self.batch_norm_hidden1 = nn.BatchNorm1d(100)
        else:
            self.batch_norm_hidden1 = None
        if wants_dropout:
            self.dropout1 = nn.Dropout(dropout_prob)
        else:
            self.dropout1 = None

        # Second hidden stage, same layout at half the width.
        self.hidden2 = nn.Linear(100, 50)
        if use_batch_norm:
            self.batch_norm_hidden2 = nn.BatchNorm1d(50)
        else:
            self.batch_norm_hidden2 = None
        if wants_dropout:
            self.dropout2 = nn.Dropout(dropout_prob)
        else:
            self.dropout2 = None

        self.output = nn.Linear(50, num_classes)

    def forward(self, x):
        """Map a batch of feature rows to one score per class."""
        first = self.hidden1(x)
        if self.use_batch_norm:
            first = self.batch_norm_hidden1(first)
        first = torch.relu(first)
        if self.dropout1 is not None:
            first = self.dropout1(first)

        second = self.hidden2(first)
        if self.use_batch_norm:
            second = self.batch_norm_hidden2(second)
        second = torch.relu(second)
        if self.dropout2 is not None:
            second = self.dropout2(second)

        return self.output(second)

In [None]:
class Model:
    """
    Bundle a classifier network with its loss, optimizer and train/eval loops.

    Args:
        hidden_layer (int): 1 builds ``Model1``, 2 builds ``Model2``.
        optimizer (str): Optimizer name: 'SGD', 'Adam', or 'Adadelta'.
        input_size (int): Number of input features.
        num_classes (int): Number of output classes.
        use_batch_norm (bool): Whether to use batch normalization.
        dropout_prob (float): Dropout probability (default: 0.0).
        weight_decay (float): Weight decay for regularization (default: 0.0).
        lr (float, optional): Learning-rate override. When None, the
            per-optimizer default is used (SGD 0.001, Adam 0.0001,
            Adadelta 0.01).

    Attributes:
        criterion: ``nn.CrossEntropyLoss`` instance used for every loss value.
        model: The constructed network.
        optimizer: The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: If ``hidden_layer`` or ``optimizer`` is not supported.
    """

    # Optimizer name -> (optimizer class, default learning rate).
    _OPTIMIZERS = {
        "SGD": (optim.SGD, 0.001),
        "Adam": (optim.Adam, 0.0001),
        "Adadelta": (optim.Adadelta, 0.01),
    }

    def __init__(self, hidden_layer, optimizer, input_size, num_classes,
                 use_batch_norm=False, dropout_prob=0.0, weight_decay=0.0, lr=None):
        self.criterion = nn.CrossEntropyLoss()

        if hidden_layer == 1:
            self.model = Model1(input_size, num_classes, use_batch_norm, dropout_prob)
        elif hidden_layer == 2:
            self.model = Model2(input_size, num_classes, use_batch_norm, dropout_prob)
        else:
            raise ValueError("hidden_layer must be 1 or 2")

        if optimizer not in self._OPTIMIZERS:
            raise ValueError("Unsupported optimizer. Choose from: 'SGD', 'Adam', 'Adadelta'")
        optimizer_cls, default_lr = self._OPTIMIZERS[optimizer]
        self.optimizer = optimizer_cls(
            self.model.parameters(),
            lr=default_lr if lr is None else lr,
            weight_decay=weight_decay,
        )

    def _evaluate(self, loader):
        """Run the model over ``loader`` in eval mode without gradients.

        Returns:
            tuple: ``(mean_loss, correct, seen)`` where ``mean_loss`` is the
            per-sample average loss (NaN if the loader yields nothing),
            ``correct`` the number of right predictions and ``seen`` the
            number of samples processed.
        """
        self.model.eval()
        loss_sum = 0.0
        correct = 0
        seen = 0
        with torch.no_grad():
            for X_batch, y_batch in loader:
                outputs = self.model(X_batch)
                batch_len = y_batch.size(0)
                # The criterion returns a batch mean; weight it by the batch
                # length so a short final batch is not over-represented.
                loss_sum += self.criterion(outputs, y_batch).item() * batch_len
                correct += (torch.argmax(outputs, dim=1) == y_batch).sum().item()
                seen += batch_len
        mean_loss = loss_sum / seen if seen else float("nan")
        return mean_loss, correct, seen

    def train_model(self, train_loader, val_loader, epochs):
        """
        Train for ``epochs`` epochs, printing and plotting the loss curves.

        Args:
            train_loader: DataLoader yielding training batches.
            val_loader: DataLoader evaluated after every epoch.
            epochs: Number of passes over ``train_loader``.

        Returns:
            tuple: ``(train_losses, val_losses)``, each a list with one
            per-sample mean loss per epoch.
        """
        train_losses = []
        val_losses = []

        for epoch in range(epochs):
            self.model.train()
            loss_sum = 0.0
            seen = 0
            for X_batch, y_batch in train_loader:
                self.optimizer.zero_grad()
                loss = self.criterion(self.model(X_batch), y_batch)
                loss.backward()
                self.optimizer.step()
                batch_len = y_batch.size(0)
                loss_sum += loss.item() * batch_len
                seen += batch_len

            train_losses.append(loss_sum / seen if seen else float("nan"))

            val_loss, _, _ = self._evaluate(val_loader)
            val_losses.append(val_loss)
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}")

        # Plot training and validation loss
        plt.figure(figsize=(12, 6))
        plt.plot(train_losses, label='Training Loss')
        plt.plot(val_losses, label='Validation Loss')
        plt.title('Loss over Epochs')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()

        return train_losses, val_losses

    def evaluate_model(self, test_loader):
        """
        Evaluate the model on a test dataset and print accuracy and loss.

        Args:
            test_loader: DataLoader for test data.

        Returns:
            tuple: ``(accuracy, test_loss)``; both are NaN when the loader
            yields no samples.
        """
        test_loss, correct, seen = self._evaluate(test_loader)
        accuracy = correct / seen if seen else float("nan")

        print(f"Test Accuracy: {accuracy:.2f}, Test Loss: {test_loss:.2f}")
        return accuracy, test_loss

    def evaluate_and_display_random_samples(self, test_loader, num_samples=10):
        """
        Print actual vs. predicted labels for randomly chosen test samples.

        Args:
            test_loader: DataLoader for test data.
            num_samples: Number of random samples to display. Capped at the
                number of samples available.
        """
        self.model.eval()
        label_batches = []
        prediction_batches = []

        # Collect labels and predictions for the whole loader
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = self.model(X_batch)
                prediction_batches.append(torch.argmax(outputs, dim=1))
                label_batches.append(y_batch)

        print("Random Samples: Actual vs Predicted")
        if not label_batches:
            # Nothing to sample from; torch.cat would fail on an empty list.
            return

        all_labels = torch.cat(label_batches)
        all_predictions = torch.cat(prediction_batches)

        # Sampling without replacement cannot draw more than what exists.
        sample_count = min(num_samples, len(all_labels))
        random_indices = np.random.choice(len(all_labels), sample_count, replace=False)

        for i, idx in enumerate(random_indices):
            actual_label = all_labels[idx].item()
            predicted_label = all_predictions[idx].item()
            print(f"Sample {i + 1}: Actual: {actual_label}, Predicted: {predicted_label}")

def create_train_evaluate(hidden_layer, optimizer, batch_norm=False, dropout_prob=0.0, weight_decay=0.0, epochs=200):
    """
    Create, train, and evaluate a model with specified parameters.

    Reads the module-level names ``input_size``, ``num_classes``,
    ``train_loader``, ``val_loader`` and ``test_loader``, so those must be
    defined before this function is called.

    Args:
        hidden_layer (int): Number of hidden layers.
        optimizer (str): Optimizer to use.
        batch_norm (bool): Whether to use batch normalization.
        dropout_prob (float, optional): Dropout probability.
        weight_decay (float, optional): Weight decay (L2 regularization).
        epochs (int, optional): Number of training epochs (default: 200).

    Returns:
        Model: The trained wrapper, after it has been evaluated on the
        test loader.
    """
    model_instance = Model(
        hidden_layer=hidden_layer,
        optimizer=optimizer,
        input_size=input_size,
        num_classes=num_classes,
        use_batch_norm=batch_norm,
        dropout_prob=dropout_prob,
        weight_decay=weight_decay,
    )

    model_instance.train_model(train_loader, val_loader, epochs=epochs)
    model_instance.evaluate_model(test_loader)

    return model_instance

In [None]:
# Baseline: SGD with every other option left at its default, one run per
# network depth.
configs = [{"hidden_layer": depth, "optimizer": "SGD"} for depth in (1, 2)]

for config in configs:
    model_instance = create_train_evaluate(**config)

In [None]:
# SGD with batch normalization and a depth-specific dropout probability.
configs = [
    {"hidden_layer": depth, "optimizer": "SGD", "batch_norm": True, "dropout_prob": drop}
    for depth, drop in ((1, 0.15), (2, 0.3))
]

for config in configs:
    model_instance = create_train_evaluate(**config)

In [None]:
# Same SGD / batch-norm / dropout setup, now with weight decay added.
configs = [
    {"hidden_layer": depth, "optimizer": "SGD", "batch_norm": True, "dropout_prob": drop, "weight_decay": 1e-4}
    for depth, drop in ((1, 0.15), (2, 0.3))
]

for config in configs:
    model_instance = create_train_evaluate(**config)
        



        # {"hidden_layer": 1, "optimizer": "Adadelta", "batch_norm": True, "dropout_prob": 0.55, "weight_decay": 1e-4},
        # {"hidden_layer": 2, "optimizer": "Adadelta", "batch_norm": True, "dropout_prob": 0.7, "weight_decay": 1e-4},

In [None]:
# Adam with batch normalization, heavier dropout and weight decay.
configs = [
    {"hidden_layer": depth, "optimizer": "Adam", "batch_norm": True, "dropout_prob": drop, "weight_decay": 1e-4}
    for depth, drop in ((1, 0.55), (2, 0.7))
]

for config in configs:
    model_instance = create_train_evaluate(**config)
        

In [None]:
# Adadelta with the same regularization settings as the Adam experiment.
configs = [
    {"hidden_layer": depth, "optimizer": "Adadelta", "batch_norm": True, "dropout_prob": drop, "weight_decay": 1e-4}
    for depth, drop in ((1, 0.55), (2, 0.7))
]

for config in configs:
    model_instance = create_train_evaluate(**config)
        

In [None]:
# Train the SGD / batch-norm / dropout models again and, for each one, print
# ten random test samples with their actual and predicted labels.
configs = [
    {"hidden_layer": depth, "optimizer": "SGD", "batch_norm": True, "dropout_prob": drop}
    for depth, drop in ((1, 0.15), (2, 0.3))
]

for config in configs:
    model_instance = create_train_evaluate(**config)
    model_instance.evaluate_and_display_random_samples(test_loader, num_samples=10)