In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# Check for available devices
if torch.cuda.is_available():
    device = torch.device("cuda")  # Use CUDA if available
else:
    device = torch.device("cpu")   # Use CPU if CUDA is not available

In [3]:
# Load data
file_path = "../20240312_Deep_Learning/NN_Classification/housepricedata.csv"
df = pd.read_csv(file_path)

In [4]:
# Separate features and target variable
X = df.drop("AboveMedianPrice", axis=1)
y = df["AboveMedianPrice"]

In [5]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32, device=device)

In [8]:
# Initialize classifiers
svm_classifier = SVC()
dt_classifier = DecisionTreeClassifier()
lr_classifier = LogisticRegression()
knn_classifier = KNeighborsClassifier()
nb_classifier = GaussianNB()

In [9]:
# Train classifiers
svm_classifier.fit(X_train_scaled, y_train)
dt_classifier.fit(X_train_scaled, y_train)
lr_classifier.fit(X_train_scaled, y_train)
knn_classifier.fit(X_train_scaled, y_train)
nb_classifier.fit(X_train_scaled, y_train)

In [10]:
# Make predictions
svm_pred = svm_classifier.predict(X_test_scaled)
dt_pred = dt_classifier.predict(X_test_scaled)
lr_pred = lr_classifier.predict(X_test_scaled)
knn_pred = knn_classifier.predict(X_test_scaled)
nb_pred = nb_classifier.predict(X_test_scaled)

In [11]:
# Calculate accuracy for each classifier
svm_accuracy = accuracy_score(y_test, svm_pred)
dt_accuracy = accuracy_score(y_test, dt_pred)
lr_accuracy = accuracy_score(y_test, lr_pred)
knn_accuracy = accuracy_score(y_test, knn_pred)
nb_accuracy = accuracy_score(y_test, nb_pred)

# Print accuracy for each classifier
print("SVM Accuracy:", svm_accuracy)
print("Decision Tree Accuracy:", dt_accuracy)
print("Logistic Regression Accuracy:", lr_accuracy)
print("KNN Accuracy:", knn_accuracy)
print("Naive Bayes Accuracy:", nb_accuracy)

SVM Accuracy: 0.9041095890410958
Decision Tree Accuracy: 0.886986301369863
Logistic Regression Accuracy: 0.9246575342465754
KNN Accuracy: 0.928082191780822
Naive Bayes Accuracy: 0.9006849315068494


In [12]:
# Combine predictions
ensemble_inputs = np.column_stack((svm_pred, dt_pred, lr_pred, knn_pred, nb_pred))

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(ensemble_inputs, dtype=torch.float32)
y_train_tensor = torch.tensor(y_test.values, dtype=torch.float32)

In [13]:
# Define a simple neural network model
class EnsembleNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(EnsembleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x

In [14]:
# Initialize the ensemble model
input_dim = ensemble_inputs.shape[1]
hidden_dim = 64
output_dim = 1
ensemble_model = EnsembleNN(input_dim, hidden_dim, output_dim)

In [15]:
# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(ensemble_model.parameters(), lr=0.001)

In [16]:
# Train the ensemble model
num_epochs = 100
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = ensemble_model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor.view(-1, 1))
    loss.backward()
    optimizer.step()

In [17]:
# Make predictions with the ensemble model
with torch.no_grad():
    ensemble_model.eval()
    ensemble_pred_prob = ensemble_model(X_train_tensor).numpy()
    ensemble_pred = (ensemble_pred_prob > 0.5).astype(int)

In [18]:
# Calculate accuracy
ensemble_accuracy = accuracy_score(y_test, ensemble_pred)
print("Ensemble Accuracy:", ensemble_accuracy)

Ensemble Accuracy: 0.928082191780822


In [19]:
# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, ensemble_pred)
print("Confusion Matrix:")
print(conf_matrix)

Confusion Matrix:
[[147  14]
 [  7 124]]
