# Hi :)

----

# Import libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from scipy.stats import mode
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Initialization

In [2]:
# Dataset class (to be wrapped in a DataLoader)
class CNNFeatureDataset(Dataset):
    """Map-style dataset that pairs feature rows with integer class labels.

    Both inputs are converted to tensors once, at construction time:
    features as ``torch.float32`` and labels as ``torch.long``.
    """

    def __init__(self, X, y):
        """Store ``X`` and ``y`` as tensors on ``self.X`` and ``self.y``."""
        float_dtype, label_dtype = torch.float32, torch.long
        self.X = torch.tensor(X, dtype=float_dtype)
        self.y = torch.tensor(y, dtype=label_dtype)

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

# Define 1D CNN
class SimpleCNN(nn.Module):
    """Small 1D CNN classifier: one Conv1d layer followed by a two-layer head.

    Args:
        input_length: Size of the last axis of each input sample. Defaults
            to 3, the only length the previously hard-coded
            ``nn.Linear(16 * 2, 32)`` could accept.
        num_classes: Number of output logits. Defaults to 2.

    Raises:
        ValueError: If ``input_length`` is shorter than the convolution
            kernel, or ``num_classes`` is less than 1.

    ``forward`` expects input shaped ``[batch, 1, input_length]`` (the
    convolution has a single input channel) and returns raw logits shaped
    ``[batch, num_classes]``; no softmax is applied.
    """

    _CONV_CHANNELS = 16
    _KERNEL_SIZE = 2
    _HIDDEN_UNITS = 32

    def __init__(self, input_length: int = 3, num_classes: int = 2):
        super().__init__()
        if input_length < self._KERNEL_SIZE:
            raise ValueError(
                f"input_length must be >= {self._KERNEL_SIZE}, got {input_length}"
            )
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")

        # Stride 1 and no padding: the convolution shortens the sequence by
        # (kernel_size - 1), so the flattened width follows from input_length
        # instead of being fixed at 16 * 2.
        conv_out_length = input_length - self._KERNEL_SIZE + 1
        self.network = nn.Sequential(
            nn.Conv1d(1, self._CONV_CHANNELS, kernel_size=self._KERNEL_SIZE),
            nn.ReLU(),
            nn.Flatten(),  # [batch, 16 * conv_out_length]
            nn.Linear(self._CONV_CHANNELS * conv_out_length, self._HIDDEN_UNITS),
            nn.ReLU(),
            nn.Linear(self._HIDDEN_UNITS, num_classes),  # logits, one per class
        )

    def forward(self, x):
        """Run ``x`` through the network and return the class logits."""
        return self.network(x)

# ITG

# Loading the dataset

In [None]:
# Read the ITG feature table from its CSV file into a DataFrame.
_itg_features_csv = 'Dataset/ITG_features_df.csv'
ITG_features_df = pd.read_csv(_itg_features_csv)

# Boosting

In [None]:


# Manual "boosting": three different classifiers are trained in sequence, each
# later one on a with-replacement resample of the training set in which rows
# misclassified by the previous model are twice as likely to be drawn. The
# final test prediction is a per-sample majority vote over the three models.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in this
# cell; they are assumed to come from an earlier split -- confirm.

# Integer-array indexing (X[indices]) is positional only on NumPy arrays; on a
# pandas DataFrame/Series the same syntax selects columns / index labels.
# Convert once so resampling always picks rows by position, and use the same
# array form for every fit/predict call so all models see consistent inputs.
X_train_arr = np.asarray(X_train)
y_train_arr = np.asarray(y_train)
X_test_arr = np.asarray(X_test)
n_train = len(X_train_arr)

# Seeded generator: the models are pinned to random_state=42, so the
# resampling is pinned as well to make the whole pipeline reproducible.
resample_rng = np.random.default_rng(42)

# Step 1: Train first model (SVM)
# probability=True is kept from the original even though only predict() is
# called in this cell.
svm = SVC(probability=True, random_state=42)
svm.fit(X_train_arr, y_train_arr)
y_pred1 = svm.predict(X_train_arr)
errors1 = (y_pred1 != y_train_arr).astype(int)  # 1 where the SVM is wrong

# Step 2: Reweight dataset (more weight on misclassified)
weights2 = np.where(errors1 == 1, 2, 1)  # double weight for misclassified
indices2 = resample_rng.choice(n_train, size=n_train, p=weights2 / weights2.sum())

X_train2, y_train2 = X_train_arr[indices2], y_train_arr[indices2]

# Step 3: Train second model (Random Forest)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train2, y_train2)
# Errors are measured on the full original training set, not the resample.
y_pred2 = rf.predict(X_train_arr)
errors2 = (y_pred2 != y_train_arr).astype(int)

# Step 4: Reweight again (based on the Random Forest's errors only)
weights3 = np.where(errors2 == 1, 2, 1)
indices3 = resample_rng.choice(n_train, size=n_train, p=weights3 / weights3.sum())

X_train3, y_train3 = X_train_arr[indices3], y_train_arr[indices3]

# Step 5: Train third model (XGBoost)
# NOTE(review): newer XGBoost releases may warn that `use_label_encoder` is
# unused -- confirm against the installed version before removing it.
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb.fit(X_train3, y_train3)

# Final prediction: majority vote from all models

# Get predictions on test set
pred1 = svm.predict(X_test_arr)
pred2 = rf.predict(X_test_arr)
pred3 = xgb.predict(X_test_arr)

# Combine predictions: stack to shape (3, n_test) and take the most common
# label in each column. flatten() normalises the result to 1-D.
final_preds = np.array([pred1, pred2, pred3])
y_final = mode(final_preds, axis=0).mode.flatten()

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_final))
print("Classification Report:\n", classification_report(y_test, y_final))

# Confusion Matrix
cm = confusion_matrix(y_test, y_final)
sns.heatmap(cm, annot=True, fmt='d')
plt.title("Manual Boosting - Majority Vote Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()
