In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load features
df = pd.read_csv("extracted_features.csv")

# Assume last column is the label
X = df.iloc[:, 1:].values  
y = df.iloc[:, 0].values  

# Normalize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split into training (80%) and testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [16]:
from sklearn.decomposition import PCA

# Apply PCA to reduce dimensions while keeping 95% variance
pca = PCA(n_components=0.95)  
X_pca = pca.fit_transform(X_scaled)

# Split data again
X_train, X_test, y_train, y_test = train_test_split(X_pca, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Train Naïve Bayes Model
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Predict
y_pred = nb_model.predict(X_test)

# Evaluate
from sklearn.metrics import accuracy_score

# Predict on training data
y_train_pred = nb_model.predict(X_train)

# Predict on test data
y_test_pred = nb_model.predict(X_test)

# Calculate accuracy
accuracy_train = accuracy_score(y_train, y_train_pred)
accuracy_test = accuracy_score(y_test, y_test_pred)

print(f"PCA + Naïve Bayes Train Accuracy: {accuracy_train:.4f}")
print(f"PCA + Naïve Bayes Test Accuracy: {accuracy_test:.4f}")


PCA + Naïve Bayes Train Accuracy: 0.7030
PCA + Naïve Bayes Test Accuracy: 0.4557


In [13]:
from sklearn.svm import SVC

# Train Naïve Bayes for probability estimation
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Use NB's probability predictions as new features
X_train_nb = nb_model.predict_proba(X_train)
X_test_nb = nb_model.predict_proba(X_test)

# Train SVM on transformed data
svm = SVC(kernel='rbf', C=1, gamma='scale')
svm.fit(X_train_nb, y_train)

# Predict & Evaluate
y_pred_svm = svm.predict(X_test_nb)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"Naïve Bayes + SVM Accuracy: {accuracy_svm:.4f}")


Naïve Bayes + SVM Accuracy: 0.4552


In [14]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

# Define models
nb_model = GaussianNB()
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Combine models using Voting Classifier
ensemble = VotingClassifier(estimators=[('nb', nb_model), ('rf', rf_model)], voting='soft')
ensemble.fit(X_train, y_train)

# Predict & Evaluate
y_pred_ensemble = ensemble.predict(X_test)
accuracy_ensemble = accuracy_score(y_test, y_pred_ensemble)
print(f"Naïve Bayes + Random Forest Accuracy: {accuracy_ensemble:.4f}")


Naïve Bayes + Random Forest Accuracy: 0.4572
