In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv("/content/df_one_hot_encoded_normalized.csv")
train_data = df[(df['Target']== '0') | (df['Target']== '1')] # Training data (rows labeled 0 and 1)

X = train_data.drop(columns=['Target'])
y = train_data['Target'].astype(int)

print("Training data shape:", X.shape)

Training data shape: (3630, 252)


In [2]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler1 = StandardScaler()
scaler1.fit(X_train)
X_train_scaled = scaler1.transform(X_train)
X_test_scaled = scaler1.transform(X_test)

logistic_model = LogisticRegression(max_iter=1000, random_state=42)
#logistic_model.fit(X_train_scaled, y_train)

svm_model = SVC(C= 100, gamma =1, kernel = 'linear', probability=True, random_state=42)
#svm_model.fit(X_train_scaled, y_train)

rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
#rf_model.fit(X_train_scaled, y_train)


voting_model = VotingClassifier(estimators=[
    ('logistic', logistic_model),
    ('svm', svm_model),
    ('random_forest', rf_model)
], voting='soft')  # 'soft' uses predicted probabilities for better performance

voting_model.fit(X_train, y_train)

y_pred_voting = voting_model.predict(X_test)

print("Voting Classifier Accuracy:", accuracy_score(y_test, y_pred_voting))
print("\nClassification Report:\n", classification_report(y_test, y_pred_voting))


Voting Classifier Accuracy: 0.8925619834710744

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.95      0.92       449
           1       0.91      0.80      0.85       277

    accuracy                           0.89       726
   macro avg       0.90      0.87      0.88       726
weighted avg       0.89      0.89      0.89       726



In [3]:
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(voting_model, X, y, cv=10, scoring='accuracy')

# Print the results
print("Cross-Validation Scores:", cv_scores)
print("Mean Accuracy:", cv_scores.mean())
print("Standard Deviation:", cv_scores.std())

Cross-Validation Scores: [0.89807163 0.91735537 0.89807163 0.90633609 0.92837466 0.89807163
 0.8815427  0.92561983 0.88429752 0.90909091]
Mean Accuracy: 0.9046831955922864
Standard Deviation: 0.015048485161154458
