In [13]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier

# Set the data paths
dataset_paths = ['training_set_1.csv', 'training_set_2.csv', 'training_set_3.csv', 'training_set_4.csv', 'training_set_5.csv', 'training_set_6.csv']
test_set_path = 'testing_set.csv'

# Initialize a list to store individual models
models = []

# Loop over each dataset
for i, dataset_path in enumerate(dataset_paths, start=1):
    print(f"\nTraining on Dataset {i}")

    # Load the dataset using pandas
    df = pd.read_csv(dataset_path)

    # Extract features (X) and target variable (y)
    X = df.drop('target', axis=1)
    y = df['target']

    # Encode the target variable if it contains string labels
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)

    # Create an SVM model
    model = SVC(kernel='linear', C=1.0, probability=True)

    # Train the model
    model.fit(X, y)

    # Append the model to the list
    models.append(('model{}'.format(i), model))

# Create an ensemble of models using majority voting
ensemble_model = VotingClassifier(estimators=models, voting='hard')

# Load the common test set
common_test_df = pd.read_csv(test_set_path)
common_X_test = common_test_df.drop('target', axis=1)
common_y_test = label_encoder.transform(common_test_df['target'])

# Predict using the ensemble model
ensemble_model.fit(common_X_test, common_y_test)
y_pred_ensemble = ensemble_model.predict(common_X_test)

# Evaluate the ensemble model
accuracy_ensemble = accuracy_score(common_y_test, y_pred_ensemble)
conf_matrix_ensemble = confusion_matrix(common_y_test, y_pred_ensemble)
classification_rep_ensemble = classification_report(common_y_test, y_pred_ensemble)

# Display results for the ensemble model
print("\nResults for Ensemble Model on Common Test Set:")
print(f"Accuracy: {accuracy_ensemble * 100:.2f}%")
print("Confusion Matrix:")
print(conf_matrix_ensemble)
print("Classification Report:")
print(classification_rep_ensemble)




Training on Dataset 1

Training on Dataset 2

Training on Dataset 3

Training on Dataset 4

Training on Dataset 5

Training on Dataset 6

Results for Ensemble Model on Common Test Set:
Accuracy: 100.00%
Confusion Matrix:
[[213   0   0]
 [  0 140   0]
 [  0   0  76]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       213
           1       1.00      1.00      1.00       140
           2       1.00      1.00      1.00        76

    accuracy                           1.00       429
   macro avg       1.00      1.00      1.00       429
weighted avg       1.00      1.00      1.00       429

