In [1]:
# Step 1: Generate a synthetic classification dataset

from sklearn.datasets import make_classification

# 1000 rows by 10 columns: 6 informative features, 2 redundant ones, and
# 2 that are neither. The fixed seed makes the dataset repeatable.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=6,
    n_redundant=2,
    random_state=42,
)

# Display the array shapes as a sanity check
X.shape, y.shape


((1000, 10), (1000,))

In [2]:
# Step 2: Hold out a test set

from sklearn.model_selection import train_test_split

# 30% of the rows go to the test set, the remaining 70% to training;
# the seed pins which rows land on each side.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Display the shapes of all four pieces
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((700, 10), (300, 10), (700,), (300,))

In [3]:
# Step 3: Set up the individual models

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Seed the estimators that have random components so that Restart & Run All
# reproduces the same accuracies, in line with the seeded data generation
# and train/test split. Accuracies recorded from earlier unseeded runs may
# differ slightly from what a seeded re-run prints.
log_reg = LogisticRegression()
tree = DecisionTreeClassifier(random_state=42)  # tree construction has a random component
svm = SVC(probability=True, random_state=42)  # probabilities are needed for soft voting; seed their estimation




In [4]:
# Fit models
# Train each base model on the training split only; the test split is
# used for the accuracy comparison in the following cell.
log_reg.fit(X_train, y_train)
tree.fit(X_train, y_train)
svm.fit(X_train, y_train)

In [5]:
from sklearn.metrics import accuracy_score

# Evaluate each base model on the held-out test set: predict labels, then
# score them against the true labels.

# Logistic regression
log_pred = log_reg.predict(X_test)
log_acc = accuracy_score(y_test, log_pred)

# Decision tree
tree_pred = tree.predict(X_test)
tree_acc = accuracy_score(y_test, tree_pred)

# Support vector machine
svm_pred = svm.predict(X_test)
svm_acc = accuracy_score(y_test, svm_pred)

# Report the three accuracies together
print("Logistic Regression Accuracy:", log_acc)
print("Decision Tree Accuracy:", tree_acc)
print("SVM Accuracy:", svm_acc)


Logistic Regression Accuracy: 0.8166666666666667
Decision Tree Accuracy: 0.86
SVM Accuracy: 0.8966666666666666


In [6]:
from sklearn.ensemble import VotingClassifier

# Combine the three base models into a single voting ensemble. Soft voting
# works from the models' predicted probabilities, which is why the SVC was
# created with probability=True.
ensemble = VotingClassifier(
    voting='soft',
    estimators=[
        ('lr', log_reg),
        ('dt', tree),
        ('svm', svm),
    ],
)

In [7]:
# Train the voting ensemble on the training split.
# NOTE: scikit-learn documents that VotingClassifier.fit trains clones of the
# supplied estimators, so the earlier standalone fits of log_reg, tree and
# svm are not reused here.
ensemble.fit(X_train, y_train)

In [8]:
# Predict labels for the held-out test set with the fitted ensemble.
ensemble_pred = ensemble.predict(X_test)

In [9]:
# accuracy_score was already imported in cell In [5]; this repeat import is harmless.
from sklearn.metrics import accuracy_score
# Score the ensemble on the same test labels used for the individual models,
# so the numbers are directly comparable.
ensemble_acc = accuracy_score(y_test, ensemble_pred)

print("Ensemble Model Accuracy:", ensemble_acc)

Ensemble Model Accuracy: 0.8733333333333333
