In [14]:
# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier, StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [24]:
# Step 2: Build a synthetic stand-in for a water potability dataset
# NOTE: the labels below are drawn independently of the feature values, so this
# data carries no learnable signal -- it only exercises the modelling pipeline.
np.random.seed(0)  # pin NumPy's global RNG so every run produces the same draws

feature_names = [
    'ph', 'Hardness', 'Solids', 'Chloramines', 'Sulfate',
    'Conductivity', 'Organic_carbon', 'Trihalomethanes', 'Turbidity',
]

# Feature matrix: 300 samples x 9 random feature values
data = np.random.rand(300, 9)

# One binary label per sample (0: Not potable, 1: Potable)
labels = np.random.randint(0, 2, 300)

# Combine the named feature columns and the target column in one DataFrame
df = pd.DataFrame(data, columns=feature_names).assign(Potability=labels)

In [25]:
# Step 3: Hold out 20% of the rows as a test set (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='Potability'),  # features: every column except the target
    df['Potability'],               # target: the potability label
    test_size=0.2,
    random_state=0,
)

In [26]:
# Step 4: Create the three standalone classifiers, all seeded with random_state=0
dt_clf, rf_clf, ada_clf = (
    DecisionTreeClassifier(random_state=0),                   # single decision tree
    RandomForestClassifier(n_estimators=10, random_state=0),  # forest of 10 trees
    AdaBoostClassifier(n_estimators=50, random_state=0),      # up to 50 boosting rounds
)

In [27]:
# Step 5: Initialize ensemble methods
# Bagging ensemble of 10 estimators built from the decision tree.
# The base estimator is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old spelling
# was removed in 1.4, so `base_estimator=` raises TypeError on current releases.
# The first positional parameter is the base estimator in both old and new versions.
bagging_clf = BaggingClassifier(dt_clf, n_estimators=10, random_state=0)

# Stacking ensemble: the random forest and AdaBoost models are the base learners,
# and a decision tree is the final estimator that combines their outputs.
stacking_clf = StackingClassifier(
    estimators=[('rf', rf_clf), ('ada', ada_clf)],
    final_estimator=dt_clf,
)

In [28]:
# Step 6: Fit each standalone classifier on the training split
# (same order as before: decision tree, random forest, AdaBoost)
for base_model in (dt_clf, rf_clf, ada_clf):
    base_model.fit(X_train, y_train)

In [29]:
# Step 7: Fit the bagging and stacking ensembles on the training split
for ensemble_model in (bagging_clf, stacking_clf):
    ensemble_model.fit(X_train, y_train)



In [30]:
# Step 8: Predict test-set labels with every fitted model
# The unpacking targets line up one-to-one with the models listed below.
dt_pred, rf_pred, ada_pred, bagging_pred, stacking_pred = (
    fitted_model.predict(X_test)
    for fitted_model in (dt_clf, rf_clf, ada_clf, bagging_clf, stacking_clf)
)

In [31]:
# Step 9: Report test-set accuracy for each model, in a fixed order
predictions_by_label = {
    "Decision Tree Accuracy:": dt_pred,
    "Random Forest Accuracy:": rf_pred,
    "AdaBoost Accuracy:": ada_pred,
    "Bagging Accuracy:": bagging_pred,
    "Stacking Accuracy:": stacking_pred,
}
for report_label, model_pred in predictions_by_label.items():
    print(report_label, accuracy_score(y_test, model_pred))

Decision Tree Accuracy: 0.6333333333333333
Random Forest Accuracy: 0.55
AdaBoost Accuracy: 0.48333333333333334
Bagging Accuracy: 0.6
Stacking Accuracy: 0.6666666666666666
