In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the data
mushrooms = pd.read_csv('mushrooms.csv')

# Preprocess the data
X = mushrooms.drop('class', axis=1)
y = mushrooms['class']
X = pd.get_dummies(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter tuning for Decision Tree
dt_params = {'max_depth': [3, 5, 7, 9]}
dt_grid = GridSearchCV(DecisionTreeClassifier(), dt_params, cv=5)
dt_grid.fit(X_train, y_train)

# Hyperparameter tuning for Random Forest
rf_params = {'n_estimators': [50, 100, 150], 'max_depth': [3, 5, 7, 9]}
rf_grid = GridSearchCV(RandomForestClassifier(), rf_params, cv=5)
rf_grid.fit(X_train, y_train)

# Compare accuracies
dt_acc = accuracy_score(y_test, dt_grid.predict(X_test))
rf_acc = accuracy_score(y_test, rf_grid.predict(X_test))

print(f"Decision Tree accuracy: {dt_acc:.4f}")
print(f"Random Forest accuracy: {rf_acc:.4f}")


Decision Tree accuracy: 1.0000
Random Forest accuracy: 1.0000


In [2]:
from sklearn.ensemble import AdaBoostClassifier

# Hyperparameter tuning for AdaBoost
adaboost_params = {'n_estimators': [50, 100, 150], 'learning_rate': [0.01, 0.1, 1.0]}
adaboost_grid = GridSearchCV(AdaBoostClassifier(), adaboost_params, cv=5)
adaboost_grid.fit(X_train, y_train)

# Compare accuracies
adaboost_acc = accuracy_score(y_test, adaboost_grid.predict(X_test))

print(f"Decision Tree accuracy: {dt_acc:.4f}")
print(f"AdaBoost accuracy: {adaboost_acc:.4f}")


Decision Tree accuracy: 1.0000
AdaBoost accuracy: 1.0000


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the diabetes dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the models
log_reg = LogisticRegression()
svm_poly = SVC(kernel='poly')
decision_tree = DecisionTreeClassifier()

# Create the ensemble model
ensemble = VotingClassifier(estimators=[('lr', log_reg), ('svm', svm_poly), ('dt', decision_tree)], voting='hard')

# Train the ensemble model
ensemble.fit(X_train, y_train)

# Evaluate the ensemble model using mean squared error
y_pred = ensemble.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Ensemble Mean Squared Error: {mse:.4f}")



Ensemble Mean Squared Error: 6476.7528
