**Part 1**

In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the breast-cancer table from a comma-separated file in the working directory.
df = pd.read_csv('Breast_cancer_data.csv', sep=',')

# Positional split of the columns: everything but the last column is treated as
# the feature matrix, and the last column as the target vector.
features = df.iloc[:, :-1]
target = df.iloc[:, -1]
X, y = features.values, target.values

# Hold out 20% of the rows for final evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [46]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score

# Baseline model: a single decision tree, seeded for reproducibility.
dtc = DecisionTreeClassifier(random_state=0)

# Candidate tree depths 1 through 9.
params = {'max_depth': list(range(1, 10))}

# 5-fold cross-validated search over the depths, ranked by F1, using only the
# training split.
grid_search = GridSearchCV(estimator=dtc, param_grid=params, scoring='f1', cv=5)
grid_search.fit(X_train, y_train)

# Keep the estimator that the search selected as best.
dtc_best = grid_search.best_estimator_

# Report F1 of the selected tree on the held-out test split.
y_pred = dtc_best.predict(X_test)
f1_dtc = f1_score(y_test, y_pred)
print('F1 score for Decision Tree:', f1_dtc)

F1 score for Decision Tree: 0.9185185185185185


In [47]:
from sklearn.ensemble import BaggingClassifier

# Bagging ensemble of 100 seeded decision trees; the ensemble itself is seeded too.
dtc = DecisionTreeClassifier(random_state=0)
bc = BaggingClassifier(
    estimator=dtc,
    n_estimators=100,
    random_state=0,
)

# The 'estimator__' prefix routes the parameter to the wrapped tree, so this
# grid tunes the depth (1 through 9) of each bagged tree.
params = {'estimator__max_depth': list(range(1, 10))}

# 5-fold cross-validated search ranked by F1, using only the training split.
grid_search = GridSearchCV(estimator=bc, param_grid=params, scoring='f1', cv=5)
grid_search.fit(X_train, y_train)

# Keep the ensemble that the search selected as best.
bag_best = grid_search.best_estimator_

# Report F1 of the selected ensemble on the held-out test split.
y_pred = bag_best.predict(X_test)
f1_bag = f1_score(y_test, y_pred)
print('F1 score for Bagged Decision Tree:', f1_bag)

F1 score for Bagged Decision Tree: 0.9343065693430657


In [48]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with library-default settings apart from the fixed seed.
rf = RandomForestClassifier(random_state=0)

# Candidate depths 1 through 9 for the trees in the forest.
params = {'max_depth': list(range(1, 10))}

# 5-fold cross-validated search ranked by F1, using only the training split.
grid_search = GridSearchCV(estimator=rf, param_grid=params, scoring='f1', cv=5)
grid_search.fit(X_train, y_train)

# Keep the forest that the search selected as best.
rf_best = grid_search.best_estimator_

# Report F1 of the selected forest on the held-out test split.
y_pred = rf_best.predict(X_test)
f1_rf = f1_score(y_test, y_pred)
print('F1 score for Random Forest:', f1_rf)

F1 score for Random Forest: 0.9343065693430657


In [49]:
from sklearn.ensemble import AdaBoostClassifier

# Weak learner for boosting: a seeded decision tree whose depth is tuned below.
dtc = DecisionTreeClassifier(random_state=0)
# n_estimators=100 is only a starting value: the grid below sets n_estimators
# explicitly for every candidate, so it never reaches a fitted model.
ada = AdaBoostClassifier(estimator=dtc, n_estimators=100, random_state=0)

# Joint grid: 9 tree depths x 5 ensemble sizes = 45 candidate settings.
params = {'estimator__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9],
          'n_estimators': [10,50,100,500,1000]}

# 45 candidates x 5 folds = 225 fits, some with 1000 boosting rounds, which is
# slow when run one at a time. n_jobs=-1 spreads the fits over all CPU cores.
# Every estimator is seeded, so the selected model and scores are unaffected.
grid_search = GridSearchCV(ada, params, cv=5, scoring='f1', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Keep the boosted ensemble that the search selected as best.
ada_best = grid_search.best_estimator_

# Report F1 of the selected ensemble on the held-out test split.
y_pred = ada_best.predict(X_test)
f1_ada = f1_score(y_test, y_pred)
print('F1 score for AdaBoost:', f1_ada)

F1 score for AdaBoost: 0.9


**Part 2**

In [50]:
from sklearn.ensemble import VotingClassifier

# Combine the four tuned models from Part 1 into one soft-voting ensemble.
# Each entry is a (label, estimator) pair.
tuned_models = [
    ('dtc', dtc_best),
    ('bc', bag_best),
    ('rf', rf_best),
    ('ada', ada_best),
]
vc = VotingClassifier(estimators=tuned_models, voting='soft')

# Train the ensemble on the training split.
vc.fit(X_train, y_train)

# Report F1 of the ensemble on the held-out test split.
y_pred = vc.predict(X_test)
f1_vc = f1_score(y_test, y_pred)
print('F1 score for Voting Classifier:', f1_vc)

F1 score for Voting Classifier: 0.9197080291970803
