# In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns

# --- Data loading and preparation -------------------------------------------
# Load the CSV, drop the columns that are not used as features, and encode the
# target as 1 (label 'M') / 0 (label 'B').
data = pd.read_csv('/content/data.csv')
data = data.drop(['id', 'Unnamed: 32'], axis=1)
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})

# `.map` turns any label other than 'M'/'B' into NaN; fail loudly instead of
# letting a corrupted target flow into training.
if data['diagnosis'].isna().any():
    raise ValueError("Column 'diagnosis' contains labels other than 'M' and 'B'.")

X = data.drop('diagnosis', axis=1)
y = data['diagnosis']

# Split BEFORE computing any statistics so that nothing derived from the test
# rows (imputation means, scaling mean/variance) leaks into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Mean-impute numeric features using training-set means only. Assigning the
# result (rather than chained `data[col].fillna(..., inplace=True)`) is the
# form that keeps working under pandas Copy-on-Write.
train_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Fit the scaler on the training split, then apply the same transform to test.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep `X` as a scaled array of all rows (as before), now using the
# train-fitted imputation means and scaler.
X = scaler.transform(X.fillna(train_means))

# --- Model training and scoring ---------------------------------------------
# Baseline: a single depth-1 decision tree (a "stump").
base_model = DecisionTreeClassifier(max_depth=1, random_state=42)
base_model.fit(X_train, y_train)
# Column 1 of predict_proba is the score for the second class (label 1).
base_predictions = base_model.predict_proba(X_test)[:, 1]

# Boosted ensemble of 50 stumps.
# The weak learner is passed positionally on purpose: its keyword was
# `base_estimator` before scikit-learn 1.2 and is `estimator` since then (the
# old name was removed in 1.4), while the first positional parameter has kept
# the same meaning across those versions. A fresh, unfitted stump is supplied
# so the already-fitted baseline object is not shared with the ensemble.
ada_model = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1, random_state=42),
    n_estimators=50,
    random_state=42,
)
ada_model.fit(X_train, y_train)
ada_predictions = ada_model.predict_proba(X_test)[:, 1]

# ROC AUC on the held-out split for both models.
base_roc_auc = roc_auc_score(y_test, base_predictions)
ada_roc_auc = roc_auc_score(y_test, ada_predictions)

# --- Reporting ----------------------------------------------------------------
# One row per model, holding its ROC AUC on the test split.
results_df = pd.DataFrame(
    [
        ('Base Decision Tree', base_roc_auc),
        ('AdaBoost', ada_roc_auc),
    ],
    columns=['Model', 'ROC AUC Score'],
)
print(results_df)

# Bar chart comparing the two scores side by side.
plt.figure(figsize=(8, 6))
ax = sns.barplot(data=results_df, x='Model', y='ROC AUC Score')
ax.set_title('Model Performance Comparison')
plt.show()

print("\nAdaBoost typically improves performance by combining multiple weak learners.")
