## Install PyCaret Module

In [None]:
# !pip install pycaret

## Import Modules

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from pycaret.classification import *
%matplotlib inline
warnings.filterwarnings('ignore')

## Load the Dataset

In [None]:
# Read the breast-cancer CSV from the Kaggle input directory into a DataFrame
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/breast-cancer-wisconsin-data/data.csv',
)
# preview the first rows (5 is the default row count of DataFrame.head)
df.head(n=5)

In [None]:
# delete unnecessary columns
# `columns=` already targets the column axis, so the redundant `axis=1`
# argument is omitted.
df = df.drop(columns=['id', 'Unnamed: 32'])

In [None]:
# statistical info: display the summary table produced by DataFrame.describe()
df.describe()

In [None]:
# datatype info: print the per-column report produced by DataFrame.info()
df.info()

## Exploratory Data Analysis

In [None]:
# Bar chart of how many rows fall into each 'diagnosis' class.
# The variable is passed by keyword: recent seaborn releases accept only
# `data` positionally, so a positional Series is no longer read as `x`.
sns.countplot(x='diagnosis', data=df)

In [None]:
# Feature-only copy of the data (target column removed); its column list
# drives the per-feature plots below. `columns=` already implies the column
# axis, so no `axis` argument is needed.
df_temp = df.drop(columns=['diagnosis'])

In [None]:
# create dist plot
# One histogram + KDE per feature column, laid out on a 5x6 grid of subplots.
# `sns.distplot` is deprecated; `sns.histplot(..., kde=True, stat='density',
# kde_kws=dict(cut=3))` is the replacement seaborn documents for it.
fig, ax = plt.subplots(ncols=6, nrows=5, figsize=(20, 20))
ax = ax.flatten()

# Pair each feature with its own subplot.
# NOTE(review): assumes df_temp has at most 30 columns (the grid size);
# any extra columns would be left unplotted -- confirm against the dataset.
for col, axis in zip(df_temp.columns, ax):
    sns.histplot(
        df[col],
        kde=True,
        stat='density',
        kde_kws=dict(cut=3),
        ax=axis,
    )
plt.tight_layout(pad=0.5, w_pad=0.7, h_pad=5.0)

In [None]:
# create box plot
# One box plot per feature column, laid out on a 5x6 grid of subplots.
fig, ax = plt.subplots(ncols=6, nrows=5, figsize=(20, 20))
ax = ax.flatten()

# Pair each feature with its own subplot instead of tracking a manual counter.
# NOTE(review): assumes df_temp has at most 30 columns (the grid size);
# any extra columns would be left unplotted -- confirm against the dataset.
for col, axis in zip(df_temp.columns, ax):
    sns.boxplot(y=col, data=df, ax=axis)
plt.tight_layout(pad=0.5, w_pad=0.7, h_pad=5.0)

## Create and Train the Model

In [None]:
# setup the data
# Initialise the PyCaret classification experiment with 'diagnosis' as the
# target. A fixed `session_id` seeds the experiment so that reruns give the
# same results; the value 42 itself is arbitrary.
# NOTE(review): with a different seed than the original run, the top model in
# compare_models() may differ from the hard-coded 'catboost' used below.
clf = setup(df, target='diagnosis', session_id=42)

In [None]:
# train and test the models
# Runs PyCaret's compare_models() on the experiment created by setup(); the
# return value is not captured here, only displayed as the cell output.
compare_models()

In [None]:
# Build the model to carry forward. The 'catboost' id is hard-coded here
# rather than taken from the compare_models() result.
model = create_model(estimator='catboost')

In [None]:
# Tune the hyperparameters of the model created above and keep the result
best_model = tune_model(estimator=model)

In [None]:
# Show PyCaret's evaluation output for the tuned model
evaluate_model(estimator=best_model)

In [None]:
# Draw the confusion matrix of the tuned model
plot_model(best_model, plot='confusion_matrix')