## 1. Importing Necessary Libraries

In [210]:
# Import packages
%matplotlib inline
import pandas as pd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import pickle


from sklearn import preprocessing
from matplotlib import pyplot as plt
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from itertools import chain
from stdnum import py
from pygments.lexers import go
from sklearn.metrics import precision_score, recall_score, confusion_matrix, roc_curve, precision_recall_curve, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from collections import Counter

## 2. Importing Dataset

In [202]:
data = pd.read_csv(r'dataset_breast_cancer.csv')

# show the data of how many rows and columns
print("Data Shape - ", data.shape)

Data Shape -  (569, 33)


# 3. Read the Data

In [203]:
data.head(5)

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [None]:
# description of the current data set
data.describe(include='all')

From a first look at the data description we can see that:
    - B (benign) is the most frequent value in our target column
    - the `Unnamed: 32` column is empty

In [None]:
data.info()

According to the data, all features are numerical values except the target value diagnosis which is an object: M = malignant, B = benign.

# 4. EDA (Exploratory Data Analysis)

## 4.1 Drop Out Unnecessary Column

As the `Unnamed: 32` column is empty, we will drop it.

In [None]:
data = data.drop('Unnamed: 32',axis=1)

## 4.2 Check the Missing Value

In [None]:
# Flag every null cell, then summarise per column as a count and as a
# percentage of the total number of rows.
null_flags = data.isnull()
n_rows = data.shape[0]

missing_values = null_flags.sum()
percent_missing = null_flags.sum() / n_rows * 100

# The labels below become the column headers of the summary table.
value = dict([
    ('missing_values ', missing_values),
    ('percent_missing %', percent_missing),
])
frame = pd.DataFrame(value)
frame

The table shows that the data is clean: no column has any missing values.

In [None]:
# transformation of type of the target value to numerical
le = preprocessing.LabelEncoder()
data.diagnosis = le.fit_transform(data.diagnosis)
data.diagnosis

Diagnosis:

M = malignant => 1
B = benign => 0

Let's also drop out an id column since we also don't need it.

In [None]:
# drop the id columns
data = data.drop('id',axis=1)

## 4.3 Correlation Matrix with Heatmap

A graphical representation of a correlation matrix representing the correlation between different variables. The value of correlation can take any value
from -1 to 1.

In [None]:
# Separate the predictors from the target by name. After the `id` column is
# dropped, `diagnosis` is the first column of `data`, so positional slices such
# as iloc[:, 0:20] / iloc[:, -1] would put the target into X and a feature into y.
X = data.drop(columns='diagnosis')
y = data['diagnosis']

# Pairwise correlations between every column of the dataset (target included).
corrmatrix = data.corr()
top_corr_features = corrmatrix.index

plt.figure(figsize=(18, 18))

# Plot the matrix computed above; `top_corr_features` spans every column, so
# recomputing data[top_corr_features].corr() would give the same matrix again.
g = sns.heatmap(corrmatrix, annot=True, cmap="RdYlGn")

From the above correlation heatmap, we could get some of the following information:

- Variables such as radius_worst & radius_mean and radius_worst & perimeter_mean have a strong positive correlation, just to name a few.
- Variables such as radius_worst & smoothness_se and fractal_dimension_mean & radius_mean have strong negative correlations, again just to name a few.
- Overall, several pairs of variables are essentially uncorrelated (their correlation value is near 0), while pairs with a strong correlation have a value close to 1.

## 4.4 Positive Correlated Features

In [None]:
# B = benign => 0
# M = malignant => 1
palette ={0 : 'lightblue', 1 : 'gold'}
edgecolor = 'grey'

# Plot +
fig = plt.figure(figsize=(12,12))

plt.subplot(221)
ax1 = sns.scatterplot(x = data['perimeter_mean'], y = data['radius_worst'], hue = "diagnosis",
                    data = data, palette = palette, edgecolor=edgecolor)
plt.title('perimeter mean vs radius worst')
plt.subplot(222)
ax2 = sns.scatterplot(x = data['area_mean'], y = data['radius_worst'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('area mean vs radius worst')
plt.subplot(223)
ax3 = sns.scatterplot(x = data['texture_mean'], y = data['texture_worst'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('texture mean vs texture worst')
plt.subplot(224)
ax4 = sns.scatterplot(x = data['area_worst'], y = data['radius_worst'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('area mean vs radius worst')

fig.suptitle('Positive correlated features', fontsize = 20)
plt.savefig('1')
plt.show()

## 4.5 Uncorrelated Features

In [None]:
# B = benign => 0
# M = malignant => 1

fig = plt.figure(figsize=(12,12))

plt.subplot(221)
ax1 = sns.scatterplot(x = data['smoothness_mean'], y = data['texture_mean'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('smoothness mean vs texture mean')
plt.subplot(222)
ax2 = sns.scatterplot(x = data['radius_mean'], y = data['fractal_dimension_worst'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('radius mean vs fractal dimension_worst')
plt.subplot(223)
ax3 = sns.scatterplot(x = data['texture_mean'], y = data['symmetry_mean'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('texture mean vs symmetry mean')
plt.subplot(224)
ax4 = sns.scatterplot(x = data['texture_mean'], y = data['symmetry_se'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('texture mean vs symmetry se')

fig.suptitle('Uncorrelated features', fontsize = 20)
plt.savefig('2')
plt.show()

## 4.6 Negative Correlated Features

In [None]:
# B = benign => 0
# M = malignant => 1

fig = plt.figure(figsize=(12,12))

plt.subplot(221)
ax1 = sns.scatterplot(x = data['area_mean'], y = data['fractal_dimension_mean'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('smoothness mean vs fractal dimension mean')
plt.subplot(222)
ax2 = sns.scatterplot(x = data['radius_mean'], y = data['fractal_dimension_mean'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('radius mean vs fractal dimension mean')
plt.subplot(223)
ax2 = sns.scatterplot(x = data['area_mean'], y = data['smoothness_se'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('area mean vs fractal smoothness se')
plt.subplot(224)
ax2 = sns.scatterplot(x = data['smoothness_se'], y = data['perimeter_mean'], hue = "diagnosis",
                    data = data, palette =palette, edgecolor=edgecolor)
plt.title('smoothness se vs perimeter mean')

fig.suptitle('Negative correlated features', fontsize = 20)
plt.savefig('3')
plt.show()

# 5. Data Visualization

## 5.1 Diagnosis

In [None]:
#bar chart
plt.rcParams['figure.figsize']=7,7
sns.set_style("darkgrid")
ax = sns.countplot(x=data.diagnosis , palette = "rocket", saturation =1.5)
plt.xlabel("diagnosis malignant = 1 / benign = 0 ", fontsize = 15 )
plt.ylabel("count", fontsize = 20)
plt.title('Number of diagnosis ', fontsize = 20)

In [None]:
#pie chart in percentile
# B = benign => 0
# M = malignant => 1
label=data.diagnosis.value_counts().index
count=data.diagnosis.value_counts().values
color = ['orange', '#8B5A8C']

plt.pie(count,labels=label)
plt.title('Distribution of diagnosis variable', fontsize = 20)
plt.figure(1, figsize=(20,15))
plt.pie(count, labels=label, colors=color, autopct='%1.1f%%')
plt.show()

Plotting histograms of these values lets us better observe their distribution. To do so, we separate the values in each histogram according to the diagnosis column.

## 5.2 Features vs Diagnosis

Observations:
- Mean values of cell radius, perimeter, area, compactness, concavity and concave points can be used to classify the cancer. Larger values of these parameters tend to be associated with malignant tumors.
- Mean values of texture, smoothness, symmetry and fractal dimension do not show a particular preference for one diagnosis over the other. None of the histograms shows noticeable large outliers that would warrant further cleanup.

In [None]:
# Diagnosis encoding: 0 = benign (B), 1 = malignant (M).

# Columns 1-10 of the frame (column 0 is `diagnosis`).
features_mean = list(data.columns[1:11])

# One sub-frame per diagnosis so each histogram can stack the two groups.
dfM = data[data['diagnosis'] == 1]
dfB = data[data['diagnosis'] == 0]

plt.rcParams.update({'font.size': 10})
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(10, 12))
axes = axes.ravel()
for ax, feature in zip(axes, features_mean):
    # 50 equal-width bins spanning the feature's full range over both groups.
    lowest = min(data[feature])
    highest = max(data[feature])
    binwidth = (highest - lowest) / 50
    bin_edges = np.arange(lowest, highest + binwidth, binwidth)

    ax.hist([dfM[feature], dfB[feature]],
            bins=bin_edges,
            alpha=0.5,
            stacked=True,
            density=True,
            label=['Malignant', 'Benign'],
            color=['r', 'g'])
    ax.legend(loc='upper right')
    ax.set_title(feature)
plt.tight_layout()
plt.show()

From these ten graphs we can observe that these features might be useful in predicting whether a patient has cancer, because malignant and benign cases form distinct groups. We can also see the most frequent malignant value of each feature in its graph; visually these may vary because the bin width might not be optimal, but most of them match the values the histograms show.

# 6 Feature Selection

In [None]:
# Diagnosis encoding: 0 = benign (B), 1 = malignant (M).

plt.rcParams['figure.figsize']=16,7
sns.set_style("ticks")

# Predictors are every column except the target.
x = data.drop('diagnosis',axis=1)
y = data.diagnosis

# Seed the randomised tree ensemble so the importance ranking, and therefore
# the hard-coded list of columns dropped a few cells later, is the same on
# every run of the notebook.
model = ExtraTreesClassifier(random_state=1)
model.fit(x,y)

print(model.feature_importances_)
feat_importance = pd.Series(model.feature_importances_, index=x.columns)

# Horizontal bar chart of the 15 highest-scoring features.
feat_importance.nlargest(15).plot(kind='barh', fontsize=12)
plt.title('the 15th most important features are', fontsize=15)
plt.show()

Let's check our current column name.

In [None]:
data.columns

Drop out unimportant features, and choose just the best 15 according the bar chart.

In [None]:
# Columns removed after inspecting the feature-importance chart.
# NOTE(review): 16 columns are listed here, which leaves 14 predictors if the
# frame held 30 before this cell, while the surrounding text says 15 - confirm.
columns_to_drop = [
    'texture_mean',
    'smoothness_mean',
    'compactness_mean',
    'symmetry_mean',
    'perimeter_se',
    'compactness_se',
    'concavity_se',
    'concave points_se',
    'smoothness_worst',
    'symmetry_worst',
    'fractal_dimension_worst',
    'fractal_dimension_mean',
    'texture_se',
    'smoothness_se',
    'symmetry_se',
    'fractal_dimension_se',
]
data = data.drop(columns=columns_to_drop)

In this section, we manipulate the data to prepare it for modeling. There are four main steps that we can take:

- Splitting the data into a training set, a validation set (to help me develop my models), and a test set (to help me evaluate the final version of each model);
- Resampling the training set so that all classes are equally represented;
- Scaling the data, which will help ensure that PCA and some machine learning algorithms work properly;
- Principal Components Analysis (PCA), which will reduce the dimensions of the data and eliminate any multicollinearity.

We do all these steps first to the training data so that we can check the outcome at each step. Once that's done, we put the essential preprocessing steps into a pipeline that I can use to transform the validation and test sets.

## 6.1 Train-Validation-Test Split

We will take 80% of the total dataset to use as training data. The remaining 20% of the original dataset will be devoted half of that to validation and half to be used as a true holdout set, which will be used to evaluate the final versions of each of my models.

In [None]:
# Split first into training and test datasets
X = data.drop('diagnosis', axis=1)
y = data.diagnosis


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=1,
                                                    stratify=y)

In [None]:
# Split again into validation and true holdout (test) datasets
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5,
                                                random_state=1,
                                                stratify=y_test)

In [None]:
# Examine shapes of the subsets
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

Because we will compare model performance on the raw data and on the preprocessed version, we save copies at this point (before preprocessing) for later use.

In [None]:
# Save raw copies of train and validation sets before further preprocessing
X_train_raw = X_train.copy()
y_train_raw = y_train.copy()

X_val_raw = X_val.copy()
y_val_raw = y_val.copy()

## 6.2 Resampling

This preprocessing step deals with the class imbalance. As we saw earlier, Benign (0) makes up a little over 60% of the data set, while Malignant (1) makes up only about 37%.
The overall strategy is to under-sample the bigger classes and over-sample the smaller ones so that both classes are the same size as the median-sized class.

First, Let's put the training data back into one DataFrame to make things a little easier.

In [206]:
# Concatenate X_train and y_train for resampling
df_train = pd.concat([X_train, y_train], axis=1)
print(len(df_train))
df_train.head()

455


Unnamed: 0,radius_mean,perimeter_mean,area_mean,concavity_mean,concave points_mean,radius_se,area_se,radius_worst,texture_worst,perimeter_worst,area_worst,compactness_worst,concavity_worst,concave points_worst,diagnosis
195,12.91,82.53,516.4,0.03873,0.02377,0.1942,15.75,13.88,22.0,90.81,600.6,0.1506,0.1764,0.08235,0
560,14.05,91.38,600.4,0.04462,0.04304,0.3645,29.84,15.3,33.17,100.2,706.7,0.2264,0.1326,0.1048,0
544,13.87,89.77,584.8,0.03688,0.02369,0.272,23.12,15.05,24.75,99.17,688.6,0.2037,0.1377,0.06845,0
495,14.87,96.12,680.9,0.06824,0.04951,0.2323,21.84,16.01,28.48,103.9,783.6,0.1388,0.17,0.1017,0
527,12.34,78.94,468.5,0.02958,0.02647,0.1166,8.955,13.61,19.27,87.22,564.9,0.2074,0.1791,0.107,0


In [207]:
# Check for class imbalance
df_train.diagnosis.value_counts()

0    285
1    170
Name: diagnosis, dtype: int64

Let's use RandomUnderSampler and SMOTE to undersample the larger classes and oversample the smaller one.

In [213]:
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

# Randomly undersample the larger classes
rus = RandomUnderSampler(random_state=2,
                         sampling_strategy={0:170, 1:170,})

X_rus, y_rus = rus.fit_resample(X_train, y_train)

# Check class counts
Counter(y_rus)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "c:\languages\python\python390\lib\site-packages\IPython\core\interactiveshell.py", line 3369, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\alici\AppData\Local\Temp\ipykernel_18488\198468957.py", line 1, in <cell line: 1>
    from imblearn.under_sampling import RandomUnderSampler
  File "c:\languages\python\python390\lib\site-packages\imblearn\__init__.py", line 52, in <module>
    from . import combine
  File "c:\languages\python\python390\lib\site-packages\imblearn\combine\__init__.py", line 5, in <module>
    from ._smote_enn import SMOTEENN
  File "c:\languages\python\python390\lib\site-packages\imblearn\combine\_smote_enn.py", line 11, in <module>
    from ..over_sampling import SMOTE
  File "c:\languages\python\python390\lib\site-packages\imblearn\over_sampling\__init__.py", line 8, in <module>
    from ._smote import SMOTE
  File "c:\languages\python\python390\lib\site-packages\imblearn\over_sampling

In [None]:
# Oversample the smaller class(es) with SMOTE.
# The target holds only the labels 0 and 1, so a sampling_strategy dict keyed
# by classes 4, 5 and 6 names classes that are not in the data. 'auto' lets
# SMOTE bring every non-majority class up to the size of the majority class;
# when the under-sampled input is already balanced it leaves the counts as is.
smote = SMOTE(random_state=3, sampling_strategy='auto')

X_resampled, y_resampled = smote.fit_resample(X_rus, y_rus)

# Check class counts
Counter(y_resampled)

## 6.3 Scaling

## 6.4 PCA

# 7. Define Functions

This part is essential to measure the performance of a model : roc, cross validation, learning curve.

## 7.1. Confusion Matrix and Show Metrics

The confusion matrix, also known as the error matrix, allows visualization of the performance of an algorithm :

true positive (TP) : Malignant tumour correctly identified as malignant
true negative (TN) : Benign tumour correctly identified as benign
false positive (FP) : Benign tumour incorrectly identified as malignant
false negative (FN) : Malignant tumour incorrectly identified as benign

Metrics :

Accuracy : (TP +TN) / (TP + TN + FP +FN)
Precision : TP / (TP + FP)
Recall : TP / (TP + FN)

In [None]:
# Confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize = False,
                          title = 'Confusion matrix',
                          cmap = plt.cm.Blues) :
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix; rows are labelled 'True label' and columns
        'Predicted label'.
    classes : sequence
        Tick labels, used for both axes.
    normalize : bool, default False
        When True, every row is divided by its total so each cell shows the
        fraction of that true class; cells are then printed with two decimals.
        Rows whose total is zero stay all-zero.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # `where` skips empty rows instead of dividing by zero.
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation = 'nearest', cmap = cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation = 0)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum get white text so the number stays
    # readable on the darker background.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])) :
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment = 'center',
                 color = 'white' if cm[i, j] > thresh else 'black')

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
# Show metrics
cm = confusion_matrix(y_test, y_score)

def show_metrics(matrix=None):
    """Print precision, recall and F1 score of a 2x2 confusion matrix.

    Parameters
    ----------
    matrix : array-like of shape (2, 2), optional
        Confusion matrix indexed as [true label, predicted label] with the
        positive class at index 1. When omitted, the notebook-level ``cm``
        variable is used, which keeps existing ``show_metrics()`` calls working.

    Notes
    -----
    A metric whose denominator is zero is reported as 0.000.
    """
    if matrix is None:
        matrix = cm  # notebook-level confusion matrix
    matrix = np.asarray(matrix)

    tn, fp = matrix[0, 0], matrix[0, 1]
    fn, tp = matrix[1, 0], matrix[1, 1]

    # Compute each ratio once; the F1 score is the harmonic mean of the two.
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    if precision + recall:
        f1_score = 2 * precision * recall / (precision + recall)
    else:
        f1_score = 0.0

    print('Precision =     {:.3f}'.format(precision))
    print('Recall    =     {:.3f}'.format(recall))
    print('F1_score  =     {:.3f}'.format(f1_score))

## 7.2 Precision – Recall curve

The precision-recall curve shows the tradeoff between precision and recall for different threshold

In [None]:
# Precision-recall curve
def plot_precision_recall():
    """Plot the precision-recall curve from the notebook-level arrays.

    Reads the global ``recall`` and ``precision`` variables, draws a shaded
    step curve with a solid line on top, and shows the figure.
    """
    axis = plt.gca()
    shade = dict(color = 'b', alpha = 0.2)

    # Translucent step outline plus the filled area beneath it.
    axis.step(recall, precision, where = 'post', **shade)
    axis.fill_between(recall, precision, step = 'post', **shade)

    # Solid curve drawn over the shading.
    axis.plot(recall, precision, linewidth = 2)

    axis.set_xlim([0.0, 1])
    axis.set_ylim([0.0, 1.05])
    axis.set_xlabel('Recall')
    axis.set_ylabel('Precision')
    axis.set_title('Precision Recall Curve')
    plt.show()

## 7.3. ROC curve

The ROC curve is created by plotting the true positive rate (TPR) against the false positive rate (FPR) at various threshold settings.

In [None]:
# ROC curve
def plot_roc():
    """Plot the ROC curve from the notebook-level ``fpr`` and ``tpr`` arrays.

    A dashed black diagonal from (0, 0) to (1, 1) is drawn alongside the
    curve, and the figure is shown.
    """
    axis = plt.gca()
    axis.plot(fpr, tpr, label = 'ROC curve', linewidth = 2)
    # Dashed diagonal for reference.
    axis.plot([0, 1], [0, 1], 'k--', linewidth = 2)
    axis.set_xlabel('False Positive Rate')
    axis.set_ylabel('True Positive Rate')
    axis.set_title('ROC Curve')
    plt.show()

## 7.4  Learning curve

The Learning curve determines cross-validated training and test scores.

In [None]:
# Learning curve
def plot_learning_curve(estimator, title, X, y, ylim = None, cv = None,
                        n_jobs = 1, train_sizes = np.linspace(.1, 1.0, 5)):
    """Plot training and cross-validation scores against training-set size.

    Parameters
    ----------
    estimator : estimator object
        Model passed to ``learning_curve``.
    title : str
        Title of the figure.
    X, y : array-like
        Features and target passed to ``learning_curve``.
    ylim : tuple, optional
        ``(ymin, ymax)`` limits for the score axis; left automatic when None.
    cv, n_jobs, train_sizes
        Forwarded unchanged to ``learning_curve``.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module, so callers can chain ``.show()``.
    """
    # Imported here because no other cell of the notebook brings
    # `learning_curve` into scope; without it the call below raises NameError.
    from sklearn.model_selection import learning_curve

    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel('Training examples')
    plt.ylabel('Score')

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv = cv, n_jobs = n_jobs, train_sizes = train_sizes)

    # Mean and spread across the CV folds (axis 1) for each training size.
    train_scores_mean = np.mean(train_scores, axis = 1)
    train_scores_std = np.std(train_scores, axis = 1)
    test_scores_mean = np.mean(test_scores, axis = 1)
    test_scores_std = np.std(test_scores, axis = 1)

    plt.grid()
    # Shaded bands show one standard deviation either side of each mean.
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha = 0.1,
                     color = "r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha = 0.1, color = "g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color = "r",
             label = "Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color = "g",
             label = "Cross-validation score")
    plt.legend(loc = "best")
    return plt

## 7.5 Cross validation metrics

Cross-validation is a technique to evaluate predictive models by partitioning the original sample into a training set to train the model, and a test set to evaluate it.

In [None]:
# Cross val metric
def cross_val_metrics(model) :
    """Print 5-fold cross-validated accuracy, precision and recall for `model`.

    The notebook-level ``X`` and ``y`` are used as data. Each metric is printed
    as its mean over the folds followed by the standard deviation.
    """
    for metric in ('accuracy', 'precision', 'recall'):
        fold_scores = cross_val_score(model, X, y, cv = 5, scoring = metric)
        summary = '[%s] : %0.5f (+/- %0.5f)' % (
            metric, fold_scores.mean(), fold_scores.std())
        print(summary)

# 8. Machine Learning Applications

y = diagnosis (target)
X = features (radius_mean, area_se, ....)

In [None]:
# Def X and y as NumPy arrays: target vector and feature matrix.
# DataFrame.as_matrix() and the positional `axis` argument of drop() are no
# longer available in current pandas; to_numpy() and drop(columns=...) are the
# supported equivalents. `data` itself is left unchanged so this cell can be
# re-run without a missing-column error.
y = data['diagnosis'].to_numpy()
X = data.drop(columns='diagnosis').to_numpy()

Standard scaler (X) to help to rescale the attributes so that they have mean as 0 and variance as 1.
The ultimate goal to perform standardization is to bring down all the features to a common scale without distorting the differences in the range of the values.

In [None]:
# Normalization: standardise the columns of X with StandardScaler.
# NOTE(review): the scaler is fitted on the full X before the train/test split
# performed in the following cells, so held-out rows contribute to the fitted
# statistics. Fitting on the training split only would avoid that leakage -
# confirm the intended order.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# Train_test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.12, random_state = 42)

In [None]:
# Split again into validation and true holdout (test) datasets
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5,
                                                random_state=1,
                                                stratify=y_test)

In [None]:
# Examine shapes of the subsets
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

## 8.1


In [None]:
#models

In [None]:
#findout best parameter

In [None]:
#fit the models

In [None]:
#visualisation of the result

In [None]:
#validation

In [None]:
#Visualie the result

In [None]:
#Hyperparameter tunning

In [None]:
#visualise the result

# 9. Displaying Best Model

In [None]:
# Collect the accuracy of every classifier in one table.
# NOTE(review): none of the acc_* variables is defined in the visible part of
# the notebook (the section 8.1 cells are placeholders); they must be computed
# before this cell can run.
models = pd.DataFrame({
    'Model': ['Support Vector Machines', 'KNN', 'Logistic Regression',
              'Random Forest', 'Decision Tree', 'LightGBM', 'GBM', 'GBM2', 'AdaBoost',
              'XGBoost', 'CatBoost', 'Naive Bayes'],
    'Score': [acc_svm, acc_knn, acc_logreg, acc_randomforest, acc_dt, acc_lgb,
              acc_gbm, acc_gbm2, acc_adaboost, acc_xgboost, acc_catboost, acc_nb]})

# sort_values returns a new frame; keep it so the bars are drawn best-first.
models = models.sort_values(by='Score', ascending=False)

plt.rcParams['figure.figsize'] = 15, 6
sns.set_style("darkgrid")
ax = sns.barplot(x=models.Model, y=models.Score, palette="rocket", saturation=1.5)
plt.xlabel("Classifier Models", fontsize=20)
plt.ylabel("% of Accuracy", fontsize=20)
plt.title("Accuracy of different Classifier Models", fontsize=20)
plt.xticks(fontsize=12, horizontalalignment='center', rotation=8)
plt.yticks(fontsize=13)

# Write each bar's score as a percentage just above the bar.
for p in ax.patches:
    width, height = p.get_width(), p.get_height()
    # Named left/bottom so the loop does not overwrite the notebook-level
    # `x` and `y` variables.
    left, bottom = p.get_xy()
    ax.annotate(f'{height:.2%}', (left + width / 2, bottom + height * 1.02), ha='center', fontsize='x-large')
plt.show()