<a href="https://colab.research.google.com/github/RochaErik/AlgorithmComparison/blob/main/AlgorithmComparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install catboost
!pip install lightgbm
!pip install xgboost



In [None]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

In [None]:
# Location of the raw Wine data file.
# NOTE(review): the path sits under /content/drive/MyDrive, so this presumably needs
# Google Drive mounted in Colab first; no mount cell is visible in this notebook - confirm.
WINE_DATA_PATH = '/content/drive/MyDrive/DatasetSeminario/Wine/wine.data'

# The file has no header row, so pandas labels the columns with integers.
wine_df = pd.read_csv(WINE_DATA_PATH, header=None)

In [None]:
# Preview the first rows of the raw table (columns are integer-labelled because header=None).
wine_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [None]:
# Check row count, dtypes and non-null counts for every column.
wine_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       178 non-null    int64  
 1   1       178 non-null    float64
 2   2       178 non-null    float64
 3   3       178 non-null    float64
 4   4       178 non-null    float64
 5   5       178 non-null    int64  
 6   6       178 non-null    float64
 7   7       178 non-null    float64
 8   8       178 non-null    float64
 9   9       178 non-null    float64
 10  10      178 non-null    float64
 11  11      178 non-null    float64
 12  12      178 non-null    float64
 13  13      178 non-null    int64  
dtypes: float64(11), int64(3)
memory usage: 19.6 KB


In [None]:
# Number of distinct values per column; column 0 (used as the target below) has only a few.
wine_df.nunique()

0       3
1     126
2     133
3      79
4      63
5      53
6      97
7     132
8      39
9     101
10    132
11     78
12    122
13    121
dtype: int64

In [None]:
# Column 0 holds the class label; every remaining column is a feature.
y = wine_df[0]
X = wine_df.drop(columns=[0])

In [None]:
# Sanity check: the feature matrix should no longer contain column 0.
X.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [None]:
# Sanity check: the target is the integer class label taken from column 0.
y.head()

0    1
1    1
2    1
3    1
4    1
Name: 0, dtype: int64

In [None]:
# XGBoost requires the class labels to start at 0, while the raw labels here start at 1,
# so re-encode them. (The original note dates this requirement to XGBoost 1.3.2 -
# version not verified here.) After this cell `y` is the encoded array, not the raw column.

le = LabelEncoder()
y = le.fit_transform(y)

In [None]:
# Display names, index-aligned with `classifiers` below.
names = [
    'AdaBoost',
    'CatBoost',
    'LightGBM',
    'XGBoost',
]

# Seed shared by every model so that re-running the notebook reproduces the same scores.
MODEL_SEED = 42

# One estimator per entry in `names`, all with library defaults except for
# silenced logging (CatBoost, LightGBM) and the explicit seed.
classifiers = [
    AdaBoostClassifier(random_state=MODEL_SEED),
    CatBoostClassifier(silent=True, random_seed=MODEL_SEED),
    LGBMClassifier(verbosity=-1, random_state=MODEL_SEED),
    XGBClassifier(random_state=MODEL_SEED),
]

# The two lists are consumed with zip(), which silently truncates on a length mismatch.
assert len(names) == len(classifiers), 'names and classifiers must be index-aligned'

In [None]:
# Cross-validation scheme: CV_SPLITS folds repeated CV_REPEATS times,
# i.e. CV_SPLITS * CV_REPEATS fits per model, with a fixed seed for the shuffling.
CV_SPLITS = 10
CV_REPEATS = 10
CV_SEED = 42

rkf = RepeatedKFold(
    n_splits=CV_SPLITS,
    n_repeats=CV_REPEATS,
    random_state=CV_SEED,
)

In [None]:
# Cross-validate each model on the Wine data and collect the mean and standard
# deviation of its per-fold scores, both expressed as percentages.
# The lists are rebuilt from scratch, so re-running this cell is safe.
wine_scores_mean, wine_scores_std = [], []

for name, clf in zip(names, classifiers):
  results = cross_val_score(clf, X, y, cv=rkf)

  # Compute the summary statistics once and reuse them for storage and reporting.
  mean_pct = results.mean()*100
  std_pct = results.std()*100
  wine_scores_mean.append(mean_pct)
  wine_scores_std.append(std_pct)

  # Per-model report: header, raw fold scores, summary line, footer.
  print(
      f'--------- {name} ---------',
      results,
      'Accuracy: %.2f%% (%.2f%%)' % (mean_pct, std_pct),
      '------------------------------',
      sep='\n',
  )

--------- AdaBoost ---------
[0.94444444 0.88888889 0.72222222 0.88888889 1.         0.94444444
 0.94444444 0.94444444 1.         0.76470588 0.94444444 0.83333333
 0.88888889 0.83333333 0.88888889 0.83333333 1.         0.83333333
 0.82352941 0.94117647 0.94444444 0.88888889 0.94444444 1.
 1.         0.66666667 0.94444444 0.88888889 0.88235294 0.76470588
 1.         1.         0.88888889 1.         0.88888889 1.
 1.         0.77777778 0.82352941 0.94117647 0.94444444 0.83333333
 1.         1.         0.94444444 0.94444444 0.83333333 0.83333333
 0.94117647 0.88235294 0.94444444 1.         0.94444444 0.88888889
 0.94444444 0.88888889 0.94444444 0.83333333 0.94117647 0.88235294
 1.         0.88888889 0.72222222 0.88888889 1.         0.94444444
 0.94444444 0.88888889 0.88235294 0.88235294 0.66666667 0.88888889
 0.88888889 0.77777778 0.77777778 1.         1.         0.72222222
 1.         0.88235294 0.88888889 0.88888889 1.         0.88888889
 0.94444444 0.72222222 1.         0.83333333 1.  

In [None]:
# Mean cross-validated score (in %) per model, in the same order as `names`.
wine_scores_mean

[89.83006535947712, 97.97712418300654, 97.42156862745098, 96.6764705882353]

In [None]:
# Standard deviation of the per-fold scores (in %) per model, in the same order as `names`.
wine_scores_std

[8.935178395579298, 3.2313949567723625, 3.20624544058637, 4.235287813680455]

In [None]:
# Summary table: one row per model with its mean cross-validated score (in %).
Algo_results = pd.DataFrame({
    'Name': names,
    'Results': wine_scores_mean,
})

In [None]:
# Display the summary table of mean scores per model.
Algo_results

Unnamed: 0,Name,Results
0,AdaBoost,89.830065
1,CatBoost,97.977124
2,LightGBM,97.421569
3,XGBoost,96.676471
