# Metody ewolucyjne i uczenie się maszyn
## Na kilku zadaniach klasyfikacji z repozytorium UCI porównać metody xgboost i catboost
### Agnieszka Czaplicka, Bartosz Sowul

# test: instalacja potrzebnych modułów

In [1]:
!pip install catboost
!pip install xgboost
!pip install matplotlib
!pip install pandas
!pip install numpy
!pip install scikit_learn
!pip install hyperopt



ModuleNotFoundError: No module named 'lightgbm'

## Ładowanie potrzebnych modułów

In [3]:
import catboost as cb
from hyperopt import hp, tpe
from hyperopt.fmin import fmin
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, StratifiedKFold
import xgboost as xgb

## Ładowanie danych

In [7]:
# Example: the Breast Cancer Wisconsin data set.

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler


def prepare(filename):
    """Load a headerless CSV and return scaled features with binary labels.

    Columns 1-9 are used as features and column 10 as the class label;
    column 0 is not used. ``'?'`` entries are treated as missing values.

    Args:
        filename: Path to the CSV file (no header row).

    Returns:
        Tuple ``(X, y)``. ``X`` is a float array of the nine feature columns,
        each scaled to ``[0, 1]``. ``y`` is an integer array holding 1 where
        the label equals 2 and 0 otherwise.
    """
    # Parsing '?' as NaN at read time keeps every column numeric instead of
    # producing object columns that mix strings and numbers.
    df = pd.read_csv(filename, header=None, na_values='?')

    # Missing values in column 6 are replaced by 0 *before* imputation, so the
    # mean imputer below only ever acts on gaps in the other feature columns.
    df[6] = df[6].fillna(0)

    X = df.iloc[:, 1:10].to_numpy(dtype=float)
    y = (df[10] == 2).astype(int).to_numpy()

    # SimpleImputer (default strategy: column mean) replaces the removed
    # sklearn.preprocessing.Imputer.
    X = SimpleImputer().fit_transform(X)
    X = MinMaxScaler(feature_range=(0, 1)).fit_transform(X)
    return X, y

# Build the feature matrix and label vector, then preview the first ten rows.
X, y = prepare(filename='./data/breast/breast-cancer-wisconsin.data.txt')
X[:10]

array([[ 0.44444444,  0.        ,  0.        ,  0.        ,  0.11111111,
         0.1       ,  0.22222222,  0.        ,  0.        ],
       [ 0.44444444,  0.33333333,  0.33333333,  0.44444444,  0.66666667,
         1.        ,  0.22222222,  0.11111111,  0.        ],
       [ 0.22222222,  0.        ,  0.        ,  0.        ,  0.11111111,
         0.2       ,  0.22222222,  0.        ,  0.        ],
       [ 0.55555556,  0.77777778,  0.77777778,  0.        ,  0.22222222,
         0.4       ,  0.22222222,  0.66666667,  0.        ],
       [ 0.33333333,  0.        ,  0.        ,  0.22222222,  0.11111111,
         0.1       ,  0.22222222,  0.        ,  0.        ],
       [ 0.77777778,  1.        ,  1.        ,  0.77777778,  0.66666667,
         1.        ,  0.88888889,  0.66666667,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.11111111,
         1.        ,  0.22222222,  0.        ,  0.        ],
       [ 0.11111111,  0.        ,  0.11111111,  0.        ,  0

## XGBoost - optymalizacja hiperparametrów

In [None]:
#learning_rate
#gamma
#max_depth
#min_child_weight
#subsample
#colsample_bytree
#reg_alpha
#reg_lambda
#n_estimators

def objective(params):
    """Hyperopt objective for XGBoost: negated cross-validated accuracy.

    Reads the module-level feature matrix ``X`` and label vector ``y``.

    Args:
        params: Sample drawn from the search space, with the keys
            ``'max_depth'``, ``'gamma'`` and ``'colsample_bytree'``.

    Returns:
        The negative mean cross-validation accuracy. ``fmin`` minimises its
        objective, so the sign is flipped to make it search for the highest
        accuracy.
    """
    params = {
        # hp.quniform yields floats; the tree depth must be an integer.
        'max_depth': int(params['max_depth']),
        # Round to three decimals but keep the values numeric.
        'gamma': round(float(params['gamma']), 3),
        'colsample_bytree': round(float(params['colsample_bytree']), 3),
    }

    clf = xgb.XGBClassifier(
        n_estimators=250,
        learning_rate=0.05,
        n_jobs=4,
        **params
    )

    # NOTE(review): the metric was left blank in the original; accuracy is
    # used here - swap in e.g. 'roc_auc' if another metric is intended.
    score = cross_val_score(
        clf, X, y, scoring='accuracy', cv=StratifiedKFold()
    ).mean()
    print("Score {:.3f} params {}".format(score, params))
    return -score

# Search space sampled by hyperopt; the labels match the keys that
# `objective` reads from its argument.
space = dict(
    max_depth=hp.quniform('max_depth', 2, 8, 1),
    colsample_bytree=hp.uniform('colsample_bytree', 0.3, 1.0),
    gamma=hp.uniform('gamma', 0.0, 0.5),
)

# Ten evaluations driven by the `tpe.suggest` algorithm.
best = fmin(objective, space, algo=tpe.suggest, max_evals=10)

print("Hyperopt estimated optimum {}".format(best))

## CatBoost - optymalizacja hiperparametrów

In [None]:
#learning_rate
#depth
#l2_leaf_reg
#rsm
#random_strength
#iterations

def objective(params):
    """Hyperopt objective for CatBoost: negated cross-validated accuracy.

    Reads the module-level feature matrix ``X`` and label vector ``y``.

    Args:
        params: Sample drawn from the search space, with the keys
            ``'depth'``, ``'l2_leaf_reg'`` and ``'rsm'``.

    Returns:
        The negative mean cross-validation accuracy. ``fmin`` minimises its
        objective, so the sign is flipped to make it search for the highest
        accuracy.
    """
    params = {
        # hp.quniform yields floats; the tree depth must be an integer.
        'depth': int(params['depth']),
        'l2_leaf_reg': round(float(params['l2_leaf_reg']), 3),
        'rsm': round(float(params['rsm']), 3),
    }

    # Same budget as the XGBoost run (250 trees, learning rate 0.05,
    # 4 threads) so that the two libraries are compared on equal terms.
    clf = cb.CatBoostClassifier(
        iterations=250,
        learning_rate=0.05,
        thread_count=4,
        verbose=False,  # silence CatBoost's per-iteration log
        **params
    )

    # NOTE(review): the metric was left blank in the original; accuracy is
    # used here - swap in e.g. 'roc_auc' if another metric is intended.
    score = cross_val_score(
        clf, X, y, scoring='accuracy', cv=StratifiedKFold()
    ).mean()
    print("Score {:.3f} params {}".format(score, params))
    return -score


# CatBoost counterparts of the XGBoost search space: tree depth, feature
# subsampling ratio (rsm) and L2 leaf regularisation.
# NOTE(review): the l2_leaf_reg range is a starting point - adjust as needed.
space = {
    'depth': hp.quniform('depth', 2, 8, 1),
    'rsm': hp.uniform('rsm', 0.3, 1.0),
    'l2_leaf_reg': hp.uniform('l2_leaf_reg', 1.0, 10.0),
}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=10)

print("Hyperopt estimated optimum {}".format(best))

## Open In Colab Badge

Anybody can open a copy of any GitHub-hosted notebook within Colab. To make it easier to give people access to live views of GitHub-hosted notebooks,
Colab provides a [shields.io](http://shields.io/)-style badge, which appears as follows:

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb)

The markdown for the above badge is the following:

```markdown
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb)
```

The HTML equivalent is:

```HTML
<a href="https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
```

Remember to replace the notebook URL in this template with the notebook you want to link to.