# Eurybia - Overview
This tutorial will help you understand how Eurybia works through a simple use case.

Contents:
- Compile Eurybia 
- Generate report


In [1]:
from category_encoders import OrdinalEncoder
from lightgbm import LGBMRegressor
from eurybia import SmartDrift
from sklearn.model_selection import train_test_split

## Import the dataset and split it into training and production datasets

In [2]:
from eurybia.data.data_loader import data_loading
# Load the bundled "house_prices" example. The loader returns two objects: the data
# and a companion dictionary (presumably column name -> label; confirm in data_loader).
house_prices_bundle = data_loading('house_prices')
house_df, house_dict = house_prices_bundle

In [3]:
# The "YrSold" column is used as the reference date.
# The model is trained on the houses sold in 2006; the houses sold in 2007 play the
# role of the new production data on which we want to detect data drift before
# predicting house prices.
sold_in_2006 = house_df['YrSold'].eq(2006)
sold_in_2007 = house_df['YrSold'].eq(2007)

house_df_learning = house_df.loc[sold_in_2006]
house_df_2007 = house_df.loc[sold_in_2007]

In [4]:
# Columns that are not model features: the target itself and the reference date
# that was used to split the data.
non_feature_columns = ['SalePrice', 'YrSold']

# 2006 (training) data. Index.difference returns the remaining column names sorted,
# so the feature frames do not keep the original column order.
learning_feature_names = house_df_learning.columns.difference(non_feature_columns)
X_df_learning = house_df_learning[learning_feature_names]
y_df_learning = house_df_learning['SalePrice'].to_frame()  # target as a one-column DataFrame

# 2007 (production) data, prepared the same way.
production_feature_names = house_df_2007.columns.difference(non_feature_columns)
X_df_2007 = house_df_2007[production_feature_names]
y_df_2007 = house_df_2007['SalePrice'].to_frame()

## Building a Supervised Model

In [5]:
from category_encoders import OrdinalEncoder

# Every column stored with the generic "object" dtype is treated as categorical.
categorical_features = [
    column_name
    for column_name, column_dtype in X_df_learning.dtypes.items()
    if column_dtype == 'object'
]

# Fit the ordinal encoder on the 2006 features only, then encode those same features.
# NOTE(review): confirm that 'ignore' is an accepted handle_unknown value for the
# installed category_encoders version.
encoder = OrdinalEncoder(cols=categorical_features, handle_unknown='ignore', return_df=True)
encoder.fit(X_df_learning)

X_df_learning_encoded = encoder.transform(X_df_learning)

In [6]:
# Keep 75% of the encoded 2006 data for training and the remainder for testing;
# the fixed random_state makes the split reproducible.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X_df_learning_encoded,
    y_df_learning,
    train_size=0.75,
    random_state=1,
)

In [7]:
# Gradient-boosted regressor with 200 trees, trained on the training split.
regressor = LGBMRegressor(n_estimators=200)
regressor.fit(Xtrain, ytrain)

## Use Eurybia for data drift

In [8]:
from eurybia import SmartDrift

In [9]:
# Compare the 2007 production features with the 2006 baseline. The un-encoded frames
# are passed together with the fitted encoder and the trained model.
SD = SmartDrift(
    df_current=X_df_2007,
    df_baseline=X_df_learning,
    deployed_model=regressor,
    encoding=encoder,
)

In [10]:
# Run the drift analysis and report how long it takes (%time prints CPU and wall time).
%time SD.compile()

Backend: Shap TreeExplainer
CPU times: user 2min 6s, sys: 5min 38s, total: 7min 44s
Wall time: 12.3 s


In [11]:
# Write the HTML data-drift report. Both paths are relative, so they are resolved
# against the notebook's working directory.
SD.generate_report(
    output_file='report_house_price_datadrift_2007.html',
    title_story="Data drift",
    title_description="House price Data drift 2007",
    project_info_file="../eurybia/data/project_info_house_price.yml",
)

Report saved to ./report_house_price_datadrift_2007.html. To upload and share your report, create a free Datapane account by running `!datapane signup`.

For more detailed tutorials, see:
- [Data validation](https://github.com/MAIF/eurybia/tree/master/tutorial/data_validation)
- [Data drift](https://github.com/MAIF/eurybia/tree/master/tutorial/data_drift)
- [Model drift](https://github.com/MAIF/eurybia/tree/master/tutorial/model_drift)