# Demo of AntakIA on the California Housing dataset
### Let's load the dataset

In [1]:
import pandas as pd
from antakia.utils.examples import fetch_dataset, AVAILABLE_EXAMPLES
# Fetch the bundled California Housing example; the displayed frame holds the
# features, the MedHouseVal target and one `<feature>_shap` column per feature.
dataset_name = 'california_housing'
df = fetch_dataset(dataset_name)
df

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,City,MedHouseVal,MedInc_shap,HouseAge_shap,AveRooms_shap,AveBedrms_shap,Population_shap,AveOccup_shap,Latitude_shap,Longitude_shap,City_shap
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23,4,4.526,1.908911,0.167049,0.108704,-0.015404,-0.022898,0.063015,-0.271882,0.341710,-0.085350
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22,70,3.585,1.625067,-0.054438,0.032429,-0.091859,0.044110,0.229414,-0.242004,0.310591,-0.010205
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24,70,3.521,1.464486,0.172200,0.152683,0.025594,-0.027486,-0.033160,-0.426897,0.358333,-0.033487
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,70,3.413,0.827802,0.178904,0.004158,0.032462,-0.036373,0.218895,-0.226761,0.423283,-0.013606
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,70,3.422,-0.093283,0.168596,0.102445,0.029359,-0.021585,0.509144,-0.272436,0.464937,0.011226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,155,0.781,-0.805256,0.009748,-0.035147,0.008701,-0.007931,0.035342,-0.780151,0.168567,0.007701
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,155,0.771,-0.577387,-0.010574,0.013063,0.036392,-0.006978,-0.103770,-0.831755,0.123591,0.017951
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,155,0.923,-0.804805,0.007414,-0.035277,0.006611,-0.009792,0.133257,-0.749146,0.196270,0.004602
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32,155,0.847,-0.849971,-0.008248,-0.023258,0.021840,-0.017897,0.218681,-0.775130,0.156322,0.005074


### Then shuffle it, keep a 2,500-row sample and split it into train and test

In [2]:
# Sampling parameters: keep just enough rows to end up with TRAIN_ROWS
# training rows once TRAIN_FRACTION of the sample goes to the train split.
TRAIN_ROWS = 2000
TRAIN_FRACTION = 0.8
SPLIT_SEED = 9  # fixed seed so "Restart & Run All" reproduces the same split

# Shuffle the full frame reproducibly, then keep the first `limit` rows.
# The result is bound to a new name so `df` stays the raw dataset and this
# cell yields the same split however many times it is re-run.
limit = int(TRAIN_ROWS / TRAIN_FRACTION)
df_sampled = df.sample(frac=1, random_state=SPLIT_SEED).iloc[:limit]

# First TRAIN_FRACTION of the sampled rows -> train, the remainder -> test.
split_row = int(len(df_sampled) * TRAIN_FRACTION)
df_train = df_sampled.iloc[:split_row]
df_test = df_sampled.iloc[split_row:]

In [3]:
# Select columns by name instead of by position: positional indices silently
# pick the wrong columns if the frame layout changes (e.g. an extra index
# column), whereas a missing name fails loudly with a KeyError.
FEATURE_COLUMNS = ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
                   'Population', 'AveOccup', 'Latitude', 'Longitude']
TARGET_COLUMN = 'MedHouseVal'
# One precomputed SHAP column per selected feature. `City` / `City_shap` are
# deliberately left out, exactly as the positional selection did.
SHAP_COLUMNS = [f'{column}_shap' for column in FEATURE_COLUMNS]

X_train = df_train[FEATURE_COLUMNS]  # the dataset
y_train = df_train[TARGET_COLUMN]  # the target variable
shap_values_train = df_train[SHAP_COLUMNS]  # the SHAP values from a previous model
# NOTE(review): these SHAP values come with the dataset, not from the regressor
# trained below — confirm that is acceptable for the explanations shown in the GUI.

X_test = df_test[FEATURE_COLUMNS]  # the dataset
y_test = df_test[TARGET_COLUMN]  # the target variable

### and train the model

In [4]:
from sklearn.ensemble import GradientBoostingRegressor
# Hyper-parameters are grouped in one place; the fixed random_state keeps the
# fitted model identical across runs on the same training data.
regressor_params = {'random_state': 9}
regressor = GradientBoostingRegressor(**regressor_params)
# Kept as the last expression so the fitted estimator is displayed by the cell.
regressor.fit(X=X_train, y=y_train)

### Let's explore the model

Let's add some context by describing each input variable.

In [5]:
# One record per input variable: (column name, description, type, unit).
# The order of the records defines each variable's `col_index`.
variable_specs = [
    ('MedInc', 'Median income', 'float64', 'k$'),
    ('HouseAge', 'House age', 'int', 'years'),
    ('AveRooms', 'Average nb rooms', 'float64', 'rooms'),
    ('AveBedrms', 'Average nb bedrooms', 'float64', 'rooms'),
    ('Population', 'Population', 'int', 'people'),
    ('AveOccup', 'Average occupancy', 'float64', 'ratio'),
    ('Latitude', 'Latitude', 'float64', 'degrees'),
    ('Longitude', 'Longitude', 'float64', 'degrees'),
]
variable_names = [spec[0] for spec in variable_specs]

# Description table indexed by column name. The boolean flags mark the single
# critical variable (MedInc) and which columns hold latitude / longitude.
variables_df = pd.DataFrame(
    {
        'col_index': list(range(len(variable_specs))),
        'descr': [spec[1] for spec in variable_specs],
        'type': [spec[2] for spec in variable_specs],
        'unit': [spec[3] for spec in variable_specs],
        'critical': [name == 'MedInc' for name in variable_names],
        'lat': [name == 'Latitude' for name in variable_names],
        'lon': [name == 'Longitude' for name in variable_names],
    },
    index=variable_names,
)

We call AntakIA, passing:
1. the train dataset (features and target),
2. the model,
3. the test dataset (optional),
4. the already computed SHAP values (optional),
5. a description of the X variables (optional).

Here is the bare minimum needed to run AntakIA:
```python
atk = AntakIA(X, y, regressor)
```

In [6]:
from antakia.antakia import AntakIA

# Optional inputs, on top of the mandatory (X, y, model) triple: the variable
# descriptions, the held-out test split and the precomputed SHAP values.
antakia_options = dict(
    variables=variables_df,
    X_test=X_test,
    y_test=y_test,
    X_exp=shap_values_train,
)
atk = AntakIA(X_train, y_train, regressor, **antakia_options)

In [7]:
# Launch the AntakIA GUI; it renders as widgets in this cell's output.
atk.start_gui()

Layout(children=[Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t2\x00\x00\n\xd5\x08\x06\x00\x00\x0…

Col(children=[AppBar(children=[Layout(children=[Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x04\…