# DEMO of Antakia on California Housing dataset 
### let's load the dataset and shuffle it

In [1]:
import pandas as pd
df = pd.read_csv('../data/california_housing.csv').drop(['Unnamed: 0'], axis=1)
df = df.sample(len(df))
limit = int(25000 / 0.8)
df = df.iloc[:limit]

### Then split it into train and test

In [2]:
split_row = int(len(df) * 0.8)
df_train = df[:split_row]
df_test = df[split_row:]

In [3]:
X_train = df_train.iloc[:,:8] # the dataset
y_train = df_train.iloc[:,9] # the target variable
shap_values_train = df_train.iloc[:,[10,11,12,13,14,15,16,17]] # the SHAP values from a previous model

X_test = df_train.iloc[:,:8] # the dataset
y_test = df_train.iloc[:,9] # the target variable

### and train the model

In [4]:
from sklearn.ensemble import GradientBoostingRegressor
regressor = GradientBoostingRegressor(random_state = 9)
regressor.fit(X_train, y_train)

### Let's explore the model

let's add some context

In [5]:
variables_df = pd.DataFrame(
    {'col_index': [0, 1, 2, 3, 4, 5, 6, 7],
    'descr': ['Median income', 'House age', 'Average nb rooms', 'Average nb bedrooms', 'Population', 'Average occupancy', 'Latitude', 'Longitude'],
    'type': ['float64', 'int', 'float64', 'float64', 'int', 'float64', 'float64', 'float64'],
    'unit': ['k$', 'years', 'rooms', 'rooms', 'people', 'ratio', 'degrees', 'degrees'],
    'critical': [True, False, False, False, False, False, False, False],
    'lat': [False, False, False, False, False, False, True, False],
    'lon': [False, False, False, False, False, False, False, True]},
    index=['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
)

We call AntakIA passing :
1. the train dataset, 
2. the model
3. the test dataset (Optional)
4. the already computed SHAP values (Optional)
5. a description of X variables

Here is the bare minimum to run AntakIA : 
```python
atk = AntakIA(X, y, regressor)
```

In [12]:
from antakia.antakia import AntakIA

atk = AntakIA(
    X_train, y_train,
    regressor,
    variables=variables_df,
    X_test=X_test, y_test=y_test,
    X_exp=shap_values_train
)

In [13]:
atk.start_gui()

Layout(children=[Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t2\x00\x00\n\xd5\x08\x06\x00\x00\x0…

Col(children=[AppBar(children=[Layout(children=[Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x04\…

In [8]:
y_c_train = y_train > 3
y_c_test = y_test > 3
from sklearn.ensemble import GradientBoostingClassifier
classifier = GradientBoostingClassifier(random_state = 9)
classifier.fit(X_train, y_c_train)

In [9]:
from antakia.antakia import AntakIA

atk = AntakIA(
    X_train, y_c_train.astype(int),
    classifier,
    variables=variables_df,
    X_test=X_test, y_test=y_c_test.astype(int)
)

In [10]:
atk.start_gui()

Layout(children=[Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t2\x00\x00\n\xd5\x08\x06\x00\x00\x0…

Col(children=[AppBar(children=[Layout(children=[Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x04\…

In [11]:
atk.gui.region_set.get(2).perfs

AttributeError: 'NoneType' object has no attribute 'perfs'