In [None]:
# Papermill overwrites these parameters when the report is executed; the values below
# are only used when working on the report interactively.
# To use them, first run generate_report once with working_dir='../working' so that
# the objects used further down are created in that directory.
dir_path = '../working'
project_info_file = '../utils/project_info.yml'
config = {
    'title_story': "House prices report",
    # Written with explicit escapes so the value does not depend on trailing
    # whitespace surviving in the notebook source.
    'title_description': (
        "This document is a data science report of the kaggle house prices tutorial project. \n"
        "        It was generated using the Shapash library."
    ),
    # Each metric is given as a dotted import path plus a display name.
    'metrics': [
        {'path': 'sklearn.metrics.mean_absolute_error', 'name': 'Mean absolute error'},
        {'path': 'sklearn.metrics.mean_squared_error', 'name': 'Mean squared error'},
    ],
}

In [None]:
import os
import pandas as pd
from shapash import SmartExplainer
from shapash.report.project_report import ProjectReport
from shapash.report.common import load_saved_df

# Everything loaded here is read from dir_path.
# NOTE(review): the explainer file is named *.pickle, so it is presumably unpickled on
# load — only point dir_path at a directory whose content you trust.
xpl = SmartExplainer.load(os.path.join(dir_path, 'smart_explainer.pickle'))

# The three saved frames are loaded in the same order as before: x_train, y_train, y_test.
x_train, y_train, y_test = (
    load_saved_df(os.path.join(dir_path, csv_name))
    for csv_name in ('x_train.csv', 'y_train.csv', 'y_test.csv')
)

# The report object used by every cell below.
report = ProjectReport(
    explainer=xpl,
    project_info_file=project_info_file,
    x_train=x_train,
    y_train=y_train,
    y_test=y_test,
    config=config,
)

In [None]:
# Output comes from ProjectReport.display_title_description(); presumably it shows the
# title_story / title_description entries of config — confirm against the Shapash API.
report.display_title_description()

In [None]:
# Output comes from ProjectReport.display_project_information(); presumably it renders
# the content of project_info_file — confirm against the Shapash API.
report.display_project_information()

## Model information

In [None]:
# Output comes entirely from ProjectReport.display_model_analysis(); nothing is
# configured in this cell.
report.display_model_analysis()

## Dataset analysis

In [None]:
# multivariate_analysis is explicitly passed as False; presumably this skips the
# multivariate part of the dataset analysis — confirm against the Shapash API.
report.display_dataset_analysis(multivariate_analysis=False)

### Relationship with target variable

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Build the frame used by the custom plots below: the report's x_train_pre plus the target.
# A copy is taken so that adding the 'SalePrice' column does not modify the frame held
# by the report object, which the later report.display_* cells still rely on.
df_train = report.x_train_pre.copy()
y_train = report.y_train
df_train['SalePrice'] = y_train

In [None]:
# Box plot of SalePrice for each OverallQual value, drawn on an explicit Axes.
box_fig, box_ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x='OverallQual', y="SalePrice", data=df_train, ax=box_ax)
# Fix the y axis to the 0 - 800000 range.
box_ax.set_ylim(0, 800000)
plt.show()

### Relationship between training variables

In [None]:
# Correlation heatmap of the columns of df_train.
# Only numeric/bool columns are kept before calling corr(): with pandas >= 2.0,
# DataFrame.corr() raises on non-numeric columns instead of silently dropping them.
# NOTE(review): x_train_pre may contain categorical (string) columns — confirm.
numeric_train = df_train.select_dtypes(include=['number', 'bool'])
corr_matrix = numeric_train.corr()
f, ax = plt.subplots(figsize=(16, 12))
# Draw on the Axes created above rather than relying on the implicit current Axes.
sns.heatmap(corr_matrix, vmax=.8, square=True, cmap="YlGnBu", ax=ax)
plt.show()

## Model explainability

In [None]:
# Note: Plotly graphs may not render correctly inside the notebook, but they still work
# in the HTML output file.
report.display_model_explainability()

## Model performance

In [None]:
# Output comes from ProjectReport.display_model_performance(); presumably it reports the
# metrics listed in config['metrics'] — confirm against the Shapash API.
report.display_model_performance()

**The graph below plots y_pred against y_test:**

In [None]:
# Scatter plot of report.y_pred (y axis) against report.y_test (x axis).
y_test, y_pred = report.y_test, report.y_pred

# seaborn returns the Axes it drew on; label it directly instead of going through pyplot.
scatter_ax = sns.scatterplot(x=y_test, y=y_pred)
scatter_ax.set_xlabel('y_test')
scatter_ax.set_ylabel('y_pred')
scatter_ax.set_title('y_pred vs y_test')
plt.show()

You can add as many graphs, text, or other cells as you want.

The code will not be displayed. Only the markdown and the output of the cells will be shown in the generated HTML file.