In [1]:
from dash import Dash, html, dash_table, dcc, callback, Output, Input
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
# --- Load the Goodreads export and derive the columns used by the dashboard ---

# Format labels kept as-is; every other trailing token is bucketed as "other".
_KNOWN_FORMATS = ("hardcover", "paperback", "ebook", "audiobook", "audio", "cd", "kindle")


def _parse_page_count(pages_format):
    """Return the leading page count of a ``pages_format`` value, or NaN.

    NaN is returned when the value is missing/empty or when its first
    whitespace-separated token is not made of digits only.
    """
    tokens = str(pages_format).split()
    if tokens and tokens[0].isdigit():
        return int(tokens[0])
    return np.nan


def _parse_format(pages_format):
    """Return the lower-cased last token of ``pages_format`` if it is a known format, else "other".

    "Kindle Edition" is first collapsed to "Kindle" so that its last token is "kindle".
    Missing or empty values fall into "other" instead of raising.
    """
    tokens = str(pages_format).replace("Kindle Edition", "Kindle").split()
    label = tokens[-1].lower() if tokens else ""
    return label if label in _KNOWN_FORMATS else "other"


def _parse_rating_counts(ratings):
    """Parse a ``ratings`` string into its first five comma-separated values as floats.

    The first and last characters are dropped before splitting (presumably the
    enclosing brackets of a list literal — confirm against the CSV).
    Raises IndexError when fewer than five values are present.
    """
    parts = ratings[1:-1].split(",")
    return [float(parts[position]) for position in range(5)]


# NOTE(review): the "genres" converter only strips the first/last character and splits on
# commas; if the cells are Python-style list literals, each genre keeps its quotes and
# leading space. Left unchanged because other cells may rely on the current values.
df = pd.read_csv("./../data/goodreads_data.csv", converters={"genres": lambda x: x[1:-1].split(",")})

# Keep only the first three whitespace-separated tokens of "published" before parsing;
# values that cannot be parsed become NaT (errors="coerce").
df["publication_date"] = pd.to_datetime(df["published"].apply(lambda x: " ".join(str(x).split()[:3])), errors="coerce")
df["publication_year"] = df["publication_date"].dt.year.astype("float")

df["n_pages"] = df["pages_format"].apply(_parse_page_count).astype("float")
df["format"] = df["pages_format"].apply(_parse_format).astype("category")

# Parse every "ratings" string once; column i holds the i-th parsed value of each row.
_rating_counts = pd.DataFrame(
    df["ratings"].apply(_parse_rating_counts).tolist(),
    index=df.index,
    columns=list(range(5)),
)

# Share of each star level among all ratings of a book (position `stars - 1` in "ratings").
dic_rating = {1: "one_star", 2: "two_stars", 3: "three_stars", 4: "four_stars", 5: "five_stars"}
for key, value in dic_rating.items():
    df[value] = _rating_counts[key - 1] / df["n_ratings"]

# Weighted mean of the star levels, rounded to one decimal.
_weighted_total = sum(key * _rating_counts[key - 1] for key in dic_rating)
df["average_rating"] = (_weighted_total / df["n_ratings"]).round(1)

In [4]:
app = Dash(__name__)


def _build_layout():
    """Assemble the root ``html.Div`` of the dashboard.

    The page is a title, a rule, the dataset card (radio selector + table),
    a two-column row (correlation heatmap | "Accessibilité" and "Prix" cards),
    a second two-column row ("Genres Littéraires" | "Notes Attribuées") and a
    final full-width "Groupes de Lecteurs" card. Cards other than the dataset
    and correlation ones currently contain only their heading.
    """
    # Style fragments shared by several sections; each use builds a fresh dict.
    card = {"background-color": "#f5f6f8", "padding": "20px", "border-radius": "10px"}
    bottom_gap = {"margin-bottom": "30px"}
    two_columns = {"display": "grid", "grid-template-columns": "1fr 1fr", "gap": "10px", **bottom_gap}

    def heading(text):
        """Return a centred section heading."""
        return html.H2(children=text, style={"text-align": "center", "margin": "0", "color": "#2a303b"})

    # --- Dataset card: view selector + paginated table ---
    table_columns = ["title", "author", "average_rating", "n_ratings", "n_reviews", "n_pages", "publication_year", "format", "language"]
    view_selector = dcc.RadioItems(
        options=["Aperçu", "Détaillé", "Résumé Statistique"],
        value="Aperçu",
        id="radio-data",
        style={"display": "flex", "justify-content": "space-around", "margin": "20px 0 10px 0"},
    )
    data_table = dash_table.DataTable(
        data=df[table_columns].to_dict("records"),
        page_size=10,
        id="data-table",
        style_table={"overflowX": "auto"},
    )
    dataset_card = html.Div(
        [heading("Jeu de Données"), view_selector, data_table],
        style={**card, **bottom_gap},
    )

    # --- First row: correlation heatmap next to two stacked cards ---
    correlated_columns = ["average_rating", "n_ratings", "n_reviews", "price", "publication_year", "n_pages"]
    correlation_figure = px.imshow(
        df[correlated_columns].corr(),
        title="Matrice de corrélation entre les variables quantitatives les plus importantes",
    )
    correlation_card = html.Div(
        [heading("Corrélations"), dcc.Graph(figure=correlation_figure, id="correlation_matrix")],
        style=dict(card),
    )
    accessibility_card = html.Div([heading("Accessibilité")], style=dict(card))
    price_card = html.Div(
        [heading("Prix")],
        style={"display": "grid", "grid-template-rows": "1fr 1fr", **card},
    )
    stacked_cards = html.Div(
        [accessibility_card, price_card],
        style={"display": "grid", "grid-template": "1fr 1fr / 1fr", "gap": "10px"},
    )
    first_row = html.Div([correlation_card, stacked_cards], style=dict(two_columns))

    # --- Second row: genres and ratings cards ---
    second_row = html.Div(
        [
            html.Div([heading("Genres Littéraires")], style=dict(card)),
            html.Div([heading("Notes Attribuées")], style=dict(card)),
        ],
        style=dict(two_columns),
    )

    # --- Final full-width card ---
    readers_card = html.Div([heading("Groupes de Lecteurs")], style={**card, **bottom_gap})

    return html.Div(
        [
            html.H1(children="ÉCRITURE D'UN LIVRE POPULAIRE", style={'text-align': 'center'}),
            html.Hr(style={"height": "3px", "color": "#026b9c", "border": "none", "background-color": "#026b9c"}),
            dataset_card,
            first_row,
            second_row,
            readers_card,
        ],
        style={"font-family": "Helvetica", "background-color": "white", "padding": "10px", "border-radius": "5px", "margin-bottom": "10px"},
    )


app.layout = _build_layout()

@callback(
    Output(component_id="data-table", component_property="data"),
    Input(component_id="radio-data", component_property="value")
)
def update_table(mode):
    """Return the rows shown in the ``data-table`` component for the selected view.

    Parameters
    ----------
    mode : str
        Value of the ``radio-data`` selector: "Aperçu", "Détaillé" or
        "Résumé Statistique".

    Returns
    -------
    list[dict]
        One dict per table row. For "Résumé Statistique" the rows are the
        ``describe()`` statistics rounded to two decimals, with the statistic
        name (count, mean, std, ...) kept in a leading "statistic" column so
        each row stays identifiable. Any unrecognised mode falls back to the
        "Aperçu" columns instead of returning ``None``.
    """
    if mode == "Résumé Statistique":
        features = ["average_rating", "n_ratings", "n_reviews", "n_pages", "publication_year"]
        summary = df[features].describe().round(2)
        # to_dict("records") drops the index, so move the statistic names into a column first.
        return summary.rename_axis("statistic").reset_index().to_dict("records")

    if mode == "Détaillé":
        features = ["title", "author", "average_rating", "n_ratings", "n_reviews", "n_pages", "publication_year", "format", "language"]
    else:
        # "Aperçu", and the fallback for any unexpected value.
        features = ["title", "author", "average_rating", "n_ratings", "format", "publication_year", "language"]
    return df[features].to_dict("records")

In [5]:
# Start the Dash server for the app defined above, with debug mode switched on
# (intended for local development; turn it off for any shared deployment).
app.run(debug=True)

In [None]:
# Ajouter :
# Prix : Boxplot, prix moyen par année
# Notes : Boxplot, Violinplot, convergence, temps?, relation avec le nombre de livres écrits
# Genres : mostcommon, top5
# Accessibilité : pie chart et hist
# Matrice de corrélation