In [3]:
import dash
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
from joblib import load

# Load saved data: the metrics table, the stored per-model outputs and the
# test labels used by the callbacks.
# df_results is later filtered on its "Model" and "Vectorization" columns and
# read for "Accuracy", "F1_macro" and "AUC".
df_results = pd.read_csv("data/df_results.csv")
# trained_models is indexed as trained_models[vectorization][model_name] and
# each entry is read for "y_pred" and "y_proba".
# NOTE(review): joblib.load unpickles its input, so this file must come from a
# trusted source.
trained_models = load("data/trained_models.joblib")
# Only the "target" column of the CSV is kept.
y_test = pd.read_csv("data/y_test.csv")["target"]

# Initialize Dash app
app = dash.Dash(__name__)
app.title = "Classification Evaluation Dashboard – AML"

# Vectorization choices come from the results table. "TF-IDF" stays the
# preferred default, but only when it is actually present: a default absent
# from the data would still be handed to the callbacks, which use it as a
# lookup key.
_vectorization_choices = df_results["Vectorization"].unique().tolist()
if "TF-IDF" in _vectorization_choices:
    _default_vectorization = "TF-IDF"
elif _vectorization_choices:
    _default_vectorization = _vectorization_choices[0]
else:
    # No results at all: start with nothing selected.
    _default_vectorization = None

# Layout: title, the two selectors, then the three charts filled by callbacks.
app.layout = html.Div([
    html.H1("📊 Classification Model Evaluation Dashboard", style={"textAlign": "center"}),

    html.Div([
        html.Label("Select a vectorization method:"),
        dcc.Dropdown(
            id='vec-selector',
            options=[{'label': v, 'value': v} for v in _vectorization_choices],
            value=_default_vectorization
        ),

        html.Label("Select a model:"),
        # Options and value are supplied by the update_model_dropdown callback.
        dcc.Dropdown(id='model-selector')
    ], style={"width": "50%", "margin": "auto"}),

    dcc.Graph(id='roc-curve'),
    dcc.Graph(id='confusion-matrix'),
    dcc.Graph(id='bar-metrics')
])

# Update model dropdown based on selected vectorization
@app.callback(
    Output('model-selector', 'options'),
    Output('model-selector', 'value'),
    Input('vec-selector', 'value')
)
def update_model_dropdown(selected_vec):
    """Populate the model dropdown for the chosen vectorization.

    Args:
        selected_vec: Current value of the vectorization dropdown. May be
            ``None`` (e.g. a cleared dropdown) or a name with no stored models.

    Returns:
        tuple: ``(options, value)`` for the model dropdown. ``options`` is a
        list of ``{'label', 'value'}`` dicts and ``value`` is the first
        available model, or ``([], None)`` when nothing is available.
    """
    try:
        models_for_vec = trained_models[selected_vec]
    except KeyError:
        # Unknown or missing vectorization: empty the dropdown instead of
        # failing the callback.
        return [], None

    available_models = list(models_for_vec.keys())
    if not available_models:
        return [], None

    options = [{'label': m, 'value': m} for m in available_models]
    return options, available_models[0]

def _placeholder_figure(message):
    """Return an empty figure whose title carries *message*."""
    fig = go.Figure()
    fig.update_layout(title=message)
    return fig


# Update ROC curve, confusion matrix, and metrics barplot
@app.callback(
    Output('roc-curve', 'figure'),
    Output('confusion-matrix', 'figure'),
    Output('bar-metrics', 'figure'),
    Input('vec-selector', 'value'),
    Input('model-selector', 'value')
)
def update_dashboard(vectorization, model_name):
    """Build the three evaluation charts for one vectorization/model pair.

    Args:
        vectorization: Selected vectorization name (key of ``trained_models``).
        model_name: Selected model name (key of ``trained_models[vectorization]``).

    Returns:
        tuple: ``(fig_roc, fig_cm, fig_bar)``: the ROC curve, the confusion
        matrix heatmap and the metrics bar chart. Placeholder figures are
        returned when the selection cannot be resolved.
    """
    try:
        model_data = trained_models[vectorization][model_name]
    except KeyError:
        # Either selector is empty or the pair has no stored outputs; show a
        # hint rather than letting the callback raise.
        placeholder = _placeholder_figure("Select a vectorization method and a model.")
        return placeholder, placeholder, placeholder

    y_pred = model_data["y_pred"]
    y_proba = model_data["y_proba"]

    # ROC curve (only when the model stored probability scores)
    if y_proba is not None:
        fpr, tpr, _ = roc_curve(y_test, y_proba)
        auc_score = roc_auc_score(y_test, y_proba)
        fig_roc = go.Figure()
        fig_roc.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines', name=f"AUC = {auc_score:.2f}"))
        # Diagonal reference line for a random classifier.
        fig_roc.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Random', line=dict(dash='dash')))
        fig_roc.update_layout(title="ROC Curve", xaxis_title="False Positive Rate", yaxis_title="True Positive Rate")
    else:
        fig_roc = _placeholder_figure("No probability scores available for this model.")

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    fig_cm = px.imshow(
        cm, text_auto=True, color_continuous_scale='Blues',
        labels=dict(x="Predicted", y="Actual"),
        title="Confusion Matrix"
    )

    # Metric barplot: the results table may hold no row for this pair, so
    # check before taking the first match.
    matching_rows = df_results[
        (df_results["Model"] == model_name) &
        (df_results["Vectorization"] == vectorization)
    ]
    if matching_rows.empty:
        fig_bar = _placeholder_figure("No metrics recorded for this model.")
    else:
        metrics = matching_rows.iloc[0]
        fig_bar = go.Figure([
            go.Bar(name="Accuracy", x=["Accuracy"], y=[metrics["Accuracy"]]),
            go.Bar(name="F1 Macro", x=["F1 Macro"], y=[metrics["F1_macro"]]),
            go.Bar(name="AUC", x=["AUC"], y=[metrics["AUC"]])
        ])
        fig_bar.update_layout(title="Model Metrics", barmode='group')

    return fig_roc, fig_cm, fig_bar

# Run the app: start the Dash server on port 8054 with debug enabled, only
# when this code is executed as the main module.
if __name__ == '__main__':
    app.run(debug=True, port=8054)


In [9]:
import dash
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.express as px
import joblib

# === 🔁 Load data and models ===
# Restaurant table; later code reads its city, dominant_topic, latitude,
# longitude, restaurant_name, avg_rating and price_level columns.
df = pd.read_csv("data/restaurants_france_valid_topic.csv")
# NOTE(review): joblib.load unpickles its input, so both files must come from
# a trusted source.
# lda_model is read for components_ and n_components; count_vectorizer for
# get_feature_names_out().
lda_model = joblib.load("data/lda_model.joblib")
count_vectorizer = joblib.load("data/count_vectorizer.joblib")


# === ✅ Generate keywords per topic ===
def get_sklearn_lda_keywords(model, feature_names, topn=10):
    """Map each topic index to its ``topn`` highest-weighted feature names.

    Args:
        model: Object exposing ``components_``, iterated one topic (a vector
            of per-feature weights) at a time.
        feature_names: Sequence indexed by feature position.
        topn: Number of leading features kept per topic.

    Returns:
        dict: topic index -> list of feature names, heaviest weight first.
    """
    return {
        # argsort is ascending, so reverse it and keep the leading entries.
        topic_no: [feature_names[pos] for pos in weights.argsort()[::-1][:topn]]
        for topic_no, weights in enumerate(model.components_)
    }

# Derive the topic count and per-topic keywords from the loaded artifacts.
num_topics = lda_model.n_components
feature_names = count_vectorizer.get_feature_names_out()
topic_keywords = get_sklearn_lda_keywords(lda_model, feature_names, topn=10)

# === 🌐 Initialize Dash app ===
app = dash.Dash(__name__)
app.title = "LDA Topic Dashboard – sklearn"

# City selector: distinct non-null cities in alphabetical order, multi-select.
_city_options = [
    {'label': c, 'value': c}
    for c in sorted(df['city'].dropna().unique())
]
_city_dropdown = dcc.Dropdown(
    id='city-filter',
    options=_city_options,
    value=None,
    placeholder="Filter by city...",
    multi=True
)

# Topic selector: one entry per topic index.
_topic_options = [
    {'label': f"Topic {i}", 'value': i}
    for i in range(num_topics)
]
_topic_dropdown = dcc.Dropdown(
    id='topic-filter',
    options=_topic_options,
    value=None,
    placeholder="Filter by topic..."
)

# Page: title, the two filters, the topic chart, the keyword line, the map.
app.layout = html.Div([
    html.H1("📊 LDA Topic Analysis (Restaurants)", style={"textAlign": "center"}),
    _city_dropdown,
    _topic_dropdown,
    dcc.Graph(id="topic-bar"),
    html.Div(id="keywords-display", style={"textAlign": "center", "fontSize": "18px", "marginTop": "20px"}),
    dcc.Graph(id="map-lda")
])

@app.callback(
    Output("topic-bar", "figure"),
    Output("map-lda", "figure"),
    Input("city-filter", "value"),
    Input("topic-filter", "value")
)
def update_graphs(selected_cities, selected_topic):
    """Rebuild the topic histogram and the restaurant map for the filters.

    Args:
        selected_cities: List of chosen cities; ``None`` or empty means all.
        selected_topic: Chosen topic index, or ``None`` for every topic.

    Returns:
        tuple: ``(fig_hist, fig_map)``. The histogram is restricted by city
        only; the map is restricted by both city and topic.
    """
    # Apply the city filter once; both charts start from this frame.
    city_df = df[df["city"].isin(selected_cities)] if selected_cities else df

    # The histogram ignores the topic filter, so it shows every topic for the
    # selected cities.
    fig_hist = px.histogram(city_df,
                            x='dominant_topic', nbins=num_topics,
                            title="Topic Distribution (LDA)")
    fig_hist.update_layout(xaxis_title="Topic", yaxis_title="Number of restaurants")

    map_df = city_df
    if selected_topic is not None:
        map_df = city_df[city_df["dominant_topic"] == selected_topic]

    # scatter_map replaces the deprecated scatter_mapbox and matches the map
    # call used by the recommendation dashboard in this file.
    fig_map = px.scatter_map(
        map_df,
        lat="latitude",
        lon="longitude",
        color="dominant_topic",
        hover_name="restaurant_name",
        hover_data=["city", "avg_rating", "price_level"],
        map_style="open-street-map",
        zoom=4,
        title="Restaurants filtered by topic (LDA)"
    )

    return fig_hist, fig_map

@app.callback(
    Output("keywords-display", "children"),
    Input("topic-filter", "value")
)
def show_keywords(selected_topic):
    """Return the text for the keywords-display element.

    Args:
        selected_topic: Chosen topic index, or ``None`` when no topic is set.

    Returns:
        str: The topic's keywords joined by commas, or a prompt asking the
        user to pick a topic.
    """
    # Guard first: nothing selected means there is nothing to look up.
    if selected_topic is None:
        return "🧠 Select a topic to display its keywords."

    # Unknown topics fall back to an empty keyword list.
    keyword_list = topic_keywords.get(selected_topic, [])
    joined_keywords = ', '.join(keyword_list)
    return f"🧠 Topic {selected_topic}: {joined_keywords}"

# Start the Dash server on port 8055 with debug enabled, only when this code
# is executed as the main module.
if __name__ == '__main__':
    app.run(debug=True, port=8055)


❌ Colonnes 'vectorization' ou 'model' manquantes.


In [10]:
import dash
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.express as px
import warnings
warnings.filterwarnings("ignore")


# Load predictions from all model/vector combinations
df = pd.read_csv("data/restaurants_with_all_predictions.csv")

# Initialize Dash app
app = dash.Dash(__name__)
app.title = "Restaurant Recommendation – Multi-Model Comparison"

# Sorted distinct values feed both the options and the default (first entry)
# of each dropdown.
_vec_choices = sorted(df['vectorization'].unique())
_model_choices = sorted(df['model'].unique())

# Left-hand selector: vectorization method.
_vec_panel = html.Div([
    html.Label("Select vectorization method:"),
    dcc.Dropdown(
        id='vec-dropdown',
        options=[{"label": v, "value": v} for v in _vec_choices],
        value=_vec_choices[0]
    ),
], style={"width": "48%", "display": "inline-block"})

# Right-hand selector: classification model.
_model_panel = html.Div([
    html.Label("Select classification model:"),
    dcc.Dropdown(
        id='model-dropdown',
        options=[{"label": m, "value": m} for m in _model_choices],
        value=_model_choices[0]
    ),
], style={"width": "48%", "display": "inline-block", "float": "right"})

# Layout: title, the two selectors, the map, then the tables container.
app.layout = html.Div([
    html.H1("🍴 Restaurant Recommendations (Model + Vectorization Selection)", style={"textAlign": "center"}),
    _vec_panel,
    _model_panel,
    html.Br(),
    dcc.Graph(id='map-restaurant'),
    html.Br(),
    html.Div(id='tables-container')
])

# Column headers and matching DataFrame columns of the ranking tables.
_TABLE_HEADERS = ['Name', 'City', 'Avg Rating', 'Keywords', 'Score', 'Predicted Class']
_TABLE_COLUMNS = ['restaurant_name', 'city', 'avg_rating', 'keywords_clean', 'score', 'target_pred']


def _restaurant_table(rows):
    """Render the restaurants in *rows* as an HTML table, one line each."""
    body = [
        html.Tr([
            html.Td(name),
            html.Td(city),
            html.Td(f"{avg_rating:.1f}"),
            html.Td(keywords),
            html.Td(f"{score:.2f}"),
            html.Td(str(predicted))
        ])
        # Plain tuples avoid building a row Series for every cell lookup.
        for name, city, avg_rating, keywords, score, predicted
        in rows[_TABLE_COLUMNS].itertuples(index=False, name=None)
    ]
    return html.Table([
        html.Thead(html.Tr([html.Th(col) for col in _TABLE_HEADERS])),
        html.Tbody(body)
    ])


# Callback
@app.callback(
    Output('map-restaurant', 'figure'),
    Output('tables-container', 'children'),
    Input('vec-dropdown', 'value'),
    Input('model-dropdown', 'value')
)
def update_dashboard(selected_vec, selected_model):
    """Build the recommendation map and ranking tables for one selection.

    Args:
        selected_vec: Value of the vectorization dropdown.
        selected_model: Value of the model dropdown.

    Returns:
        tuple: ``(fig, tables)``. ``fig`` maps the restaurants predicted as
        class 1, and ``tables`` holds the ten highest-scoring class-1 rows
        and the ten lowest-scoring class-0 rows.
    """
    # Keep only the predictions of the selected model/vectorization pair.
    filtered_df = df[(df['vectorization'] == selected_vec) & (df['model'] == selected_model)]

    # Map for predicted good restaurants only
    map_df = filtered_df[filtered_df['target_pred'] == 1]
    fig = px.scatter_map(
        map_df,
        lat='latitude',
        lon='longitude',
        hover_name='restaurant_name',
        hover_data=['city', 'avg_rating', 'keywords_clean', 'score'],
        color='score',
        color_continuous_scale='Blues',
        zoom=4,
        height=500,
        title=f"Recommended Restaurants ({selected_model} + {selected_vec})"
    )

    # Top 10 recommended restaurants (highest score first)
    top_df = map_df.sort_values(by='score', ascending=False).head(10)

    # Bottom 10 least recommended restaurants (lowest score first)
    bottom_df = filtered_df[filtered_df['target_pred'] == 0].sort_values(by='score', ascending=True).head(10)

    tables = html.Div([
        html.H3("📈 Top 10 Recommended Restaurants"),
        _restaurant_table(top_df),
        html.Br(),
        html.H3("📉 Bottom 10 Restaurants According to the Model"),
        _restaurant_table(bottom_df)
    ])

    return fig, tables

# Run app: start the Dash server on port 8056 with debug enabled, only when
# this code is executed as the main module.
if __name__ == '__main__':
    app.run(debug=True, port=8056)
