In [1]:
pip install dash pandas plotly numpy scikit-learn





[notice] A new release of pip available: 22.3.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load the CSV datasets that feed the dashboard
print("Loading data files...")
df_offre = pd.read_csv('../Data/DonneesCreees/structure_offre_TV.csv')
df_conso = pd.read_csv('../Data/DonneesCreees/structure_conso_TV.csv')
df_format = pd.read_csv('../Data/DonneesCreees/age_format3_frequencies.csv')
df_news1 = pd.read_csv('../Data/DonneesCreees/Frequencies_of_NEWS1_Responses_by_Age_Group.csv')
df_info_mode = pd.read_csv('../Data/DonneesCreees/age_group_main_info_mode_frequencies.csv')
df_info = pd.read_csv('../Data/DonneesCreees/lesfrancaisetlinformationpropre.csv')
df_social = pd.read_csv('../Data/DonneesCreees/social_media_frequencies.csv')

# Debug output: one "<name> shape: (rows, cols)" line per frame
# (df_info is loaded above but is not part of this report).
print("\nShapes of loaded data:")
print("\n".join(
    f"{frame_name} shape: {frame.shape}"
    for frame_name, frame in (
        ('df_offre', df_offre),
        ('df_conso', df_conso),
        ('df_format', df_format),
        ('df_news1', df_news1),
        ('df_info_mode', df_info_mode),
        ('df_social', df_social),
    )
))

# Data preparation for the prediction model
def prepare_data(path='../Data/DonneesCreees/filtered_age_group_and_main_info_mode.csv'):
    """Load the survey extract used to train the information-mode predictor.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. It must contain the columns ``RS2C_RECODE_AG_R``
        (age-bracket label) and ``NEWS1BIS_R`` (target label). Defaults to the
        project file the function always read before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        The CSV content plus an ``age_numeric`` column holding the numeric
        value associated with each age bracket in ``age_mapping``. Rows whose
        age bracket is not in the mapping, or whose ``NEWS1BIS_R`` is missing,
        are dropped.
    """
    print("Preparing prediction data...")
    df = pd.read_csv(path)

    # Representative numeric age for each bracket label found in the survey.
    age_mapping = {
        '15-17 ans': 16,
        '18-24 ans': 21,
        '25-34 ans': 29.5,
        '35-44 ans': 39.5,
        '45-59 ans': 52,
        '60-69 ans': 64.5,
        '70 ans et +': 75
    }

    # Labels absent from the mapping become NaN and are removed by dropna().
    df = (
        df.assign(age_numeric=df['RS2C_RECODE_AG_R'].map(age_mapping))
          .dropna(subset=['age_numeric', 'NEWS1BIS_R'])
    )
    print(f"Prediction data shape: {df.shape}")
    return df

# Prediction model wrapper
class InfoModePredictor:
    """Logistic regression predicting the ``NEWS1BIS_R`` label from a numeric age.

    The class bundles a ``LabelEncoder`` (string labels <-> integer classes)
    with a ``LogisticRegression`` fitted on the single feature ``age_numeric``.
    """

    def __init__(self):
        self.le = LabelEncoder()
        # `multi_class` is deprecated since scikit-learn 1.5 (FutureWarning, then
        # removal). With the default lbfgs solver a target with three or more
        # classes is fitted as a multinomial model without it, so the argument
        # is no longer passed.
        # NOTE(review): on scikit-learn < 1.5 a strictly binary target would now
        # be fitted one-vs-rest instead of multinomial — confirm the target
        # always has more than two classes.
        self.model = LogisticRegression(max_iter=1000, random_state=42)

    def fit(self, df):
        """Fit the encoder and the model on a prepared DataFrame.

        ``df`` must provide the columns ``NEWS1BIS_R`` (target) and
        ``age_numeric`` (feature).
        """
        print("Fitting prediction model...")
        y = self.le.fit_transform(df['NEWS1BIS_R'])
        X = df[['age_numeric']].values
        self.model.fit(X, y)
        print("Model fitting completed")

    def predict_proba(self, ages):
        """Return class probabilities for one or several ages.

        ``ages`` may be a NumPy array, a list/tuple or a single number; it is
        converted to a float column vector before being passed to the model.
        The result has one row per age and one column per class, in the order
        given by ``get_classes()``.
        """
        features = np.asarray(ages, dtype=float).reshape(-1, 1)
        return self.model.predict_proba(features)

    def get_classes(self):
        """Return the original labels in the order of the encoded classes."""
        return self.le.classes_

# Model initialisation: load the training data and fit the predictor once
print("\nInitializing model...")
df_pred = prepare_data()
predictor = InfoModePredictor()
predictor.fit(df_pred)

# Dash application (callback exceptions are suppressed because the tab
# contents, and therefore the callback targets, are created dynamically)
app = dash.Dash(__name__, suppress_callback_exceptions=True)

# Module-level lookups shared by the layouts and callbacks
categories_tv = [column for column in df_offre.columns if column != 'Année']
labels_info = {
    'Radio_Podcast': 'Radio / Podcast',
    'TV': 'Télévision',
    'Internet_Site': 'Sites Internet',
    'Search_Engine': 'Moteur de recherche',
    'News_Portal': "Portail d'actualités",
    'Social_Media': 'Réseaux sociaux',
    'Video_Platform': 'Plateformes vidéo',
}
# Column order follows the insertion order of the label mapping.
cols_info = list(labels_info)

# Dropdown options for the social-network tab: an "all" entry followed by
# the sorted distinct values found in the data
print("\nPreparing dropdown options...")
age_options = [
    {'label': "Toutes les tranches d'âge", 'value': 'all'},
    *(
        {'label': age_group, 'value': age_group}
        for age_group in sorted(df_social['RS2C_RECODE_AG_R'].unique())
    ),
]

format_options = [
    {'label': 'Tous les formats', 'value': 'all'},
    *(
        {'label': video_format.replace("D'un format ", ''), 'value': video_format}
        for video_format in sorted(df_social['FORMAT3_LR3_FORMAT3_3_R'].unique())
    ),
]
# Main layout: page title, tab bar and the container that receives the
# content of the selected tab
app.layout = html.Div([
    html.H1(
        'Dashboard Global des Médias en France',
        style={'textAlign': 'center', 'marginBottom': 30, 'marginTop': 20},
    ),

    dcc.Tabs(
        id='tabs',
        value='tab-tv',
        children=[
            dcc.Tab(label=tab_label, value=tab_value)
            for tab_label, tab_value in (
                ('Télévision', 'tab-tv'),
                ('Formats Vidéo', 'tab-video'),
                ("Modes d'Information", 'tab-info'),
                ('Prédictions', 'tab-pred'),
                ('Réseaux Sociaux', 'tab-social'),
            )
        ],
    ),

    html.Div(id='tabs-content'),
])

# Layout for the TV tab
def get_tv_layout():
    """Build the TV tab: heading, category dropdown and the two TV graphs."""
    print("Rendering TV layout")  # Debug print

    heading = html.H2(
        "Analyse de l'offre et de la consommation TV (1998-2023)",
        style={'textAlign': 'center', 'marginBottom': 20, 'marginTop': 20},
    )

    # One dropdown entry per TV category, pre-selected on 'films'.
    category_picker = html.Div(
        [
            html.Label('Sélectionner une catégorie:'),
            dcc.Dropdown(
                id='tv-category-selector',
                options=[{'label': category, 'value': category} for category in categories_tv],
                value='films',
            ),
        ],
        style={'width': '50%', 'margin': 'auto', 'marginBottom': 20},
    )

    graphs = html.Div([
        dcc.Graph(id='tv-evolution-graph'),
        dcc.Graph(id='tv-gaps-graph'),
    ])

    return html.Div([heading, category_picker, graphs])

# Layout for the video-formats tab
def get_video_layout():
    """Build the video-formats tab: heading, age/view selectors and the graph."""
    print("Rendering Video layout")  # Debug print

    heading = html.H2(
        'Préférences des Formats Vidéo',
        style={'textAlign': 'center', 'marginBottom': 20, 'marginTop': 20},
    )

    # Age dropdown: distinct age labels of df_format, first row pre-selected.
    age_labels = df_format['RS2C_RECODE_AG_R']
    age_picker = html.Div(
        [
            html.Label("Tranche d'âge:"),
            dcc.Dropdown(
                id='age-selector-video',
                options=[{'label': age_group, 'value': age_group}
                         for age_group in age_labels.unique()],
                value=age_labels.iloc[0],
            ),
        ],
        style={'width': '30%', 'display': 'inline-block', 'marginRight': '20px'},
    )

    # Radio buttons switching between raw frequencies and percentages.
    view_picker = html.Div(
        [
            html.Label("Type d'affichage:"),
            dcc.RadioItems(
                id='view-selector-video',
                options=[
                    {'label': 'Valeurs absolues', 'value': 'absolute'},
                    {'label': 'Pourcentages', 'value': 'percentage'},
                ],
                value='absolute',
                style={'marginTop': '10px'},
            ),
        ],
        style={'width': '30%', 'display': 'inline-block'},
    )

    controls = html.Div([age_picker, view_picker], style={'marginBottom': '20px'})

    return html.Div([
        heading,
        controls,
        dcc.Graph(id='video-preferences-graph'),
    ])

# Layout for the information-modes tab
def get_info_layout():
    """Build the information-modes tab: two side-by-side graphs and an age dropdown."""
    print("Rendering Info layout")  # Debug print

    heading = html.H2(
        "Modes d'Information par Tranche d'Âge",
        style={'textAlign': 'center', 'marginBottom': 20, 'marginTop': 20},
    )

    # Each graph sits in its own inline-block column.
    charts = html.Div([
        html.Div([dcc.Graph(id=graph_id)],
                 style={'width': '48%', 'display': 'inline-block'})
        for graph_id in ('info-bar-chart', 'info-overview-chart')
    ])

    # Age dropdown: distinct age groups of df_news1, first row pre-selected.
    age_groups = df_news1['Age_Group']
    age_picker = html.Div(
        [
            html.Label("Sélectionner une tranche d'âge:"),
            dcc.Dropdown(
                id='age-selector-info',
                options=[{'label': age_group, 'value': age_group}
                         for age_group in age_groups.unique()],
                value=age_groups.iloc[0],
            ),
        ],
        style={'width': '30%', 'margin': 'auto', 'marginTop': '20px'},
    )

    return html.Div([heading, charts, age_picker])

# Layout for the predictions tab
def get_prediction_layout():
    """Build the predictions tab: heading, probability graph and age slider."""
    print("Rendering Prediction layout")  # Debug print

    heading = html.H2(
        "Distribution des modes d'information selon l'âge",
        style={'textAlign': 'center', 'marginBottom': 20, 'marginTop': 20},
    )

    # Slider from 15 to 80 in steps of 1, with a labelled mark every 5 years.
    slider_marks = {}
    for tick in range(15, 81, 5):
        slider_marks[tick] = str(tick)

    age_slider = html.Div(
        [
            html.Label('Sélectionner un âge:'),
            dcc.Slider(
                id='age-slider',
                min=15,
                max=80,
                value=25,
                marks=slider_marks,
                step=1,
            ),
        ],
        style={'width': '80%', 'margin': '20px auto'},
    )

    return html.Div([
        heading,
        dcc.Graph(id='prediction-graph'),
        age_slider,
    ])

# Layout for the social-networks tab
def get_social_layout():
    """Build the social-networks tab.

    The tab contains a heading, a collapsible box of hard-coded global
    statistics, the controls (network toggle button, age and format
    dropdowns), the usage graph and a footnote.
    """
    print("Rendering Social layout")  # Debug print

    heading = html.H2(
        'Utilisation des réseaux sociaux par âge et format vidéo préféré',
        style={'textAlign': 'center', 'marginBottom': 20, 'marginTop': 20},
    )

    # Hard-coded global statistics, shown in a discreet collapsible box.
    tiktok_stats = html.P(
        [
            html.Strong("TikTok :"),
            html.Br(),
            "Nombre de valeurs non vides : 804",
            html.Br(),
            "Personnes ayant TikTok : 447",
            html.Br(),
            "Personnes n'ayant pas TikTok : 357",
        ],
        style={'marginBottom': '10px'},
    )
    instagram_stats = html.P([
        html.Strong("Instagram :"),
        html.Br(),
        "Nombre de valeurs non vides : 1501",
        html.Br(),
        "Personnes ayant Instagram : 758",
        html.Br(),
        "Personnes n'ayant pas Instagram : 743",
    ])
    stats_box = html.Div(
        [
            html.Details([
                html.Summary('Statistiques globales', style={'cursor': 'pointer'}),
                html.Div(
                    [tiktok_stats, instagram_stats],
                    style={'padding': '10px', 'backgroundColor': '#f8f9fa', 'borderRadius': '5px'},
                ),
            ]),
        ],
        style={'width': '300px', 'margin': '10px auto', 'textAlign': 'left'},
    )

    # Controls: network toggle button plus age and format dropdowns.
    toggle_button = html.Button(
        'Changer de réseau social',
        id='social-toggle',
        n_clicks=0,
        style={'margin': '10px', 'padding': '10px'},
    )
    age_picker = html.Div(
        [
            html.Label("Tranche d'âge:"),
            dcc.Dropdown(
                id='age-select',
                options=age_options,
                value='all',
                style={'width': '200px'},
            ),
        ],
        style={'margin': '10px'},
    )
    format_picker = html.Div(
        [
            html.Label('Format vidéo:'),
            dcc.Dropdown(
                id='format-select',
                options=format_options,
                value='all',
                style={'width': '200px'},
            ),
        ],
        style={'margin': '10px'},
    )
    controls = html.Div(
        [toggle_button, age_picker, format_picker],
        style={'display': 'flex', 'justifyContent': 'center', 'alignItems': 'center'},
    )

    footnote = html.Div(
        "* Les pourcentages sont calculés pour chaque combinaison âge/format",
        style={'textAlign': 'center', 'padding': '10px', 'fontStyle': 'italic'},
    )

    return html.Div([
        heading,
        stats_box,
        controls,
        dcc.Graph(id='usage-graph'),
        footnote,
    ])
  # Callback pour gérer les onglets
@app.callback(
    Output('tabs-content', 'children'),
    [Input('tabs', 'value')]
)
def render_content(tab):
    """Return the layout of the selected tab.

    ``tab`` is the value of the ``tabs`` component. An unrecognised value
    yields ``None``.
    """
    print(f"Selected tab: {tab}")  # Debug print

    # (tab value, layout builder) pairs, checked in order.
    routes = (
        ('tab-tv', get_tv_layout),
        ('tab-video', get_video_layout),
        ('tab-info', get_info_layout),
        ('tab-pred', get_prediction_layout),
        ('tab-social', get_social_layout),
    )
    for tab_value, build_layout in routes:
        if tab == tab_value:
            return build_layout()
    return None

# TV tab callbacks
@app.callback(
    Output('tv-evolution-graph', 'figure'),
    [Input('tv-category-selector', 'value')]
)
def update_tv_evolution_graph(selected_category):
    """Plot supply and consumption of one TV category over the years.

    ``selected_category`` is the value of the category dropdown and must be a
    column of both ``df_offre`` and ``df_conso``. When it is empty (the
    dropdown was cleared) or not a column of both frames, an empty figure is
    returned instead of raising ``KeyError`` inside the callback.
    """
    print(f"Updating TV evolution graph for category: {selected_category}")  # Debug print
    fig = go.Figure()

    # Guard against a cleared dropdown (None) and unknown categories.
    if (not selected_category
            or selected_category not in df_offre.columns
            or selected_category not in df_conso.columns):
        return fig

    # One line per dataset: supply in blue, consumption in green.
    for frame, trace_name, color in (
        (df_offre, 'Offre', '#2563eb'),
        (df_conso, 'Consommation', '#16a34a'),
    ):
        fig.add_trace(go.Scatter(
            x=frame['Année'],
            y=frame[selected_category],
            name=trace_name,
            line=dict(color=color, width=2)
        ))

    fig.update_layout(
        title=f'Évolution de {selected_category}',
        xaxis_title='Année',
        yaxis_title='Pourcentage',
        hovermode='x unified',
        plot_bgcolor='white',
        paper_bgcolor='white',
        # Horizontal legend placed just above the plot area, right-aligned.
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    return fig

@app.callback(
    Output('tv-gaps-graph', 'figure'),
    [Input('tv-category-selector', 'value')]
)
def update_tv_gaps_graph(selected_category):
    """Bar chart of the mean (supply - consumption) gap for every TV category.

    The bar of ``selected_category`` is drawn in a highlight colour; all other
    bars are grey.
    """
    print(f"Updating TV gaps graph for category: {selected_category}")  # Debug print

    # One record per category: mean of the row-wise difference between frames.
    df_gaps = pd.DataFrame([
        {'category': category, 'gap': (df_offre[category] - df_conso[category]).mean()}
        for category in categories_tv
    ])

    fig = px.bar(
        df_gaps,
        x='category',
        y='gap',
        title='Écarts moyens entre offre et consommation',
        labels={'category': 'Catégorie', 'gap': 'Écart moyen'}
    )

    fig.update_layout(
        xaxis_tickangle=-45,
        showlegend=False,
        hovermode='x unified',
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    # Highlight the currently selected category.
    bar_colors = []
    for category in df_gaps['category']:
        if category == selected_category:
            bar_colors.append('#6366f1')
        else:
            bar_colors.append('#94a3b8')
    fig.update_traces(marker_color=bar_colors)

    return fig

# Video-formats tab callback
@app.callback(
    Output('video-preferences-graph', 'figure'),
    [Input('age-selector-video', 'value'),
     Input('view-selector-video', 'value')]
)
def update_video_preferences_graph(selected_age, view_type):
    """Bar chart of video-format preferences for one age group.

    Parameters
    ----------
    selected_age : str or None
        Age label matched against ``df_format['RS2C_RECODE_AG_R']``. An empty
        selection returns an empty figure.
    view_type : str
        ``'percentage'`` shows each format as a share of the age group's total
        ``Frequency``; any other value shows the raw ``Frequency``.
    """
    print(f"Updating video preferences graph for age: {selected_age}, view type: {view_type}")  # Debug print
    if not selected_age:
        return go.Figure()

    format_column = 'FORMAT3_LR3_FORMAT3_3_R'
    as_percentage = view_type == 'percentage'

    filtered_df = df_format[df_format['RS2C_RECODE_AG_R'] == selected_age]
    frequencies = filtered_df['Frequency']

    if as_percentage:
        total = frequencies.sum()
        # A zero total would divide by zero; show 0 % for every format instead.
        values = frequencies / total * 100 if total else frequencies * 0.0
        y_title = 'Pourcentage (%)'
    else:
        values = frequencies
        y_title = 'Nombre de personnes'

    # assign() builds a new frame, so the slice of df_format is never written
    # to (the previous in-place column assignment triggered pandas'
    # SettingWithCopyWarning).
    filtered_df = filtered_df.assign(Value=values)

    colors = {
        "D'un format très court (moins de 5 minutes)": '#ffd700',
        "D'un format court (Entre 5 et 20 minutes)": '#82ca9d',
        "D'un format long (Plus de 20 minutes)": '#8884d8'
    }
    suffix = '%' if as_percentage else ''

    fig = go.Figure()

    # One bar per format, using the first row found for that format.
    first_rows = filtered_df.drop_duplicates(subset=format_column)
    for format_type, value in zip(first_rows[format_column], first_rows['Value']):
        fig.add_trace(go.Bar(
            x=[format_type],
            y=[value],
            name=format_type,
            # A format missing from the palette falls back to plotly's default
            # colour instead of raising KeyError.
            marker_color=colors.get(format_type),
            text=[f'{value:.1f}{suffix}'],
            textposition='auto',
        ))

    fig.update_layout(
        title=f'Préférences pour la tranche d\'âge : {selected_age}',
        xaxis_title='Format de vidéo',
        yaxis_title=y_title,
        showlegend=True,
        plot_bgcolor='white',
        paper_bgcolor='white',
        barmode='group',
        height=600
    )

    return fig

# Information-modes tab callback
@app.callback(
    [Output('info-bar-chart', 'figure'),
     Output('info-overview-chart', 'figure')],
    [Input('age-selector-info', 'value')]
)
def update_info_graphs(selected_age):
    """Return the two figures of the information-modes tab.

    The first figure is a horizontal bar chart of the ``cols_info`` columns
    for the first ``df_news1`` row whose ``Age_Group`` equals
    ``selected_age``. When no row matches (for example after the dropdown is
    cleared) the chart is returned without bars instead of raising
    ``IndexError``.

    The second figure does not depend on ``selected_age``: it draws one
    grouped bar series per column of ``df_info_mode`` after the first one,
    against ``df_info_mode['Age_Group']``.
    """
    print(f"Updating info graphs for age: {selected_age}")  # Debug print

    # First figure: breakdown by medium for the selected age group.
    df_filtered = df_news1[df_news1['Age_Group'] == selected_age]

    fig1 = go.Figure()
    if not df_filtered.empty:
        fig1.add_trace(go.Bar(
            x=df_filtered[cols_info].iloc[0],
            y=[labels_info[col] for col in cols_info],
            orientation='h',
            marker=dict(color='#2980b9')
        ))

    fig1.update_layout(
        title='Fréquence d\'utilisation par support',
        xaxis_title='Fréquence d\'utilisation quotidienne',
        # Reversed axis so the first medium is drawn at the top.
        yaxis=dict(autorange="reversed"),
        plot_bgcolor='white',
        paper_bgcolor='white',
        height=500
    )

    # Second figure: overview across all age groups.
    fig2 = go.Figure()

    # NOTE(review): columns[1:] assumes the first column of df_info_mode is
    # the age-group column — confirm against the CSV.
    for col in df_info_mode.columns[1:]:
        fig2.add_trace(go.Bar(
            name=col,
            x=df_info_mode['Age_Group'],
            y=df_info_mode[col]
        ))

    fig2.update_layout(
        title='Vue d\'ensemble des modes d\'information',
        barmode='group',
        xaxis_title='Tranche d\'âge',
        yaxis_title='Fréquence d\'utilisation quotidienne',
        plot_bgcolor='white',
        paper_bgcolor='white',
        height=500,
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=1
        )
    )

    return fig1, fig2

# Predictions tab callback
@app.callback(
    Output('prediction-graph', 'figure'),
    [Input('age-slider', 'value')]
)
def update_prediction_graph(selected_age):
    """Plot the predicted probability of each class for ages 15 to 80.

    One line is drawn per class returned by ``predictor.get_classes()``, and
    a dashed vertical line marks ``selected_age`` (the slider value).
    """
    print(f"Updating prediction graph for age: {selected_age}")  # Debug print

    # 100 evenly spaced ages; probas has one column per class.
    ages = np.linspace(15, 80, 100)
    probas = predictor.predict_proba(ages)

    fig = go.Figure()

    # Transposing yields one row of probabilities per class.
    for mode, mode_probabilities in zip(predictor.get_classes(), probas.T):
        curve = go.Scatter(
            x=ages,
            y=mode_probabilities,
            name=mode,
            line={'width': 2},
            hovertemplate=f'{mode}: %{{y:.1%}}<extra></extra>',
        )
        fig.add_trace(curve)

    fig.add_vline(x=selected_age, line_dash="dash", line_color="gray")

    # Horizontal legend centred below the plot; the bottom margin leaves room for it.
    legend_below = {
        'orientation': 'h',
        'yanchor': 'bottom',
        'y': -0.5,
        'xanchor': 'center',
        'x': 0.5,
    }
    fig.update_layout(
        xaxis_title='Âge',
        yaxis_title='Probabilité',
        hovermode='x unified',
        plot_bgcolor='white',
        paper_bgcolor='white',
        yaxis_tickformat='.0%',
        height=700,
        legend=legend_below,
        margin={'b': 150},
    )

    return fig

# Social-networks tab callback
@app.callback(
    [Output('usage-graph', 'figure'),
     Output('social-toggle', 'children')],
    [Input('age-select', 'value'),
     Input('format-select', 'value'),
     Input('social-toggle', 'n_clicks')]
)
def update_social_graph(selected_age, selected_format, n_clicks):
    """Draw usage of one social network as stacked percentage bars.

    Parameters
    ----------
    selected_age, selected_format : str
        Dropdown values; ``'all'`` disables the corresponding filter on
        ``df_social``.
    n_clicks : int or None
        Click count of the toggle button. Even counts (and a missing count)
        show Instagram, odd counts show TikTok.

    Returns
    -------
    (plotly.graph_objects.Figure, str)
        The figure and the new label of the toggle button.

    Notes
    -----
    The x axis shows video formats when an age group is selected and age
    groups otherwise. For every x category the 'Oui' and 'Non' shares are
    computed as ``sum(Frequency of that answer) / sum(Frequency) * 100``.
    Earlier the total was computed but never used (the values were
    ``Frequency * 100``), and the unfiltered view repeated each age group on
    the x axis once per format, so the stacked bars were not percentages of
    one category. An empty selection now yields an empty chart instead of
    failing on missing result columns.
    NOTE(review): assumes ``Frequency`` is a count or weight that can be
    summed across rows — confirm against the CSV.
    """
    print(f"Updating social graph for age: {selected_age}, format: {selected_format}, clicks: {n_clicks}")  # Debug print

    # Decide which network to display (a missing click count counts as 0).
    display_mode = 'instagram' if (n_clicks or 0) % 2 == 0 else 'tiktok'
    button_text = 'Voir TikTok' if display_mode == 'instagram' else 'Voir Instagram'
    network_name = display_mode.capitalize()

    # Filter the data. Boolean indexing returns new frames, so df_social
    # itself is never modified.
    filtered_df = df_social
    if selected_age != 'all':
        filtered_df = filtered_df[filtered_df['RS2C_RECODE_AG_R'] == selected_age]
    if selected_format != 'all':
        filtered_df = filtered_df[filtered_df['FORMAT3_LR3_FORMAT3_3_R'] == selected_format]

    # Answer column of the displayed network and category shown on the x axis.
    social_column = 'RSINFO_2_LR_R_2' if display_mode == 'instagram' else 'RSINFO_2_LR_R_4'
    by_format = selected_age != 'all'
    x_column = 'FORMAT3_LR3_FORMAT3_3_R' if by_format else 'RS2C_RECODE_AG_R'

    # Total frequency per x category; zero totals become NaN so the division
    # below cannot produce inf.
    totals = filtered_df.groupby(x_column)['Frequency'].sum()
    safe_totals = totals.where(totals != 0)

    def percentage_for(answer):
        """Share (0-100) of rows answering ``answer``, per x category."""
        answered = filtered_df[filtered_df[social_column] == answer]
        summed = answered.groupby(x_column)['Frequency'].sum()
        # Categories without this answer count as 0.
        aligned = summed.reindex(totals.index, fill_value=0)
        return (aligned / safe_totals * 100).fillna(0)

    uses_pct = percentage_for('Oui')
    not_uses_pct = percentage_for('Non')

    # Build the figure.
    fig = go.Figure()

    x_values = [x.replace('D\'un format ', '') for x in totals.index]

    fig.add_trace(go.Bar(
        x=x_values,
        y=uses_pct,
        name=f'Utilise {network_name}',
        marker_color='#82ca9d'
    ))

    fig.add_trace(go.Bar(
        x=x_values,
        y=not_uses_pct,
        name=f'N\'utilise pas {network_name}',
        marker_color='#ff8042'
    ))

    fig.update_layout(
        barmode='stack',
        title={
            'text': f'Utilisation de {network_name} par ' +
                   ('format' if by_format else 'tranche d\'âge'),
            'x': 0.5,
            'xanchor': 'center'
        },
        xaxis_title='Format vidéo' if by_format else 'Tranche d\'âge',
        yaxis_title='Pourcentage (%)',
        yaxis_range=[0, 100],
        xaxis_tickangle=-45,
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01
        ),
        margin=dict(b=100),
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    return fig, button_text

if __name__ == '__main__':
    print("\nStarting dashboard server...")
    # `Dash.run_server` was deprecated in favour of `Dash.run`, and recent Dash
    # releases reject it. Prefer `run` and fall back to `run_server` only on
    # old releases that do not provide it; the short-circuit keeps the
    # obsolete attribute from being touched when `run` exists.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True, dev_tools_props_check=True)

Loading data files...


  df_info = pd.read_csv('../Data/DonneesCreees/lesfrancaisetlinformationpropre.csv')



Shapes of loaded data:
df_offre shape: (26, 11)
df_conso shape: (26, 11)
df_format shape: (21, 3)
df_news1 shape: (42, 10)
df_info_mode shape: (7, 9)
df_social shape: (71, 5)

Initializing model...
Preparing prediction data...
Prediction data shape: (3298, 3)
Fitting prediction model...
Model fitting completed

Preparing dropdown options...

Starting dashboard server...
