In [2]:
from pymongo import MongoClient
import gridfs
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt

# Initialisation
def init_fs(uri="mongodb://localhost:27017", db_name="affiches"):
    client = MongoClient(uri)
    db = client[db_name]
    return gridfs.GridFS(db)

# R√©cup√©ration des images
def get_all_images(n=15):
    fs = init_fs()
    files = list(fs.find().sort("uploadDate", -1).limit(n))  # les plus r√©cents d'abord
    images = []
    for f in files:
        data = f.read()
        img = Image.open(BytesIO(data))
        images.append((f.filename, img))
    return images

# Affichage
images = get_all_images(n=15)
cols = 5
rows = (len(images) + cols - 1) // cols
plt.figure(figsize=(15, 3 * rows))

for i, (filename, img) in enumerate(images):
    plt.subplot(rows, cols, i + 1)
    plt.imshow(img)
    plt.axis("off")
    plt.title(filename, fontsize=8)

plt.tight_layout()
plt.show()


<Figure size 1500x0 with 0 Axes>

In [19]:
import pandas as pd
import plotly.express as px

# Donn√©es
data = [
    ["2025-07-23T16:03:48.475172", "rbf", "Horreur|Animation", "15x10", 0.7375, 0.755, 500, 0.1, "rosenblatt", 1000, 0.05],
    ["2025-07-23T16:05:26.881932", "rbf", "Horreur|Animation", "15x10", 0.7819, 0.795, 500, 0.1, "rosenblatt", 5000, 0.05],
    ["2025-07-23T16:06:37.863566", "rbf", "Horreur|Animation", "15x10", 0.7362, 0.7625, 500, 0.1, "gradient-descent", 1000, 0.05],
    ["2025-07-23T16:08:46.235103", "rbf", "Horreur|Animation", "15x10", 0.7913, 0.8075, 500, 0.1, "gradient-descent", 5000, 0.05],
    ["2025-07-23T16:10:37.569219", "rbf", "Horreur|Animation|Action", "15x10", 0.5424, 0.5483, 500, 0.1, "rosenblatt", 1000, 0.05],
    ["2025-07-23T16:13:45.712492", "rbf", "Horreur|Animation|Action", "15x10", 0.5767, 0.5712, 500, 0.1, "rosenblatt", 5000, 0.05],
    ["2025-07-23T16:16:11.127309", "rbf", "Horreur|Animation|Action", "15x10", 0.5543, 0.5571, 500, 0.1, "gradient-descent", 1000, 0.05],
    ["2025-07-23T16:22:24.462194", "rbf", "Horreur|Animation|Action", "15x10", 0.5996, 0.5923, 500, 0.1, "gradient-descent", 5000, 0.05],
]

columns = [
    "timestamp", "model", "genres", "shape", "train_acc", "test_acc", "nb_centers",
    "std", "algo", "epochs", "lr"
]

df = pd.DataFrame(data, columns=columns)

# Plotly boxplot interactif
fig = px.box(
    df,
    x="genres",
    y="test_acc",
    color="algo",
    title="Comparaison des test accuracy selon l'algorithme et les genres (Plotly)",
    labels={"test_acc": "Test Accuracy", "genres": "Genres", "algo": "Algorithme"}
)

genres_uniques = df["genres"].unique()
x_positions = {genre: i for i, genre in enumerate(genres_uniques)}

fig.update_layout(
    yaxis=dict(range=[0, 1]),
    boxmode='group',
    legend_title="Algorithme",
    xaxis_title="Genres",
    yaxis_title="Test Accuracy",
    shapes=[
        # Ligne √† 0.5 pour Horreur|Animation (2 genres)
        dict(
            type="line",
            x0=x_positions["Horreur|Animation"] - 0.4,
            x1=x_positions["Horreur|Animation"] + 0.4,
            y0=0.5,
            y1=0.5,
            line=dict(color="green", width=2, dash="dash")
        ),
        # Ligne √† 0.33 pour Horreur|Animation|Action (3 genres)
        dict(
            type="line",
            x0=x_positions["Horreur|Animation|Action"] - 0.4,
            x1=x_positions["Horreur|Animation|Action"] + 0.4,
            y0=0.33,
            y1=0.33,
            line=dict(color="blue", width=2, dash="dash")
        )
    ]
)


fig.show()

In [26]:
import pandas as pd
import plotly.graph_objects as go

df = pd.read_csv('../results/logs/rbf3.csv', parse_dates=['timestamp'])

fig = go.Figure()

# Positions x num√©riques pour bien coller les boxplots

# Temps - Rosenblatt
fig.add_trace(go.Box(
    y=df.loc[df['algo'] == 'rosenblatt', 'train_duration_seconds'],
    x=[0]*len(df.loc[df['algo'] == 'rosenblatt']),
    name='Rosenblatt',
    jitter=0.5,
    pointpos=0
))

# Temps - Gradient Descent
fig.add_trace(go.Box(
    y=df.loc[df['algo'] == 'gradient-descent', 'train_duration_seconds'],
    x=[0.4]*len(df.loc[df['algo'] == 'gradient-descent']),
    name='Gradient Descent',
    jitter=0.5,
    pointpos=0
))

fig.update_layout(
    title="Comparaison du temps d'entra√Ænement : Rosenblatt vs Gradient Descent",
    xaxis=dict(
        tickvals=[0.2],
        ticktext=['Temps d\'entra√Ænement (s)'],
        zeroline=False,
        showgrid=False,
    ),
    yaxis=dict(
        title='Dur√©e (secondes)',
        zeroline=False,
        showgrid=True,
    ),
    boxmode='overlay',
    legend_title_text='Algorithme',
)

fig.show()

In [11]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
from datetime import datetime

df = pd.read_csv("../tests-projets/results/logs/rbf.csv")
print(df.head(5))
# Convertir timestamp en datetime
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Ajouter une colonne pour l'√©cart entre train et test
df['accuracy_gap'] = df['train_accuracy'] - df['test_accuracy']

# Trouver le meilleur mod√®le
best_model = df.loc[df['test_accuracy'].idxmax()]

print("üèÜ MEILLEUR MOD√àLE:")
print(f"Test Accuracy: {best_model['test_accuracy']:.4f} ({best_model['test_accuracy']*100:.1f}%)")
print(f"Train Accuracy: {best_model['train_accuracy']:.4f} ({best_model['train_accuracy']*100:.1f}%)")
print(f"Learning Rate: {best_model['learning_rate']}")
print(f"Epochs: {best_model['epochs']}")
print(f"√âcart Train-Test: {best_model['accuracy_gap']:.4f}")
print("-" * 50)

# 1. Graphique de comparaison Train vs Test Accuracy
fig1 = go.Figure()

fig1.add_trace(go.Scatter(
    x=list(range(len(df))),
    y=df['train_accuracy'],
    mode='lines+markers',
    name='Train Accuracy',
    line=dict(color='#ff7f0e', width=3),
    marker=dict(size=8)
))

fig1.add_trace(go.Scatter(
    x=list(range(len(df))),
    y=df['test_accuracy'],
    mode='lines+markers',
    name='Test Accuracy',
    line=dict(color='#1f77b4', width=3),
    marker=dict(size=8)
))

# Marquer le meilleur mod√®le
best_idx = df['test_accuracy'].idxmax()
fig1.add_trace(go.Scatter(
    x=[best_idx],
    y=[best_model['test_accuracy']],
    mode='markers',
    name='Meilleur mod√®le',
    marker=dict(size=15, color='red', symbol='star')
))

fig1.update_layout(
    title='üìä Comparaison Train vs Test Accuracy',
    xaxis_title='Exp√©riences',
    yaxis_title='Accuracy',
    hovermode='x unified',
    template='plotly_white',
    height=500
)

fig1.show()

# 2. Impact du Learning Rate
fig2 = px.box(df, x='learning_rate', y='test_accuracy', 
              title='üìà Impact du Learning Rate sur Test Accuracy',
              color='learning_rate',
              color_discrete_sequence=px.colors.qualitative.Set3)

fig2.add_trace(go.Scatter(
    x=df['learning_rate'],
    y=df['test_accuracy'],
    mode='markers',
    marker=dict(size=10, color='red', opacity=0.7),
    name='Donn√©es individuelles'
))

fig2.update_layout(template='plotly_white', height=500)
fig2.show()

# 3. Impact des √©poques avec learning rate
fig3 = px.scatter(df, x='epochs', y='test_accuracy', 
                  color='learning_rate', size='train_accuracy',
                  title='‚è±Ô∏è Impact √âpoques vs Learning Rate sur Test Accuracy',
                  color_continuous_scale='viridis',
                  size_max=20)

fig3.update_layout(template='plotly_white', height=500)
fig3.show()

# 4. Heatmap des hyperparam√®tres
pivot_table = df.pivot_table(values='test_accuracy', 
                           index='epochs', 
                           columns='learning_rate', 
                           aggfunc='mean')

fig4 = go.Figure(data=go.Heatmap(
    z=pivot_table.values,
    x=pivot_table.columns,
    y=pivot_table.index,
    colorscale='RdYlBu_r',
    text=np.round(pivot_table.values, 4),
    texttemplate="%{text}",
    textfont={"size": 12}
))

fig4.update_layout(
    title='üî• Heatmap: Test Accuracy par √âpoques et Learning Rate',
    xaxis_title='Learning Rate',
    yaxis_title='√âpoques',
    template='plotly_white',
    height=500
)

fig4.show()

# 5. √âvolution temporelle
fig5 = make_subplots(
    rows=2, cols=1,
    subplot_titles=('Accuracy dans le temps', '√âcart Train-Test dans le temps'),
    vertical_spacing=0.1
)

fig5.add_trace(
    go.Scatter(x=df['timestamp'], y=df['train_accuracy'], 
               name='Train Accuracy', line=dict(color='orange')),
    row=1, col=1
)

fig5.add_trace(
    go.Scatter(x=df['timestamp'], y=df['test_accuracy'], 
               name='Test Accuracy', line=dict(color='blue')),
    row=1, col=1
)

fig5.add_trace(
    go.Scatter(x=df['timestamp'], y=df['accuracy_gap'], 
               name='√âcart Train-Test', line=dict(color='red'),
               fill='tonexty'),
    row=2, col=1
)

fig5.update_layout(
    title_text='üèÜ √âvolution temporelle des performances',
    template='plotly_white',
    height=700
)

fig5.show()

# 6. Analyse statistique d√©taill√©e
print("\nüìä ANALYSE STATISTIQUE D√âTAILL√âE:")
print("\nPerformances par Learning Rate:")
lr_stats = df.groupby('learning_rate').agg({
    'test_accuracy': ['mean', 'std', 'max'],
    'train_accuracy': ['mean', 'std', 'max'],
    'accuracy_gap': ['mean', 'std']
}).round(4)

print(lr_stats)

print("\nPerformances par nombre d'√©poques:")
epochs_stats = df.groupby('epochs').agg({
    'test_accuracy': ['mean', 'std', 'max'],
    'train_accuracy': ['mean', 'std', 'max'],
    'accuracy_gap': ['mean', 'std']
}).round(4)

print(epochs_stats)

# 7. Graphique de performance globale
fig6 = go.Figure()

# Cr√©er des √©tiquettes personnalis√©es
labels = [f"LR:{lr}, E:{ep}" for lr, ep in zip(df['learning_rate'], df['epochs'])]

fig6.add_trace(go.Scatter(
    x=df['train_accuracy'],
    y=df['test_accuracy'],
    mode='markers+text',
    text=labels,
    textposition='top center',
    marker=dict(
        size=15,
        color=df['accuracy_gap'],
        colorscale='RdYlBu',
        colorbar=dict(title="√âcart Train-Test"),
        line=dict(width=2, color='black')
    ),
    name='Mod√®les'
))

# Ligne de r√©f√©rence (performance parfaite)
max_acc = max(df['train_accuracy'].max(), df['test_accuracy'].max())
min_acc = min(df['train_accuracy'].min(), df['test_accuracy'].min())

fig6.add_trace(go.Scatter(
    x=[min_acc, max_acc],
    y=[min_acc, max_acc],
    mode='lines',
    line=dict(dash='dash', color='gray'),
    name='Ligne parfaite (Train=Test)'
))

fig6.update_layout(
    title='üéØ Performance Train vs Test (couleur = √©cart)',
    xaxis_title='Train Accuracy',
    yaxis_title='Test Accuracy',
    template='plotly_white',
    height=600
)

fig6.show()

# Recommandations
print("\nüéØ RECOMMANDATIONS:")
print(f"‚Ä¢ Meilleur learning rate: {best_model['learning_rate']} (Test Accuracy: {best_model['test_accuracy']:.4f})")
print(f"‚Ä¢ Meilleur nombre d'√©poques: {best_model['epochs']}")
print(f"‚Ä¢ L'√©cart Train-Test est de {best_model['accuracy_gap']:.4f}, indiquant un l√©ger overfitting")

if best_model['accuracy_gap'] > 0.01:
    print("‚Ä¢ Consid√©rer la r√©gularisation pour r√©duire l'overfitting")
else:
    print("‚Ä¢ Le mod√®le g√©n√©ralise bien")

# Learning rate optimal
lr_performance = df.groupby('learning_rate')['test_accuracy'].mean().sort_values(ascending=False)
print(f"‚Ä¢ Ordre des learning rates par performance: {list(lr_performance.index)}")

# √âpoques optimales
epochs_performance = df.groupby('epochs')['test_accuracy'].mean().sort_values(ascending=False)
print(f"‚Ä¢ Plus d'√©poques semble am√©liorer les performances: {list(epochs_performance.index)}")

                    timestamp model         categories   size  train_accuracy  \
0  2025-07-22T11:22:56.499974   rbf  Horreur|Animation  15x10          0.6269   
1  2025-07-22T11:22:58.970819   rbf  Horreur|Animation  15x10          0.7456   
2  2025-07-22T11:23:01.400210   rbf  Horreur|Animation  15x10          0.7488   
3  2025-07-22T11:23:03.797447   rbf  Horreur|Animation  15x10          0.7469   
4  2025-07-22T11:23:13.458299   rbf  Horreur|Animation  15x10          0.7506   

   test_accuracy   C  gamma        algo  epochs  learning_rate  
0         0.6600  10   0.01  rosenblatt    1000          0.001  
1         0.7675  10   0.01  rosenblatt    1000          0.010  
2         0.7750  10   0.01  rosenblatt    1000          0.050  
3         0.7625  10   0.01  rosenblatt    1000          0.100  
4         0.7700  10   0.01  rosenblatt    5000          0.001  
üèÜ MEILLEUR MOD√àLE:
Test Accuracy: 0.7900 (79.0%)
Train Accuracy: 0.7981 (79.8%)
Learning Rate: 0.05
Epochs: 5000
√âcart


üìä ANALYSE STATISTIQUE D√âTAILL√âE:

Performances par Learning Rate:
              test_accuracy                 train_accuracy                  \
                       mean     std     max           mean     std     max   
learning_rate                                                                
0.001                0.6080  0.1124  0.7700         0.5988  0.1076  0.7519   
0.010                0.6641  0.1319  0.7750         0.6634  0.1157  0.7744   
0.050                0.6671  0.1358  0.7900         0.6668  0.1211  0.7981   
0.100                0.6728  0.1311  0.7875         0.6718  0.1227  0.8100   

              accuracy_gap          
                      mean     std  
learning_rate                       
0.001              -0.0092  0.0317  
0.010              -0.0007  0.0270  
0.050              -0.0002  0.0265  
0.100              -0.0010  0.0260  

Performances par nombre d'√©poques:
       test_accuracy                 train_accuracy                  \
              


üéØ RECOMMANDATIONS:
‚Ä¢ Meilleur learning rate: 0.05 (Test Accuracy: 0.7900)
‚Ä¢ Meilleur nombre d'√©poques: 5000
‚Ä¢ L'√©cart Train-Test est de 0.0081, indiquant un l√©ger overfitting
‚Ä¢ Le mod√®le g√©n√©ralise bien
‚Ä¢ Ordre des learning rates par performance: [0.1, 0.05, 0.01, 0.001]
‚Ä¢ Plus d'√©poques semble am√©liorer les performances: [5000, 1000]


In [16]:
fig = go.Figure()

# Courbe principale de l'accuracy gap
fig.add_trace(go.Scatter(
    x=list(range(len(df))),
    y=df['accuracy_gap'],
    mode='lines+markers',
    name='Accuracy Gap',
    line=dict(color='#e74c3c', width=4),
    marker=dict(size=10, color='#c0392b', line=dict(width=2, color='white')),
    hovertemplate='<b>Exp√©rience %{x}</b><br>' +
                  'Accuracy Gap: %{y:.4f}<br>' +
                  'Learning Rate: %{customdata[0]}<br>' +
                  '√âpoques: %{customdata[1]}<br>' +
                  '<extra></extra>',
    customdata=list(zip(df['learning_rate'], df['epochs']))
))

# Ligne de r√©f√©rence √† 0 (pas d'overfitting)
fig.add_hline(y=0, line_dash="dash", line_color="green", line_width=2,
              annotation_text="Gap = 0 (g√©n√©ralisation parfaite)", 
              annotation_position="top left")

# Ligne de la moyenne
mean_gap = df['accuracy_gap'].mean()
fig.add_hline(y=mean_gap, line_dash="dot", line_color="orange", line_width=2,
              annotation_text=f"Moyenne = {mean_gap:.4f}", 
              annotation_position="bottom left")

# Colorer le fond selon les zones d'overfitting
fig.add_hrect(y0=-0.1, y1=0, fillcolor="lightgreen", opacity=0.2, 
              annotation_text="Zone de sous-apprentissage", annotation_position="inside bottom")
fig.add_hrect(y0=0, y1=0.02, fillcolor="lightblue", opacity=0.2,
              annotation_text="Zone optimale", annotation_position="inside top")
fig.add_hrect(y0=0.02, y1=0.1, fillcolor="lightyellow", opacity=0.2,
              annotation_text="Overfitting l√©ger", annotation_position="inside top")

# Annoter les points extr√™mes
min_gap_idx = df['accuracy_gap'].idxmin()
max_gap_idx = df['accuracy_gap'].idxmax()

fig.add_annotation(
    x=min_gap_idx, y=df.loc[min_gap_idx, 'accuracy_gap'],
    text=f"Minimum<br>LR: {df.loc[min_gap_idx, 'learning_rate']}<br>E: {df.loc[min_gap_idx, 'epochs']}",
    showarrow=True, arrowhead=2, arrowcolor="green", arrowwidth=2,
    bgcolor="lightgreen", bordercolor="green", borderwidth=2
)

fig.add_annotation(
    x=max_gap_idx, y=df.loc[max_gap_idx, 'accuracy_gap'],
    text=f"Maximum<br>LR: {df.loc[max_gap_idx, 'learning_rate']}<br>E: {df.loc[max_gap_idx, 'epochs']}",
    showarrow=True, arrowhead=2, arrowcolor="red", arrowwidth=2,
    bgcolor="lightcoral", bordercolor="red", borderwidth=2
)

# Mise en forme
fig.update_layout(
    title={
        'text': 'Courbe de l\'Accuracy Gap (Overfitting / Underfitting)',
        'x': 0.5,
        'font': {'size': 20, 'color': '#2c3e50'}
    },
    xaxis_title='Exp√©riences',
    yaxis_title='Accuracy Gap (Train - Test)',
    template='plotly_white',
    height=600,
    width=1000,
    hovermode='x unified',
    font=dict(size=12),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

# # Ajouter des √©tiquettes sur l'axe X
# fig.update_xaxis(
#     tickmode='array',
#     tickvals=list(range(len(df))),
#     ticktext=[f"Exp {i+1}" for i in range(len(df))],
#     tickangle=45
# )

fig.show()

In [21]:
import pandas as pd
import plotly.express as px

# Donn√©es

columns = [
    "timestamp", "model", "genres", "shape", "train_acc", "test_acc", "nb_centers",
    "std", "algo", "epochs", "lr"
]

df = pd.read_csv("../results/logs/rbf4.csv")
df["num_genres"] = df["categories"].str.count(r"\|") + 1
df["Group"] = df["num_genres"].apply(lambda x: "2 genres" if x == 2 else "10 genres")

# Cr√©ation du boxplot de base
fig = px.box(
    df,
    x="Group",
    y="test_accuracy",
    title="Comparaison de la test_accuracy : 2 genres vs 10 genres",
    labels={"test_accuracy": "Test Accuracy", "Group": "Nombre de genres"},
    color="Group"
)

# Ajout des lignes horizontales conditionnelles
fig.add_shape(
    type="line",
    x0=-0.4, x1=0.4, y0=0.5, y1=0.5,
    xref="x", yref="y",
    line=dict(color="black", dash="dash"),
)
fig.add_annotation(x="2 genres", y=0.5, text="R√©f√©rence = 0.5", showarrow=False, yshift=10)

fig.add_shape(
    type="line",
    x0=0.6, x1=1.4, y0=0.1, y1=0.1,
    xref="x", yref="y",
    line=dict(color="black", dash="dash"),
)
fig.add_annotation(x="10 genres", y=0.1, text="R√©f√©rence = 0.1", showarrow=False, yshift=10)
fig.update_yaxes(range=[0, 1])
fig.show()

In [5]:
import pandas as pd

df = pd.read_csv("../tests-projets/results/logs/linear.csv")

best_row = df.loc[df['test_accuracy'].idxmax()]
print(best_row)

timestamp         2025-07-22T02:22:38.623817
model                                 linear
categories                 Horreur|Animation
size                                   15x10
train_accuracy                      0.770939
test_accuracy                       0.746835
epochs                                  5000
learning_rate                           0.05
algo                              rosenblatt
Name: 10, dtype: object


In [6]:
import pandas as pd

df = pd.read_csv("../tests-projets/results/logs/linear2.csv")

best_row = df.loc[df['test_accuracy'].idxmax()]
print(best_row)

timestamp         2025-07-22T02:31:20.624175
model                                 linear
categories          Horreur|Animation|Action
size                                   15x10
train_accuracy                      0.650314
test_accuracy                       0.586265
epochs                                  5000
learning_rate                           0.05
algo                        gradient-descent
Name: 11, dtype: object


In [3]:
import pandas as pd
import plotly.graph_objects as go

df = pd.read_csv("../results/logs/rbf_various_sizes.csv")

comms = [
    "Taille d'image avec la meilleure test_accuracy, et est sup√©rieur au train_acc, ce qui montre qu'il n'y a pas d'overfitting",
    "test_accuracy √©galement le plus √©lev√©, mais inf√©rieur au train_acc, donc on commence √† avoir un l√©ger overfitting",
    "Ici, on observe un underfitting, car on monte le nombre de param√®tres, sans monter le nombres d'exemples, le train_acc chute",
    "Encore un l√©ger overfitting, avec une perte de qualit√© sur les images, qui risque d'entrainer des biais pour les entrainements"
]

df["comm"] = comms

fig = go.Figure(data=[go.Table(
    header=dict(
        values=["size", "train_accuracy", "test_accuracy", "Commentaire"],
        fill_color='lightgray',
        align='center',
        font=dict(color='black', size=12),
        line_color='darkslategray'
    ),
    cells=dict(
        values=[df[col] for col in ["size", "train_accuracy", "test_accuracy", "comm"]],
        fill_color='white',
        align='center',
        line_color='lightgray'
    )
)])

fig.update_layout(
    title="R√©sultats d'entra√Ænement du mod√®le RBF",
    width=1400,
    height=500
)

fig.show()