In [None]:
# PostgreSQL connection test: sets use_postgresql to True when the `sets`
# table can be counted, and to False (CSV fallback) on any failure.
import os

try:
    import sqlalchemy as sa

    # The URL can be overridden through MUSCLE_ANALYTICS_DB_URL so that real
    # credentials never have to be written into the notebook; the default is
    # the local development database that was hardcoded originally.
    db_url = os.environ.get(
        'MUSCLE_ANALYTICS_DB_URL',
        'postgresql://dev:devpass@localhost:5432/muscle_analytics',
    )
    engine = sa.create_engine(db_url)
    # Engine.execute() was removed in SQLAlchemy 2.0. Going through an explicit
    # connection works on 1.4 and 2.x, and the context manager releases the
    # connection as soon as the count has been read.
    with engine.connect() as connection:
        result = connection.execute(sa.text('SELECT COUNT(*) FROM sets')).fetchone()
    print(f"‚úÖ PostgreSQL accessible - {result[0]} sets trouv√©s")
    print("üí° Ce notebook peut utiliser les donn√©es PostgreSQL")
    use_postgresql = True
except Exception as e:
    # Deliberate best-effort: whatever goes wrong (driver missing, server
    # down, table absent), the notebook simply switches to the CSV data.
    print(f"‚ö†Ô∏è PostgreSQL non accessible: {e}")
    print("üìÑ Ce notebook utilisera les donn√©es CSV")
    use_postgresql = False

# 🏋️ Analyse EDA - Muscles et Exercices

Ce notebook se concentre sur l'analyse des exercices, des groupes musculaires et de leur répartition.

## Objectifs
- Analyser la répartition par région musculaire
- Créer un mapping exercices ↔ muscles complet
- Identifier les déséquilibres potentiels
- Analyser la fréquence d'entraînement par groupe musculaire

## 🔧 Imports et configuration

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from collections import Counter
import warnings
# Silence every warning so library notices do not clutter the cell outputs.
warnings.filterwarnings(action='ignore')

# Plot defaults: base style first, then the colour palette, then figure size.
plt.style.use('seaborn-v0_8')
sns.set_palette(palette="Set2")
plt.rcParams.update({'figure.figsize': (12, 8)})

print("üîß Librairies charg√©es avec succ√®s")

## 📁 Chargement et préparation des données

In [None]:
# Load the raw export.
df_raw = pd.read_csv('../examples/sample_data.csv')

# Cleaning and preparation (same steps as the previous notebook), done on a
# copy so that df_raw keeps the untouched export.
df = df_raw.copy().assign(
    # Dates are written day/month/year.
    Date=lambda d: pd.to_datetime(d['Date'], format='%d/%m/%Y'),
    # Drop the " kg" unit and turn the decimal comma into a dot before casting.
    Poids_kg=lambda d: (
        d['Poids / Distance']
        .str.replace(' kg', '')
        .str.replace(',', '.')
        .astype(float)
    ),
    # First run of digits found in the repetitions/time field.
    Reps=lambda d: d['R√©p√©titions / Temps'].str.extract(r'(\d+)').astype(float),
    # Per-set volume = load x repetitions.
    Volume=lambda d: d['Poids_kg'] * d['Reps'],
    Type_serie=lambda d: d['S√©rie / S√©rie d\'√©chauffement / S√©rie de r√©cup√©ration'],
    # 'Oui' / 'Non' become True / False; any other value maps to NaN.
    Sautee=lambda d: d['Saut√©e'].map({'Oui': True, 'Non': False}),
)

n_sets = len(df)
n_days = df['Date'].nunique()
print(f"üìä Donn√©es charg√©es: {n_sets} sets sur {n_days} jours")
print(f"üèãÔ∏è Exercices uniques: {df['Exercice'].nunique()}")
print(f"üí™ R√©gions musculaires: {df['R√©gion'].nunique()}")

## 🎯 Analyse par région musculaire

In [None]:
# Overall statistics per muscle region.
print("üí™ ANALYSE PAR R√âGION MUSCULAIRE")
print("=" * 50)

# One row per region, rounded to 2 decimals and ordered by total volume
# (largest first).
region_stats = (
    df.groupby('R√©gion')
    .agg(
        Nb_Sets=('Date', 'count'),            # number of sets
        Volume_Total=('Volume', 'sum'),
        Volume_Moyen=('Volume', 'mean'),
        Poids_Moyen=('Poids_kg', 'mean'),
        Poids_Max=('Poids_kg', 'max'),
        Reps_Moyen=('Reps', 'mean'),
        Nb_Exercices=('Exercice', 'nunique'),  # number of distinct exercises
    )
    .round(2)
    .sort_values('Volume_Total', ascending=False)
)

print(region_stats)

# Share of each region in the overall volume and in the overall set count.
for pct_column, base_column in (('Pct_Volume', 'Volume_Total'), ('Pct_Sets', 'Nb_Sets')):
    base_values = region_stats[base_column]
    region_stats[pct_column] = (base_values / base_values.sum() * 100).round(1)

print("\nüìä R√âPARTITION EN POURCENTAGES:")
print(region_stats[['Pct_Volume', 'Pct_Sets']])

In [None]:
# 2x2 figure summarising the per-region statistics.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('üí™ Analyse par R√©gion Musculaire', fontsize=16, fontweight='bold')

# Top-left panel: share of the total volume per region (pie chart).
pie_ax = axes[0, 0]
pie_ax.pie(
    region_stats['Volume_Total'],
    labels=region_stats.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=sns.color_palette("Set2", len(region_stats)),
)
pie_ax.set_title('R√©partition du Volume Total')

# The other three panels are bar charts that differ only by the column
# plotted, the colour and the labels: (axes, column, colour, title, y label).
bar_panels = [
    (axes[0, 1], 'Nb_Sets', 'steelblue', 'Nombre de Sets par R√©gion', 'Nombre de Sets'),
    (axes[1, 0], 'Poids_Moyen', 'darkgreen', 'Poids Moyen par R√©gion', 'Poids (kg)'),
    (axes[1, 1], 'Nb_Exercices', 'purple', 'Nombre d\'Exercices par R√©gion', 'Nombre d\'Exercices Uniques'),
]
for panel_ax, column, colour, title, y_label in bar_panels:
    region_stats[column].plot(kind='bar', ax=panel_ax, color=colour, alpha=0.8)
    panel_ax.set_title(title)
    panel_ax.set_ylabel(y_label)
    panel_ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 🗺️ Mapping exercices ↔ muscles

In [None]:
# Build the exercise -> muscles mapping.
print("üó∫Ô∏è MAPPING EXERCICES ‚Üî MUSCLES")
print("=" * 50)

# Distinct (exercise, region, primary muscles, secondary muscles) combinations.
mapping_columns = [
    'Exercice',
    'R√©gion',
    'Groupes musculaires (Primaires)',
    'Groupes musculaires (Secondaires)',
]
exercise_mapping = df[mapping_columns].drop_duplicates()

# Single pass over the catalogue: print each entry and store it in a dict
# keyed by exercise name for easy lookup. When an exercise appears with more
# than one distinct combination, the last one seen is the one kept.
print("üìã CATALOGUE D'EXERCICES:")
exercise_dict = {}
for _, entry in exercise_mapping.iterrows():
    exercise_name, region_name, primary, secondary = (entry[col] for col in mapping_columns)
    print(f"\nüèãÔ∏è {exercise_name}")
    print(f"   üìç R√©gion: {region_name}")
    print(f"   üéØ Muscles primaires: {primary}")
    print(f"   üéØ Muscles secondaires: {secondary}")
    exercise_dict[exercise_name] = {
        'region': region_name,
        'primary': primary,
        'secondary': secondary,
    }

print(f"\n‚úÖ Mapping cr√©√© pour {len(exercise_dict)} exercices")

In [None]:
# Detailed analysis of the primary muscles: how many sets target each one.
print("üéØ ANALYSE DES MUSCLES PRIMAIRES")
print("=" * 50)

# Extract and count the primary muscles. A set that lists several
# comma-separated muscles counts once for each of them. Rows with no primary
# muscle (NaN) are skipped -- the same guard the secondary-muscle analysis
# applies -- instead of failing on a float that has no .split(). Iterating
# the column directly also avoids building one Series per row with iterrows().
all_primary_muscles = [
    muscle.strip()
    for cell in df['Groupes musculaires (Primaires)']
    if pd.notna(cell)
    for muscle in cell.split(',')
]

primary_muscle_counts = Counter(all_primary_muscles)
primary_df = pd.DataFrame(list(primary_muscle_counts.items()), columns=['Muscle', 'Frequence'])
primary_df = primary_df.sort_values('Frequence', ascending=False)

print("üìä Fr√©quence des muscles primaires sollicit√©s:")
print(primary_df)

# Bar chart of the counts, most frequent muscle first.
plt.figure(figsize=(12, 6))
plt.bar(primary_df['Muscle'], primary_df['Frequence'], color='darkblue', alpha=0.7)
plt.title('üéØ Fr√©quence de Sollicitation des Muscles Primaires', fontsize=14, fontweight='bold')
plt.xlabel('Muscle')
plt.ylabel('Nombre de Sets')
plt.xticks(rotation=45)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Analysis of the secondary muscles: how many sets involve each one.
print("üéØ ANALYSE DES MUSCLES SECONDAIRES")
print("=" * 50)

# Flatten the comma-separated secondary muscles of every set into one list,
# ignoring sets that have no secondary muscle recorded (NaN).
all_secondary_muscles = [
    name.strip()
    for cell in df['Groupes musculaires (Secondaires)']
    if pd.notna(cell)
    for name in cell.split(',')
]

secondary_muscle_counts = Counter(all_secondary_muscles)
secondary_df = pd.DataFrame(
    secondary_muscle_counts.items(),
    columns=['Muscle', 'Frequence'],
).sort_values('Frequence', ascending=False)

print("üìä Fr√©quence des muscles secondaires sollicit√©s:")
print(secondary_df)

# Bar chart of the counts, drawn on the current axes of a new figure.
plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.bar(secondary_df['Muscle'], secondary_df['Frequence'], color='orange', alpha=0.7)
ax.set_title('üéØ Fr√©quence de Sollicitation des Muscles Secondaires', fontsize=14, fontweight='bold')
ax.set_xlabel('Muscle')
ax.set_ylabel('Nombre de Sets')
plt.xticks(rotation=45)
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

## ⚖️ Analyse des équilibres musculaires

In [None]:
# Muscle balance analysis: total volume credited to each primary muscle.
print("‚öñÔ∏è ANALYSE DES √âQUILIBRES MUSCULAIRES")
print("=" * 60)

# Volume per primary muscle. A set's volume is split evenly between the
# primary muscles it lists.
muscle_volume = {}
for muscles_cell, set_volume in zip(df['Groupes musculaires (Primaires)'], df['Volume']):
    if pd.isna(muscles_cell):
        # No primary muscle recorded: nothing to credit (and NaN has no .split()).
        continue
    muscles = [m.strip() for m in muscles_cell.split(',')]
    # A missing volume contributes 0 rather than NaN, so a single incomplete
    # set cannot turn a muscle's whole total into NaN. This matches the
    # skip-NaN behaviour of the groupby sum used for the regional totals.
    volume_per_muscle = 0.0 if pd.isna(set_volume) else set_volume / len(muscles)

    for muscle in muscles:
        muscle_volume[muscle] = muscle_volume.get(muscle, 0) + volume_per_muscle

muscle_volume_df = pd.DataFrame(list(muscle_volume.items()), columns=['Muscle', 'Volume_Total'])
muscle_volume_df = muscle_volume_df.sort_values('Volume_Total', ascending=False)
muscle_volume_df['Pourcentage'] = (muscle_volume_df['Volume_Total'] / muscle_volume_df['Volume_Total'].sum() * 100).round(1)

print("üìä Volume par muscle primaire:")
print(muscle_volume_df)

# Imbalance detection: a muscle is flagged when its total lies more than one
# standard deviation below or above the mean of all muscle totals.
print("\n‚ö†Ô∏è D√âTECTION DES D√âS√âQUILIBRES:")
mean_volume = muscle_volume_df['Volume_Total'].mean()
std_volume = muscle_volume_df['Volume_Total'].std()

under_developed = muscle_volume_df[muscle_volume_df['Volume_Total'] < mean_volume - std_volume]
over_developed = muscle_volume_df[muscle_volume_df['Volume_Total'] > mean_volume + std_volume]

if len(under_developed) > 0:
    print(f"üîª Muscles sous-d√©velopp√©s (< {mean_volume - std_volume:.0f}kg):")
    for _, muscle in under_developed.iterrows():
        print(f"   ‚Ä¢ {muscle['Muscle']}: {muscle['Volume_Total']:.0f}kg ({muscle['Pourcentage']:.1f}%)")

if len(over_developed) > 0:
    print(f"\nüî∫ Muscles sur-d√©velopp√©s (> {mean_volume + std_volume:.0f}kg):")
    for _, muscle in over_developed.iterrows():
        print(f"   ‚Ä¢ {muscle['Muscle']}: {muscle['Volume_Total']:.0f}kg ({muscle['Pourcentage']:.1f}%)")

if len(under_developed) == 0 and len(over_developed) == 0:
    print("‚úÖ D√©veloppement relativement √©quilibr√© d√©tect√©")

In [None]:
# Muscle balance figure: bar chart on the left, pie chart on the right.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
bar_ax, pie_ax = axes

# Left: total volume per muscle, with the mean drawn as a dashed red line.
muscle_volume_df.plot.bar(x='Muscle', y='Volume_Total', ax=bar_ax, color='teal', alpha=0.8)
bar_ax.set_title('üìä Volume Total par Muscle Primaire', fontsize=12, fontweight='bold')
bar_ax.set_ylabel('Volume Total (kg)')
bar_ax.tick_params(axis='x', rotation=45)
mean_label = f'Moyenne: {mean_volume:.0f}kg'
bar_ax.axhline(mean_volume, color='red', linestyle='--', alpha=0.7, label=mean_label)
bar_ax.legend()

# Right: the same totals shown as percentage shares.
pie_ax.pie(
    muscle_volume_df['Volume_Total'],
    labels=muscle_volume_df['Muscle'],
    autopct='%1.1f%%',
    startangle=90,
)
pie_ax.set_title('ü•ß R√©partition du Volume par Muscle', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

## 📅 Fréquence d'entraînement par groupe musculaire

In [None]:
# Training frequency analysis.
print("üìÖ FR√âQUENCE D'ENTRA√éNEMENT PAR GROUPE MUSCULAIRE")
print("=" * 60)

# Sets per (day, region), then per region: number of distinct training days
# and total number of sets.
daily_region_training = df.groupby(['Date', 'R√©gion']).size().reset_index(name='Sets')
region_frequency = (
    daily_region_training
    .groupby('R√©gion')
    .agg(
        Jours_Entrainement=('Date', 'nunique'),  # days on which the region was trained
        Total_Sets=('Sets', 'sum'),              # all sets for the region
    )
    .reset_index()
)

total_training_days = df['Date'].nunique()
days_trained = region_frequency['Jours_Entrainement']
region_frequency['Frequence_Pct'] = (days_trained / total_training_days * 100).round(1)
region_frequency['Sets_par_Jour'] = (region_frequency['Total_Sets'] / days_trained).round(1)

region_frequency = region_frequency.sort_values('Frequence_Pct', ascending=False)

print(f"Sur {total_training_days} jours d'entra√Ænement total:")
print(region_frequency)

# Frequency figure: two bar charts sharing the same layout,
# described as (column, colour, title, y label).
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
frequency_panels = [
    ('Frequence_Pct', 'skyblue', 'üìÖ Fr√©quence d\'Entra√Ænement par R√©gion (%)', 'Pourcentage de jours'),
    ('Sets_par_Jour', 'lightgreen', 'üèãÔ∏è Sets Moyens par Jour d\'Entra√Ænement', 'Sets par jour'),
]
for panel_ax, (column, colour, title, y_label) in zip(axes, frequency_panels):
    panel_ax.bar(region_frequency['R√©gion'], region_frequency[column], color=colour, alpha=0.8)
    panel_ax.set_title(title, fontsize=12, fontweight='bold')
    panel_ax.set_ylabel(y_label)
    panel_ax.tick_params(axis='x', rotation=45)
    panel_ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

## 🎯 Recommandations et insights

In [None]:
print("üéØ RECOMMANDATIONS ET INSIGHTS")
print("=" * 60)

# Strong and weak points. region_stats is ordered by total volume and
# primary_df by set count, both descending, so first/last rows are the extremes.
most_trained_region = region_stats.index[0]
least_trained_region = region_stats.index[-1]
top_muscle_row = primary_df.iloc[0]
bottom_muscle_row = primary_df.iloc[-1]
most_frequent_muscle = top_muscle_row['Muscle']
least_frequent_muscle = bottom_muscle_row['Muscle']
most_trained_pct = region_stats.loc[most_trained_region, 'Pct_Volume']
least_trained_pct = region_stats.loc[least_trained_region, 'Pct_Volume']

print("üìä BILAN ACTUEL:")
print(f"   ü•á R√©gion la plus travaill√©e: {most_trained_region} ({most_trained_pct:.1f}% du volume)")
print(f"   ü•â R√©gion la moins travaill√©e: {least_trained_region} ({least_trained_pct:.1f}% du volume)")
print(f"   üí™ Muscle le plus sollicit√©: {most_frequent_muscle} ({top_muscle_row['Frequence']} sets)")
print(f"   üí§ Muscle le moins sollicit√©: {least_frequent_muscle} ({bottom_muscle_row['Frequence']} sets)")

print("\nüéØ RECOMMANDATIONS:")

# Balance: flag when the gap between the largest and smallest regional
# volume exceeds the mean regional volume.
regional_volume = region_stats['Volume_Total']
volume_gap = regional_volume.max() - regional_volume.min()
if volume_gap > regional_volume.mean():
    print(f"   ‚ö†Ô∏è D√©s√©quilibre d√©tect√© entre {most_trained_region} et {least_trained_region}")
    print(f"   üí° Augmenter le volume pour {least_trained_region} (+{volume_gap/2:.0f}kg recommand√©)")

# Frequency: regions trained on fewer than 50% of the training days.
low_frequency_regions = region_frequency[region_frequency['Frequence_Pct'] < 50]
if len(low_frequency_regions) > 0:
    print("   üìÖ R√©gions entra√Æn√©es moins de 50% du temps:")
    for region_name, frequency_pct in zip(low_frequency_regions['R√©gion'], low_frequency_regions['Frequence_Pct']):
        print(f"      ‚Ä¢ {region_name}: {frequency_pct:.1f}% (recommand√©: augmenter la fr√©quence)")

# Variety: regions covered by at most one distinct exercise.
region_diversity = region_stats['Nb_Exercices']
low_diversity = region_diversity[region_diversity <= 1]
if len(low_diversity) > 0:
    print("   üîÑ R√©gions avec peu de diversit√© d'exercices:")
    for region_name, exercise_count in low_diversity.items():
        print(f"      ‚Ä¢ {region_name}: seulement {exercise_count} exercice(s) - ajouter de la vari√©t√©")

print("\n‚úÖ POINTS POSITIFS:")
if len(exercise_dict) >= 3:
    print(f"   üèãÔ∏è Bonne diversit√© d'exercices ({len(exercise_dict)} exercices diff√©rents)")

# Coefficient of variation (std / mean) of the regional volumes below 0.5.
if regional_volume.std() / regional_volume.mean() < 0.5:
    print("   ‚öñÔ∏è R√©partition du volume relativement √©quilibr√©e entre r√©gions")

if len(df) >= 10:
    print(f"   üìä Volume de donn√©es suffisant pour l'analyse ({len(df)} sets)")

print("\nüöÄ PROCHAINES √âTAPES:")
next_steps = (
    "   1. Analyser l'√©volution temporelle des performances par muscle",
    "   2. Calculer les ratios antagonistes/agonistes",
    "   3. D√©velopper un syst√®me de recommandations automatis√©",
    "   4. Int√©grer ces insights dans le dashboard principal",
)
for step in next_steps:
    print(step)

---
## 📝 Résumé du mapping exercices ↔ muscles

Ce notebook a créé un mapping complet entre exercices et groupes musculaires, permettant :

### ✅ Réalisations
- Catalogue détaillé exercices → muscles (primaires/secondaires)
- Analyse de l'équilibre musculaire par volume
- Détection des déséquilibres potentiels
- Recommandations personnalisées

### 🎯 Insights clés
- Identification des groupes musculaires prioritaires
- Analyse de la fréquence d'entraînement
- Diversité des exercices par région

### 📊 Données générées
- `exercise_dict`: Mapping exercices → muscles
- `muscle_volume_df`: Volume par muscle
- `region_frequency`: Fréquence d'entraînement par région

**Prochaine √©tape:** Analyse temporelle avanc√©e (03_EDA_temporel.ipynb)