In [7]:
# üìö Imports et configuration
import sys
import os
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Put the parent of the working directory on the import path (presumably where
# the `game` and `src` packages imported below live -- verify against the repo
# layout).
parent_dir = Path.cwd().parent
# Guard the insert so that re-running this cell does not keep prepending the
# same entry to sys.path.
if str(parent_dir) not in sys.path:
    sys.path.insert(0, str(parent_dir))

# Load the RL building blocks: the environments plus the algorithm modules
# (src.dp, src.monte_carlo, src.td, src.dyna). A failed import is reported and
# then re-raised; the success message is printed only when every import worked.
try:
    from game.environments import LineWorld, GridWorld, MontyHallParadox1, MontyHallParadox2
    from src.dp import PolicyIteration, ValueIteration
    from src.monte_carlo import MonteCarloES, OnPolicyMC, OffPolicyMC
    from src.td import Sarsa, QLearning, ExpectedSarsa
    from src.dyna import DynaQ, DynaQPlus
except ImportError as import_error:
    print(f"‚ùå Erreur d'import: {import_error}")
    raise
else:
    print("‚úÖ Tous les modules import√©s avec succ√®s")


‚úÖ Tous les modules import√©s avec succ√®s


In [8]:
# Environment and algorithm setup: announce the start of this cell.
print("üéØ Configuration des environnements et algorithmes...")

# Fonction de test s√©curis√©e pour les environnements
def safe_env_init(env_class, *args, **kwargs):
    """Build an environment and smoke-test it before handing it to the caller.

    The environment is instantiated, reset, and probed with one ``step(0)``
    call. ``MontyHallParadox1`` is probed with ``step(0)`` followed by
    ``step(1)`` (labelled phase 1 and phase 2). Every probe must return a
    4-item result. After probing, the environment is reset again so the caller
    always receives it in its initial state.

    Args:
        env_class: Environment class to instantiate.
        *args: Positional arguments forwarded to ``env_class``.
        **kwargs: Keyword arguments forwarded to ``env_class``.

    Returns:
        The probed and reset environment, or ``None`` when construction,
        reset, or a probe step fails. The reason is printed.
    """
    # Probe actions: two consecutive steps for MontyHallParadox1, one otherwise.
    probe_actions = (0, 1) if env_class.__name__ == 'MontyHallParadox1' else (0,)

    try:
        env = env_class(*args, **kwargs)
        env.reset()

        for action in probe_actions:
            result = env.step(action)
            # A malformed result used to be accepted silently; reject it so the
            # caller never receives an environment that failed the check.
            if result is None or len(result) != 4:
                raise ValueError(
                    f"step({action}) doit renvoyer un tuple de 4 valeurs, obtenu: {result!r}"
                )

        # Undo the probe steps so the returned environment starts fresh.
        env.reset()
        return env
    except Exception as e:
        print(f"‚ùå Erreur {env_class.__name__}: {e}")
        return None

# Instantiate every configured environment; only those that safe_env_init
# returns (i.e. not None) are kept.
environments = {}
# name -> (environment class, positional args, keyword args)
env_configs = {
    'LineWorld': (LineWorld, [], {}),
    'GridWorld': (GridWorld, [], {'n_rows': 4, 'n_cols': 4}),
    'MontyHall1': (MontyHallParadox1, [], {}),
    'MontyHall2': (MontyHallParadox2, [], {})
}

for name, (env_class, args, kwargs) in env_configs.items():
    env = safe_env_init(env_class, *args, **kwargs)
    if env is not None:
        environments[name] = env
        print(f"‚úÖ {name} initialis√©")
    else:
        # safe_env_init has already printed the real cause. The previous
        # message hard-coded one specific error text regardless of what
        # actually went wrong.
        print(f"‚ùå {name} indisponible (voir l'erreur ci-dessus)")

# Build one instance of each algorithm, keyed by its display name.
# NOTE(review): every algorithm is constructed against the first available
# environment only, while the resulting instances are later iterated together
# with all environments -- confirm this is intended.
algorithms = {}
reference_env = next(iter(environments.values()), None)

if reference_env is None:
    print("‚ùå Aucun environnement disponible pour initialiser les algorithmes")
else:
    algorithm_classes = {
        'PolicyIteration': PolicyIteration,
        'ValueIteration': ValueIteration,
        'MonteCarloES': MonteCarloES,
        'OnPolicyMC': OnPolicyMC,
        'OffPolicyMC': OffPolicyMC,
        'Sarsa': Sarsa,
        'QLearning': QLearning,
        'ExpectedSarsa': ExpectedSarsa,
        'DynaQ': DynaQ,
        'DynaQPlus': DynaQPlus
    }

    # A constructor failure is reported and the algorithm is simply left out.
    for name, alg_class in algorithm_classes.items():
        try:
            algorithms[name] = alg_class(reference_env)
        except Exception as init_error:
            print(f"‚ö†Ô∏è  Erreur {name}: {init_error}")
        else:
            print(f"‚úÖ {name} initialis√©")

# Summary of what is available for the comparison run.
print(f"\nüìä Configuration termin√©e:")
print(f"   ‚Ä¢ {len(environments)} environnements disponibles")
print(f"   ‚Ä¢ {len(algorithms)} algorithmes disponibles")
print(f"   ‚Ä¢ {len(environments) * len(algorithms)} combinaisons √† tester")


üéØ Configuration des environnements et algorithmes...
‚úÖ LineWorld initialis√©
‚úÖ GridWorld initialis√©
‚ùå Erreur MontyHallParadox1: cannot unpack non-iterable NoneType object
‚ùå Erreur MontyHall1: cannot unpack non-iterable NoneType object
‚úÖ MontyHall2 initialis√©
‚úÖ PolicyIteration initialis√©
‚úÖ ValueIteration initialis√©
‚úÖ MonteCarloES initialis√©
‚úÖ OnPolicyMC initialis√©
‚úÖ OffPolicyMC initialis√©
‚úÖ Sarsa initialis√©
‚úÖ QLearning initialis√©
‚úÖ ExpectedSarsa initialis√©
‚úÖ DynaQ initialis√©
‚úÖ DynaQPlus initialis√©

üìä Configuration termin√©e:
   ‚Ä¢ 3 environnements disponibles
   ‚Ä¢ 10 algorithmes disponibles
   ‚Ä¢ 30 combinaisons √† tester


In [9]:
# Simplified comparative analysis: announce the start of this cell.
print("üß™ D√©but de l'analyse comparative...")

# Fonction d'√©valuation simple
def evaluate_algorithm(algorithm, env, episodes=10, max_steps=50):
    """Roll out episodes on ``env`` and summarise the rewards collected.

    NOTE(review): ``algorithm`` is accepted but never consulted. Actions are
    drawn at random from ``[0, 1]`` with ``np.random.choice``, so the returned
    numbers describe a random policy on ``env``, not the given algorithm.
    No seed is set here, so results vary between runs.

    Args:
        algorithm: Currently unused (see the note above); kept so existing
            callers keep working.
        env: Object exposing ``reset()`` and ``step(action)``. ``step`` is
            expected to return a 4-item ``(next_state, reward, done, info)``
            result; any other result ends the episode early.
        episodes: Number of episodes to attempt.
        max_steps: Upper bound on steps per episode (previously hard-coded
            to 50).

    Returns:
        dict with:
            ``average_reward``: total reward divided by the number of
                completed episodes (0 when none completed).
            ``total_reward``: sum of rewards over completed episodes.
            ``episodes``: number of episodes that ran without raising.
            ``failed_episodes``: number of episodes aborted by an exception.
    """
    total_reward = 0
    successful_episodes = 0
    failed_episodes = 0

    for _ in range(episodes):
        try:
            env.reset()
            episode_reward = 0

            for _ in range(max_steps):
                # Simple random action (the algorithm is not queried).
                action = np.random.choice([0, 1])

                result = env.step(action)
                # A malformed step result ends the episode; the reward
                # gathered so far is still counted.
                if result is None or len(result) != 4:
                    break

                _, reward, done, _ = result
                episode_reward += reward

                if done:
                    break

            total_reward += episode_reward
            successful_episodes += 1

        except Exception:
            # Best effort: a failing episode is skipped, but it is counted so
            # that the failure is visible to the caller instead of vanishing.
            failed_episodes += 1
            continue

    return {
        'average_reward': total_reward / max(1, successful_episodes),
        'total_reward': total_reward,
        'episodes': successful_episodes,
        'failed_episodes': failed_episodes
    }

# Run the comparison: evaluate every (environment, algorithm) pair and collect
# one result row per pair.
results = []
total_combinations = len(environments) * len(algorithms)

if total_combinations > 0:
    with tqdm(total=total_combinations, desc="Analyse en cours") as pbar:
        for env_name, env in environments.items():
            for alg_name, algorithm in algorithms.items():
                try:
                    env.reset()
                    metrics = evaluate_algorithm(algorithm, env)

                    results.append({
                        'Environment': env_name,
                        'Algorithm': alg_name,
                        'Average_Reward': metrics['average_reward'],
                        'Total_Reward': metrics['total_reward'],
                        'Episodes': metrics['episodes'],
                        # evaluate_algorithm swallows per-episode exceptions,
                        # so a pair for which no episode completed must not be
                        # reported as a success with a reward of 0.
                        'Status': 'Success' if metrics['episodes'] > 0 else 'Error'
                    })

                except Exception:
                    # Keep a placeholder row so the failed pair stays visible.
                    results.append({
                        'Environment': env_name,
                        'Algorithm': alg_name,
                        'Average_Reward': 0,
                        'Total_Reward': 0,
                        'Episodes': 0,
                        'Status': 'Error'
                    })

                pbar.update(1)

    # Convert to a DataFrame and report the outcome.
    df_results = pd.DataFrame(results)
    df_success = df_results[df_results['Status'] == 'Success']

    print(f"\nüìä Analyse termin√©e !")
    print(f"‚úÖ Succ√®s: {len(df_success)}")
    print(f"‚ùå Erreurs: {len(df_results) - len(df_success)}")

    if len(df_success) > 0:
        print("\nüîç Aper√ßu des r√©sultats:")
        print(df_success.head(10))

        # Summary table: one row per algorithm, one column per environment.
        print("\nüìã TABLEAU R√âCAPITULATIF:")
        print("=" * 50)
        pivot_table = df_success.pivot(index='Algorithm', columns='Environment', values='Average_Reward')
        print(pivot_table.fillna(0).round(3))

        # Persist all rows (errors included) to the working directory.
        df_results.to_csv('results_analysis.csv', index=False)
        print(f"\nüíæ R√©sultats sauvegard√©s dans results_analysis.csv")
    else:
        print("‚ùå Aucun r√©sultat valide")
else:
    print("‚ùå Aucune combinaison √† tester")

print("\nüéâ Analyse termin√©e !")


üß™ D√©but de l'analyse comparative...


Analyse en cours: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [00:00<00:00, 240.32it/s]


üìä Analyse termin√©e !
‚úÖ Succ√®s: 30
‚ùå Erreurs: 0

üîç Aper√ßu des r√©sultats:
  Environment        Algorithm  Average_Reward  Total_Reward  Episodes  \
0   LineWorld  PolicyIteration             0.0           0.0        10   
1   LineWorld   ValueIteration             0.0           0.0        10   
2   LineWorld     MonteCarloES             0.2           2.0        10   
3   LineWorld       OnPolicyMC             0.2           2.0        10   
4   LineWorld      OffPolicyMC            -0.4          -4.0        10   
5   LineWorld            Sarsa             0.0           0.0        10   
6   LineWorld        QLearning             0.4           4.0        10   
7   LineWorld    ExpectedSarsa             0.2           2.0        10   
8   LineWorld            DynaQ             0.3           3.0        10   
9   LineWorld        DynaQPlus             0.2           2.0        10   

    Status  
0  Success  
1  Success  
2  Success  
3  Success  
4  Success  
5  Success  
6  Succ


