In [3]:
import os
import re
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# ==============================================================
# Extraction and analysis functions
# ==============================================================

def parse_marche(filename):
    """Extract the HOMO, LUMO and GAP series from a marche_aleatoire.txt file.

    Every occurrence of ``HOMO: <x> Ha LUMO: <y> Ha GAP: <z> eV`` found in the
    file contributes one step to each series.

    Args:
        filename: Path of the text file to read (decoded as UTF-8).

    Returns:
        A tuple ``(homo, lumo, gap)`` of NumPy arrays with one entry per
        matched record. HOMO and LUMO are multiplied by 27.2114 (Hartree to
        eV); GAP is kept as written in the file. The arrays are empty when
        the file contains no matching record. ``(None, None, None)`` is
        returned, after printing a message, when the file cannot be read.
    """
    hartree_to_ev = 27.2114

    # Signed decimal number: "12", "-1", "-0.25", ".5", "3." or "1.2e-3".
    # The sign is factored out of the alternation so that it also applies to
    # plain integers; otherwise a record containing e.g. "-1" is skipped.
    number = r"([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
    pattern = (
        rf"HOMO:\s*{number}\s*Ha\s*"
        rf"LUMO:\s*{number}\s*Ha\s*"
        rf"GAP:\s*{number}\s*eV"
    )

    # Only the file access can fail here; keep the try block limited to it.
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 files.
        print(f"Erreur lecture {filename}: {e}")
        return None, None, None

    records = re.findall(pattern, content)

    homo = np.array([float(h) * hartree_to_ev for h, _, _ in records])
    lumo = np.array([float(l) * hartree_to_ev for _, l, _ in records])
    gap = np.array([float(g) for _, _, g in records])

    return homo, lumo, gap

def fitness_auto_correlation(fitnesses, max_lag):
    """Compute the autocorrelation coefficients of a series.

    For each lag k = 1, 2, ..., max_lag - 1 the Pearson correlation between
    the series and itself shifted by k steps is computed with np.corrcoef.

    Args:
        fitnesses: Sequence (list or 1-D array) of values along the walk.
        max_lag: Exclusive upper bound on the lag; lags 1..max_lag-1 are tried.

    Returns:
        A list of Python floats, entry ``i`` being the coefficient for lag
        ``i + 1``. The list stops early once fewer than two overlapping
        samples remain, so it may be shorter than ``max_lag - 1`` (and is
        empty for series of fewer than three points).
    """
    n_points = len(fitnesses)
    auto_correlations = []
    for lag in range(1, max_lag):
        # A correlation needs at least two overlapping pairs: with a single
        # pair np.corrcoef returns NaN (and emits a RuntimeWarning), which
        # would then contaminate any average taken over several runs.
        if n_points - lag < 2:
            break
        corr = np.corrcoef(fitnesses[:-lag], fitnesses[lag:])[0, 1]
        auto_correlations.append(float(corr))
    return auto_correlations

def correlation_length(C):
    """Return the correlation length Lc = -1 / ln(C(1)).

    Args:
        C: Sequence of autocorrelation coefficients whose first entry is the
            lag-1 coefficient C(1).

    Returns:
        ``np.nan`` when C is empty or C(1) <= 0 (logarithm undefined),
        ``np.inf`` when C(1) >= 1 (ln(1) = 0 would divide by zero),
        otherwise ``-1 / ln(C(1))``.
    """
    if len(C) == 0:
        return np.nan

    lag_one = C[0]

    # C(1) at or above 1 means no measurable decay.
    if lag_one >= 1:
        return np.inf

    # A non-positive C(1) has no real logarithm.
    return np.nan if lag_one <= 0 else -1.0 / np.log(lag_one)

def delta_fitness(f):
    """Return the successive differences f[i + 1] - f[i] of a fitness series.

    The result is produced by np.diff, so it has one element fewer than the
    input.
    """
    step_changes = np.diff(f)
    return step_changes

# ==============================================================
# Main program
# ==============================================================

# Properties analysed, in the order used for printing, reporting and plotting.
_PROPERTIES = ('HOMO', 'LUMO', 'GAP')

# Output files written to the current working directory.
_TXT_REPORT = "Average_Autocorrelation.txt"
_PDF_REPORT = "Average_Autocorrelation.pdf"

# Line colours per property.
_COLORS = {
    'HOMO': "#FF69B4",  # hot pink
    'LUMO': "#9370DB",  # medium purple
    'GAP': "#1E90FF",   # dodger blue
}


def _collect_walk_statistics(max_lag, n_folders):
    """Read folders n°1..n°<n_folders> and gather per-run quantities.

    Returns three dicts keyed by property name: the autocorrelation curves,
    the finite correlation lengths, and the mean absolute step changes.
    """
    all_autocorr = {name: [] for name in _PROPERTIES}
    all_lc = {name: [] for name in _PROPERTIES}
    all_mean_dF = {name: [] for name in _PROPERTIES}

    for i in range(1, n_folders + 1):
        folder_name = f"n°{i}"
        marche_file = os.path.join(folder_name, "marche_aleatoire.txt")

        if not os.path.exists(marche_file):
            print(f"✗ {marche_file} not found")
            continue

        homo, lumo, gap = parse_marche(marche_file)

        # parse_marche signals an unreadable file with None values.
        if homo is None:
            continue

        print(f"✓ {folder_name}: {len(homo)} steps")

        for name, data in zip(_PROPERTIES, (homo, lumo, gap)):
            C = fitness_auto_correlation(data, max_lag)
            if len(C) == 0:
                continue
            all_autocorr[name].append(C)

            # Keep Lc only when it is a usable number (neither NaN nor inf).
            Lc = correlation_length(C)
            if np.isfinite(Lc):
                all_lc[name].append(Lc)

            all_mean_dF[name].append(np.mean(np.abs(delta_fitness(data))))

    return all_autocorr, all_lc, all_mean_dF


def _summarise(all_autocorr, all_lc, all_mean_dF):
    """Compute and print mean / standard deviation per property.

    Properties without any autocorrelation curve are left out of the result.
    """
    stats = {}
    for name in _PROPERTIES:
        if len(all_autocorr[name]) == 0:
            continue

        autocorr_array = np.array(all_autocorr[name])
        lc_values = all_lc[name]
        has_lc = len(lc_values) > 0
        summary = {
            'mean_C': np.mean(autocorr_array, axis=0),
            'std_C': np.std(autocorr_array, axis=0),
            'mean_Lc': np.mean(lc_values) if has_lc else np.nan,
            'std_Lc': np.std(lc_values) if has_lc else np.nan,
            'mean_dF': np.mean(all_mean_dF[name]),
            'std_dF': np.std(all_mean_dF[name]),
        }
        stats[name] = summary

        print(f"\n{name}:")
        if np.isfinite(summary['mean_Lc']):
            print(f"  Average Lc = -1/ln(C(1)): {summary['mean_Lc']:.2f} ± {summary['std_Lc']:.2f}")
        else:
            print(f"  Average Lc: Non calculable (C(1) <= 0 ou >= 1)")
        print(f"  Average Δ: {summary['mean_dF']:.4f} ± {summary['std_dF']:.4f}")

    return stats


def _write_text_report(stats, n_experiments, min_length):
    """Write the averaged results to the plain-text report file."""
    with open(_TXT_REPORT, "w", encoding="utf-8") as txt_file:
        txt_file.write("="*70 + "\n")
        txt_file.write("AVERAGE AUTOCORRELATION ANALYSIS\n")
        txt_file.write("Correlation length formula: Lc = -1/ln(C(1))\n")
        txt_file.write("="*70 + "\n")
        txt_file.write(f"\nNumber of experiments analyzed: {n_experiments}\n")
        txt_file.write(f"Autocorrelation length computed: {min_length} lags\n\n")

        for name, summary in stats.items():
            txt_file.write(f"\n=== {name} ===\n")
            if np.isfinite(summary['mean_Lc']):
                txt_file.write(f"Average correlation length Lc = -1/ln(C(1)): {summary['mean_Lc']:.2f} ± {summary['std_Lc']:.2f}\n")
            else:
                txt_file.write(f"Average correlation length: Non calculable\n")
            txt_file.write(f"Average Δfitness: {summary['mean_dF']:.4f} ± {summary['std_dF']:.4f}\n")


def _write_pdf_report(stats, all_autocorr, min_length):
    """Write the PDF: one combined figure, then one detailed figure per property."""
    # The name of the current working directory is used in the figure titles.
    script_dir = os.path.basename(os.getcwd())
    lags = np.arange(1, min_length + 1)

    with PdfPages(_PDF_REPORT) as pdf:
        # First page: all averaged curves on one graph.
        fig, ax = plt.subplots(figsize=(12, 6))

        for name, summary in stats.items():
            mean_C = summary['mean_C']
            std_C = summary['std_C']

            ax.plot(lags, mean_C, color=_COLORS[name], label=f'{name}', linewidth=2)
            ax.fill_between(lags, mean_C - std_C, mean_C + std_C,
                            color=_COLORS[name], alpha=0.2)

        ax.set_xlabel('Lag k', fontsize=12)
        ax.set_ylabel('Autocorrelation coefficient C(k)', fontsize=12)
        ax.set_title(f'Average autocorrelations (HOMO, LUMO, GAP) for {script_dir}', fontsize=14, fontweight='bold')
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

        # Following pages: detailed graphs, one page per property.
        for name, summary in stats.items():
            fig, axs = plt.subplots(1, 2, figsize=(14, 5))

            mean_C = summary['mean_C']
            std_C = summary['std_C']

            # Left panel: average curve with a +/- one standard deviation band.
            axs[0].plot(lags, mean_C, color=_COLORS[name], linewidth=2, label='Average')
            axs[0].fill_between(lags, mean_C - std_C, mean_C + std_C,
                                color=_COLORS[name], alpha=0.3, label='std dev')
            axs[0].set_xlabel('Lag k')
            axs[0].set_ylabel('C(k)')
            axs[0].set_title(f'{name} - Average autocorrelation for {script_dir}')
            axs[0].legend()
            axs[0].grid(True, alpha=0.3)

            # Right panel: every individual run, with the average on top.
            for C in all_autocorr[name]:
                axs[1].plot(np.arange(1, len(C)+1), C, alpha=0.5, linewidth=0.8)
            axs[1].plot(lags, mean_C, color='black', linewidth=2.5, label='Average')
            axs[1].set_xlabel('Lag k')
            axs[1].set_ylabel('C(k)')
            axs[1].set_title(f'{name} - All autocorrelation for {script_dir}')
            axs[1].legend()
            axs[1].grid(True, alpha=0.3)

            plt.tight_layout()
            pdf.savefig(fig)
            plt.close(fig)


def main(max_lag=50, n_folders=10):
    """Average the autocorrelation of HOMO, LUMO and GAP over several runs.

    Reads ``n°<i>/marche_aleatoire.txt`` for i = 1..n_folders relative to the
    current working directory, computes the autocorrelation curve, the
    correlation length and the mean absolute step change of each property for
    each run, prints the averages, and writes ``Average_Autocorrelation.txt``
    and ``Average_Autocorrelation.pdf`` in the current working directory.

    Args:
        max_lag: Passed to fitness_auto_correlation as its lag bound.
        n_folders: Number of ``n°<i>`` run folders to look for.

    Returns:
        None. If no run yields any autocorrelation data, a message is printed
        and no output file is written.
    """
    print("="*70)
    print("DATA COLLECTION - AUTOCORRELATION")
    print("="*70)

    all_autocorr, all_lc, all_mean_dF = _collect_walk_statistics(max_lag, n_folders)

    # Without any curve, min() below would raise an opaque ValueError and the
    # report and plots would have nothing to show: stop here instead.
    if not any(all_autocorr.values()):
        print("\nNo usable random-walk data found; nothing to analyse.")
        return

    print("\n" + "="*70)
    print("COMPUTING AVERAGES AND STANDARD DEVIATIONS")
    print("="*70)

    # Curves may differ in length between runs; truncate them all to the
    # shortest so they can be stacked into one array and averaged per lag.
    min_length = min(len(c) for curves in all_autocorr.values() for c in curves)
    for name in _PROPERTIES:
        all_autocorr[name] = [c[:min_length] for c in all_autocorr[name]]

    stats = _summarise(all_autocorr, all_lc, all_mean_dF)

    print("\n" + "="*70)
    print("GENERATING GRAPHS")
    print("="*70)

    _write_text_report(stats, len(all_autocorr['HOMO']), min_length)
    _write_pdf_report(stats, all_autocorr, min_length)

    print("\n✓ Results saved in:")
    print(f"  - {_TXT_REPORT}")
    print(f"  - {_PDF_REPORT}")
    print("\n" + "="*70)

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

DATA COLLECTION - AUTOCORRELATION
✓ n°1: 301 steps
✓ n°2: 301 steps
✓ n°3: 301 steps
✓ n°4: 301 steps
✓ n°5: 301 steps
✓ n°6: 301 steps
✓ n°7: 301 steps
✓ n°8: 301 steps
✓ n°9: 301 steps
✓ n°10: 301 steps

COMPUTING AVERAGES AND STANDARD DEVIATIONS

HOMO:
  Average Lc = -1/ln(C(1)): 8.11 ± 2.03
  Average Δ: 0.2321 ± 0.0263

LUMO:
  Average Lc = -1/ln(C(1)): 9.84 ± 3.52
  Average Δ: 0.2857 ± 0.0255

GAP:
  Average Lc = -1/ln(C(1)): 3.91 ± 1.35
  Average Δ: 0.3521 ± 0.0316

GENERATING GRAPHS

✓ Results saved in:
  - Average_Autocorrelation.txt
  - Average_Autocorrelation.pdf

