In [None]:
# analyze_training.py
import numpy as np
import matplotlib.pyplot as plt
import struct
import yfinance as yf
import prepare_data

In [None]:
# 1. Select the ticker universe through the project-local prepare_data helper.
#    (presumably returns a list of ticker symbols — confirm in prepare_data)
tickers = prepare_data.get_tickers(n_total=140)
print(f"Selected {len(tickers)} tickers\n")

# 2. Download closing prices (15 years)
print("Downloading 15 years of closing prices...")
data = yf.download(tickers, period="15y", progress=True)
# Keep only the 'Close' field of the download result.
prices = data['Close']  

In [None]:
# Show the closing-price table as this cell's output.
prices

In [None]:


def load_parameters(filename):
    """Load trained model parameters from a binary file.

    The file is read sequentially in this order:
    two native-endian C ints ``N`` and ``K``, followed by float32 arrays
    ``A`` (N x N), ``pi`` (N), ``mu`` (N x K) and ``Sigma`` (N x K x K).

    Args:
        filename: Path of the binary parameter file.

    Returns:
        dict with keys ``'N'``, ``'K'`` (ints) and ``'A'``, ``'pi'``, ``'mu'``,
        ``'Sigma'`` (float32 numpy arrays backed by the bytes read, hence
        read-only).

    Raises:
        struct.error: If the file is too short to contain the 8-byte header.
        ValueError: If ``N`` or ``K`` is not positive, or if the file ends
            before one of the arrays has been read completely.
    """
    with open(filename, 'rb') as f:
        N, K = struct.unpack('ii', f.read(8))
        if N <= 0 or K <= 0:
            raise ValueError(
                f"{filename}: invalid header (N={N}, K={K}); "
                "both dimensions must be positive"
            )

        def _read_float32(name, shape):
            # Read exactly prod(shape) float32 values and fail loudly on a
            # short read instead of letting reshape report a cryptic error.
            n_values = 1
            for dim in shape:
                n_values *= dim
            n_bytes = n_values * 4
            raw = f.read(n_bytes)
            if len(raw) != n_bytes:
                raise ValueError(
                    f"{filename}: truncated while reading {name}: "
                    f"expected {n_bytes} bytes, got {len(raw)}"
                )
            return np.frombuffer(raw, dtype=np.float32).reshape(shape)

        A = _read_float32('A', (N, N))
        pi = _read_float32('pi', (N,))
        mu = _read_float32('mu', (N, K))
        Sigma = _read_float32('Sigma', (N, K, K))

    return {'N': N, 'K': K, 'A': A, 'pi': pi, 'mu': mu, 'Sigma': Sigma}

def plot_training_history(csv_file):
    """Plot the training convergence curve from a CSV history file.

    The CSV is read with one header row skipped; column 0 is used as the
    iteration number and column 1 as the log-likelihood. The figure is saved
    as ``training_convergence.png`` in the current working directory and then
    shown.

    Args:
        csv_file: Path of the comma-separated history file.
    """
    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single data row; without it loadtxt returns a 1-D array and the column
    # indexing below raises IndexError.
    data = np.loadtxt(csv_file, delimiter=',', skiprows=1, ndmin=2)
    iterations = data[:, 0]
    log_likelihood = data[:, 1]

    plt.figure(figsize=(10, 6))
    plt.plot(iterations, log_likelihood, 'b-', linewidth=2)
    plt.xlabel('Iteration')
    plt.ylabel('Log-Likelihood')
    plt.title('EM Training Convergence')
    plt.grid(True, alpha=0.3)
    plt.savefig('training_convergence.png', dpi=150)
    plt.show()

def analyze_parameters(params):
    """Print a plain-text summary of the learned parameters.

    Prints, in order: the initial distribution ``params['pi']``, the
    transition matrix ``params['A']``, the first five columns of
    ``params['mu']``, and the diagonal of ``params['A']`` (one line per
    state, four decimals).

    Args:
        params: Mapping with at least the keys ``'pi'``, ``'A'`` and ``'mu'``.
    """
    def _print_section(heading, values):
        # Heading, value, then a blank separator line.
        print(heading)
        print(values)
        print()

    print("=== ANALYSE DES PARAM√àTRES ===\n")

    _print_section("Distribution initiale œÄ :", params['pi'])
    _print_section("Matrice de transition A :", params['A'])
    _print_section("Moyennes Œº (premiers 5 assets) :", params['mu'][:, :5])

    # State persistence: probability of staying in the same state, i.e. A[i, i].
    stay_probabilities = np.diag(params['A'])
    print("Persistance des √©tats (diagonale de A) :")
    for state_index, stay_probability in enumerate(stay_probabilities):
        print(f"  √âtat {state_index}: {stay_probability:.4f}")

# Entry point: load the trained parameters, print their summary and plot the
# convergence curve. Both file names are resolved against the current
# working directory.
if __name__ == '__main__':
    # `params` is bound at module level on purpose: later cells read it.
    params = load_parameters('trained_params.bin')
    analyze_parameters(params)
    plot_training_history('training_history.csv')

In [None]:
# Transition matrix
A = params['A']
print("Matrice de transition A:")
print(A)

# Heatmap of A. The tick labels are built from A's own size instead of a
# hard-coded 3, so the plot stays valid if the model is trained with a
# different number of states.
import seaborn as sns
state_labels = [f'√âtat {i}' for i in range(A.shape[0])]
plt.figure(figsize=(8, 6))
sns.heatmap(A, annot=True, fmt='.3f', cmap='YlOrRd',
            xticklabels=state_labels,
            yticklabels=state_labels)
plt.title('Probabilit√©s de transition entre r√©gimes')
plt.savefig('transition_matrix.png', dpi=150)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt



# Per-state mean returns: one bar chart per hidden state.
# The state count comes from mu itself rather than a hard-coded 3, so the
# cell keeps working when the model has a different number of states.
n_states = len(params['mu'])
# Height scales with the number of panels; for 3 states this is the original 12 x 10.
fig, axes = plt.subplots(n_states, 1, figsize=(12, 10 * n_states / 3))
# plt.subplots returns a bare Axes (not an array) when there is a single panel.
axes = np.atleast_1d(axes)

for state in range(n_states):
    axes[state].bar(range(len(params['mu'][state])), params['mu'][state])
    axes[state].set_title(f'√âtat {state} - Rendements moyens (Œº)')
    axes[state].set_xlabel('Asset index')
    axes[state].set_ylabel('Mean return')
    # Dashed zero line to separate positive from negative mean returns.
    axes[state].axhline(y=0, color='r', linestyle='--', alpha=0.3)
    axes[state].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('states_interpretation.png')
plt.show()

# Summary statistics per state
for state in range(n_states):
    mean_return = np.mean(params['mu'][state])
    # Square root of the mean diagonal entry of Sigma[state]
    # (assuming Sigma[state] is a covariance matrix, this is an RMS volatility,
    # not the mean of per-asset standard deviations).
    volatility = np.sqrt(np.mean(np.diag(params['Sigma'][state])))
    print(f"√âtat {state}:")
    print(f"  Rendement moyen: {mean_return:.4f}")
    print(f"  Volatilit√© moyenne: {volatility:.4f}")
    print(f"  Persistance: {params['A'][state, state]:.4f}")
    print()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the decoded state sequence (the 'state' column is used below).
states = pd.read_csv('decoded_states.csv')

# Rebuild a date axis for the rows: one business day per row, starting from a
# hand-picked date (15 years x ~252 trading days would be ~3780 rows; the
# original author noted 3647 rows, hence a start around late 2010).
# NOTE(review): a business-day calendar does not skip exchange holidays, so
# these dates can drift from the real trading dates — confirm, or take the
# dates from the downloaded price index instead.
start_date = pd.Timestamp('2010-12-23')
dates = pd.bdate_range(start=start_date, periods=len(states))
states['date'] = dates

# Colour and display label for each state index.
colors = {0: 'green', 1: 'red', 2: 'orange'}
labels = {0: 'Calme/Haussier', 1: 'Crise/Bear', 2: 'Normal/Mixte'}

# Timeline: one row of dots per regime, placed at the regime's index on the y axis.
fig, ax = plt.subplots(figsize=(16, 4))
for state, label in labels.items():
    mask = states['state'].eq(state)
    ax.scatter(states.loc[mask, 'date'], np.full(mask.sum(), state),
               c=colors[state], label=label, alpha=0.6, s=1)

ax.set_yticks(list(labels))
ax.set_yticklabels(list(labels.values()))
ax.set(
    xlabel='Date',
    ylabel='R√©gime de march√©',
    title='R√©gimes de march√© identifi√©s par HMM sur 15 ans (100+ assets)',
)
ax.legend()
ax.grid(True, alpha=0.2)
plt.tight_layout()
plt.savefig('regime_timeline_dated.png', dpi=200)
plt.show()

# Report the calendar span of one crisis stretch.
# NOTE(review): the two bounds are hard-coded row positions into `states`
# (presumably read off the decoded sequence by hand) — they must be updated
# whenever the model or the data changes.
covid_start = 2175
covid_end = 2218
print("P√©riode de crise la plus longue (COVID?):")
crisis_first_day = states['date'].iloc[covid_start].date()
crisis_last_day = states['date'].iloc[covid_end].date()
print(f"  Du {crisis_first_day} au {crisis_last_day}")
# Both bounds are inclusive, hence the +1.
crisis_length = covid_end - covid_start + 1
print(f"  Dur√©e: {crisis_length} jours")

# Count regime changes per calendar year.
states['year'] = states['date'].dt.year
# A change is a row whose state differs from the previous row's. The flag is
# computed on the whole series BEFORE grouping: running diff() inside each
# yearly group yields NaN on the group's first row, and NaN != 0 is True, so
# every year was over-counted by one while real changes across a year
# boundary were lost. The very first row has no predecessor and counts as
# "no change".
regime_changed = states['state'].diff().fillna(0).ne(0)
transitions_per_year = regime_changed.groupby(states['year']).sum()
# Drop the inherited 'state' name so the printed Series is unnamed.
transitions_per_year.name = None
print("\nNombre de changements de r√©gime par ann√©e:")
print(transitions_per_year)

In [1]:
import analyze_profiling

In [2]:
# Run the CPU profiling report from the project-local analyze_profiling module.
# NOTE(review): the recorded output of this cell ends with "SystemExit: 1"
# after reporting that ../build/profiling_results.csv was not found — the
# profiling results apparently have to be generated before running this cell.
analyze_profiling.main()


                    ANALYSE PROFILING CPU

❌ Erreur: ../build/profiling_results.csv introuvable
💡 Exécutez d'abord: make profile-cpu-simple


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
