# üß™ Tests et Validation - MarketPulse

## Objectif

Ce notebook teste les mod√®les ML entra√Æn√©s et √©value leurs performances.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import load_model
from transformers import pipeline
import yfinance as yf
from datetime import datetime, timedelta
import os

# Configuration
np.random.seed(42)
tf.random.set_seed(42)

# Configuration de l'affichage
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

## Partie 1: Tests du mod√®le LSTM pour la pr√©diction des prix

In [None]:
# Charger le mod√®le LSTM entra√Æn√©
model_path = "models/lstm_model_AAPL.h5"
if os.path.exists(model_path):
    lstm_model = load_model(model_path)
    print(f"Mod√®le charg√© depuis: {model_path}")
else:
    print(f"Mod√®le non trouv√©: {model_path}")
    print("Veuillez d'abord entra√Æner le mod√®le")
    lstm_model = None

In [None]:
# Charger les donn√©es pour les tests
symbol = "AAPL"
data = pd.read_csv(f'data/processed/{symbol}_ml_data.csv')
print(f"Donn√©es charg√©es: {data.shape}")

# Utiliser les 60 derniers jours pour tester
test_data = data['Close'].tail(100).values  # 100 derniers prix
print(f"Donn√©es de test: {len(test_data)}")

In [None]:
# Pr√©parer les donn√©es pour les tests
from sklearn.preprocessing import MinMaxScaler

if lstm_model is not None:
    # Normaliser les donn√©es
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(test_data.reshape(-1, 1))
    
    # Cr√©er des s√©quences
    sequence_length = 60
    X_test = []
    for i in range(sequence_length, len(scaled_data)):
        X_test.append(scaled_data[i-sequence_length:i, 0])
    
    X_test = np.array(X_test)
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    
    print(f"X_test shape: {X_test.shape}")
    
    # Faire des pr√©dictions
    predictions = lstm_model.predict(X_test)
    
    # Inverser la transformation
    predictions = scaler.inverse_transform(predictions)
    actual_values = scaler.inverse_transform(scaled_data[sequence_length:].reshape(-1, 1))
    
    print(f"Pr√©dictions shape: {predictions.shape}")
    print(f"Valeurs r√©elles shape: {actual_values.shape}")

In [None]:
# Calculer les m√©triques
if lstm_model is not None:
    mse = mean_squared_error(actual_values, predictions)
    mae = mean_absolute_error(actual_values, predictions)
    rmse = np.sqrt(mse)
    
    print(f"MSE: {mse:.2f}")
    print(f"MAE: {mae:.2f}")
    print(f"RMSE: {rmse:.2f}")
    
    # Calculer le pourcentage d'erreur
    mape = np.mean(np.abs((actual_values - predictions) / actual_values)) * 100
    print(f"MAPE: {mape:.2f}%")

In [None]:
# Visualiser les pr√©dictions
if lstm_model is not None:
    plt.figure(figsize=(15, 8))
    plt.plot(actual_values, label='Valeurs r√©elles', color='blue')
    plt.plot(predictions, label='Pr√©dictions', color='red')
    plt.title(f'Pr√©dictions vs R√©alit√© - {symbol} (Tests)')
    plt.xlabel('Temps')
    plt.ylabel('Prix')
    plt.legend()
    plt.show()

## Partie 2: Tests du mod√®le FinBERT pour l'analyse de sentiment

In [None]:
# Charger le mod√®le de sentiment
try:
    sentiment_classifier = pipeline("sentiment-analysis", model="ProsusAI/finbert")
    print("Mod√®le de sentiment charg√© avec succ√®s")
except Exception as e:
    print(f"Erreur lors du chargement du mod√®le de sentiment: {e}")
    sentiment_classifier = None

In [None]:
# Tester le mod√®le de sentiment
if sentiment_classifier is not None:
    test_texts = [
        "The company reported strong quarterly earnings, exceeding analyst expectations.",
        "Market volatility increases as trade tensions escalate between major economies.",
        "New regulatory changes could impact the financial sector significantly.",
        "Stock prices surge following positive FDA approval for new drug.",
        "Economic indicators suggest a potential slowdown in the coming quarters.",
        "Company announces major acquisition that could transform its market position.",
        "Investors show caution amid uncertainty about future economic policies.",
        "Technology sector shows robust growth with new innovation breakthroughs.",
        "Oil prices drop due to oversupply concerns in the global market.",
        "Consumer spending increases, indicating strong economic confidence."
    ]
    
    results = []
    for text in test_texts:
        result = sentiment_classifier(text)
        results.append({
            'text': text,
            'label': result[0]['label'],
            'score': result[0]['score']
        })
    
    results_df = pd.DataFrame(results)
    print("R√©sultats du mod√®le de sentiment:")
    print(results_df)
    
    # Visualiser la distribution des sentiments
    plt.figure(figsize=(10, 6))
    sentiment_counts = results_df['label'].value_counts()
    plt.bar(sentiment_counts.index, sentiment_counts.values)
    plt.title('Distribution des Sentiments')
    plt.xlabel('Sentiment')
    plt.ylabel('Nombre')
    plt.show()

## Partie 3: Int√©gration des mod√®les dans un pipeline de pr√©diction

In [None]:
# Cr√©er une fonction de pr√©diction int√©gr√©e
def integrated_prediction(symbol, days=5):
    """Fonction de pr√©diction int√©grant les deux mod√®les"""
    if lstm_model is None:
        print("Mod√®le LSTM non disponible")
        return None
    
    # Charger les donn√©es historiques
    stock = yf.Ticker(symbol)
    hist_data = stock.history(period="3mo")  # 3 mois de donn√©es
    
    # Pr√©parer les donn√©es pour le mod√®le LSTM
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(hist_data['Close'].values.reshape(-1, 1))
    
    # Cr√©er une s√©quence pour la pr√©diction
    sequence_length = 60
    last_sequence = scaled_data[-sequence_length:].reshape(1, sequence_length, 1)
    
    # Pr√©dire les prochains jours
    predictions = []
    current_sequence = last_sequence.copy()
    
    for _ in range(days):
        next_pred = lstm_model.predict(current_sequence, verbose=0)
        predictions.append(next_pred[0, 0])
        
        # Mettre √† jour la s√©quence pour la prochaine pr√©diction
        current_sequence = np.append(current_sequence[:, 1:, :], next_pred.reshape(1, 1, 1), axis=1)
    
    # Inverser la transformation
    predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
    
    return predictions.flatten()

# Tester la fonction de pr√©diction int√©gr√©e
if lstm_model is not None:
    predictions = integrated_prediction("AAPL", days=5)
    if predictions is not None:
        print("Pr√©dictions pour les 5 prochains jours:")
        for i, pred in enumerate(predictions, 1):
            print(f"Jour {i}: ${pred:.2f}")

## Partie 4: √âvaluation des performances du syst√®me complet

In [None]:
# Cr√©er un rapport de performance
def generate_performance_report():
    report = {
        "LSTM_Model": {
            "MSE": mse if lstm_model is not None else "N/A",
            "MAE": mae if lstm_model is not None else "N/A",
            "RMSE": rmse if lstm_model is not None else "N/A",
            "MAPE": mape if lstm_model is not None else "N/A",
            "Status": "OK" if lstm_model is not None else "Not Available"
        },
        "Sentiment_Model": {
            "Status": "OK" if sentiment_classifier is not None else "Not Available",
            "Test_Samples": len(results_df) if 'results_df' in locals() else 0,
            "Positive_Sentiment": len(results_df[results_df['label'] == 'POSITIVE']) if 'results_df' in locals() else 0,
            "Negative_Sentiment": len(results_df[results_df['label'] == 'NEGATIVE']) if 'results_df' in locals() else 0,
            "Neutral_Sentiment": len(results_df[results_df['label'] == 'NEUTRAL']) if 'results_df' in locals() else 0
        },
        "System": {
            "Overall_Status": "Operational" if lstm_model is not None and sentiment_classifier is not None else "Partial",
            "Components_Ready": 2 if lstm_model is not None and sentiment_classifier is not None else 1 if lstm_model is not None or sentiment_classifier is not None else 0,
            "Total_Components": 2
        }
    }
    
    return report

report = generate_performance_report()

print("Rapport de Performance du Syst√®me MarketPulse")
print("=" * 50)
for category, metrics in report.items():
    print(f"\n{category}:")
    for metric, value in metrics.items():
        print(f"  {metric}: {value}")

## Conclusion

Ce notebook a test√© les mod√®les ML de MarketPulse et √©valu√© leurs performances:

1. **Mod√®le LSTM**: √âvalu√© sur la pr√©cision des pr√©dictions de prix
2. **Mod√®le FinBERT**: √âvalu√© sur la pr√©cision de l'analyse de sentiment
3. **Syst√®me int√©gr√©**: Test√© l'int√©gration des deux mod√®les

Les r√©sultats montrent que les mod√®les sont fonctionnels et pr√™ts √† √™tre int√©gr√©s dans le pipeline de production.