# PARTIE 5 : GRAPHIQUE INTERACTIF PLOTLY - S&P 500

Visualisations interactives des données du S&P 500 avec Plotly

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import yfinance as yf
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

In [2]:
sp500_symbols = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA", "BRK.B", "JNJ", "V",
    "WMT", "JPM", "PG", "MA", "UNH", "HD", "DIS", "CRM", "MCD", "KO",
    "NFLX", "CSCO", "PEP", "ABT", "ABBV", "MMM", "XOM", "CVX", "BA", "COST",
    "AMD", "INTC", "QCOM", "PYPL", "TXN", "ADBE", "IBM", "AVGO", "ACN", "GILD",
    "CDNS", "MRVL", "OKTA", "CCI", "ANET", "SNPS", "ASML", "NOW", "PANW", "LRCX"
]

sector_info = pd.DataFrame({
    'symbol': ["AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA", "BRK.B", "JNJ", "V",
               "WMT", "JPM", "PG", "MA", "UNH", "HD", "DIS", "CRM", "MCD", "KO",
               "NFLX", "CSCO", "PEP", "ABT", "ABBV", "MMM", "XOM", "CVX", "BA", "COST",
               "AMD", "INTC", "QCOM", "PYPL", "TXN", "ADBE", "IBM", "AVGO", "ACN", "GILD",
               "CDNS", "MRVL", "OKTA", "CCI", "ANET", "SNPS", "ASML", "NOW", "PANW", "LRCX"],
    'sector': ["Technology", "Technology", "Technology", "Consumer", "Technology", "Technology", "Consumer", "Finance", "Healthcare", "Finance",
               "Consumer", "Finance", "Consumer", "Finance", "Healthcare", "Consumer", "Consumer", "Technology", "Consumer", "Consumer",
               "Technology", "Technology", "Consumer", "Healthcare", "Healthcare", "Industrial", "Energy", "Energy", "Industrial", "Consumer",
               "Technology", "Technology", "Technology", "Finance", "Technology", "Technology", "Technology", "Technology", "Technology", "Healthcare",
               "Technology", "Technology", "Technology", "Real Estate", "Technology", "Technology", "Technology", "Technology", "Technology", "Technology"]
})

In [3]:
# Récupérer les données
print("Récupération des données S&P 500...")
end_date = datetime.now()
start_date = end_date - timedelta(days=365)

# Télécharger les données
sp500_data_list = []
for symbol in sp500_symbols:
    try:
        data = yf.download(symbol, start=start_date, end=end_date, progress=False)
        if not data.empty:  # Vérifier que des données ont été récupérées
            # Aplatir les colonnes si MultiIndex
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)
            data = data.reset_index()
            data['symbol'] = symbol
            sp500_data_list.append(data)
        else:
            print(f"Aucune donnée pour {symbol}")
    except Exception as e:
        print(f"Erreur pour {symbol}: {e}")

# Concaténer les données verticalement
sp500_data = pd.concat(sp500_data_list, axis=0, ignore_index=True)

# Standardiser les noms de colonnes
sp500_data.columns = sp500_data.columns.str.lower().str.replace(' ', '_')

# Joindre les secteurs
sp500_data = sp500_data.merge(sector_info, on='symbol', how='left')

print(f"Données chargées: {len(sp500_data)} lignes")
print(f"Actions récupérées: {sp500_data['symbol'].nunique()}")

Récupération des données S&P 500...


$BRK.B: possibly delisted; no timezone found

1 Failed download:
['BRK.B']: possibly delisted; no timezone found


Aucune donnée pour BRK.B
Données chargées: 12250 lignes
Actions récupérées: 49


## GRAPHIQUE INTERACTIF 1 : Prix vs Volume (Scatter)

In [4]:
latest_data = sp500_data.sort_values('date').groupby('symbol').tail(1).copy()

# Calculer le changement de prix par rapport au début
sp500_data_sorted = sp500_data.sort_values(['symbol', 'date'])
price_30d_ago = sp500_data_sorted.groupby('symbol').first()[['close']].reset_index()
price_30d_ago.columns = ['symbol', 'close_30d_ago']

latest_data = latest_data.merge(price_30d_ago, on='symbol', how='left')
latest_data['price_change'] = latest_data['close'] - latest_data['close_30d_ago']
latest_data['price_change_pct'] = (latest_data['price_change'] / latest_data['close_30d_ago']) * 100

# Créer le scatter plot (utiliser abs() pour size car Plotly n'accepte que des valeurs positives)
fig = px.scatter(
    latest_data,
    x=latest_data['volume'] / 1e6,
    y='close',
    color='sector',
    size=latest_data['price_change_pct'].abs(),
    hover_name='symbol',
    hover_data={
        'close': ':.2f',
        'volume': ':.2f',
        'price_change_pct': ':.2f'
    },
    labels={
        'x': 'Volume (millions)',
        'close': 'Prix (USD)',
        'sector': 'Secteur',
        'price_change_pct': 'Changement 30j (%)'
    },
    title='S&P 500 : Prix vs Volume (Taille = Changement 30j)'
)

fig.update_layout(
    plot_bgcolor='#f8f9fa',
    paper_bgcolor='white',
    hovermode='closest',
    height=600
)

fig.show()

## GRAPHIQUE INTERACTIF 2 : Évolution temporelle des Top 5 stocks

In [5]:
top_5_symbols = ["AAPL", "MSFT", "GOOGL", "AMZN", "NVDA"]
top_5_data = sp500_data[sp500_data['symbol'].isin(top_5_symbols)].sort_values('date')

fig = px.line(
    top_5_data,
    x='date',
    y='close',
    color='symbol',
    title='Évolution du prix - Top 5 Actions',
    labels={'date': 'Date', 'close': 'Prix (USD)', 'symbol': 'Action'}
)

fig.update_layout(
    plot_bgcolor='#f8f9fa',
    paper_bgcolor='white',
    hovermode='x unified',
    height=600
)

fig.show()

## GRAPHIQUE INTERACTIF 3 : Distribution des prix par secteur (Box plot)

In [6]:
fig = px.box(
    sp500_data,
    x='sector',
    y='close',
    color='sector',
    title='Distribution des prix par secteur',
    labels={'sector': 'Secteur', 'close': 'Prix (USD)'}
)

fig.update_layout(
    plot_bgcolor='#f8f9fa',
    paper_bgcolor='white',
    showlegend=False,
    height=600
)

fig.show()

## GRAPHIQUE INTERACTIF 4 : Heatmap interactive des corrélations secteur

In [7]:
# Calculer la corrélation entre secteurs
sector_prices = sp500_data.groupby(['date', 'sector'])['close'].mean().reset_index()
pivot_sector = sector_prices.pivot(index='date', columns='sector', values='close')
sector_correlation = pivot_sector.corr()

# Créer la heatmap
fig = go.Figure(data=go.Heatmap(
    z=sector_correlation.values,
    x=sector_correlation.columns,
    y=sector_correlation.index,
    colorscale='RdBu',
    zmid=0,
    text=np.round(sector_correlation.values, 3),
    texttemplate='%{text}',
    textfont={"size": 10},
    hovertemplate='%{y} vs %{x}: %{z:.3f}<extra></extra>'
))

fig.update_layout(
    title='Matrice de corrélation des prix par secteur',
    xaxis_title='Secteur',
    yaxis_title='Secteur',
    height=600
)

fig.show()

print("\n✓ Partie 5 complétée : 4 graphiques interactifs Plotly")


✓ Partie 5 complétée : 4 graphiques interactifs Plotly
