<a href="https://colab.research.google.com/github/ZygoOoade/finance/blob/main/finance_corr%C3%A9lations_entre_cours_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
import yfinance as yf
import pandas as pd
import numpy as np
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

def fetch_and_prepare_data():
    """Download two price series and return their aligned closes and returns.

    The window runs from ``5*365`` days before the current time up to now.
    Close prices are requested through ``yf.Ticker(...).history`` for the
    symbols "TTE" and "BZ=F".

    NOTE(review): the columns are labelled ``FTI_*`` although the symbol
    requested is "TTE" -- confirm which instrument is intended.

    Returns:
        A DataFrame with the columns ``FTI_Close``, ``Brent_Close``,
        ``FTI_Returns`` and ``Brent_Returns``; rows containing a missing or
        infinite value have been removed.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days=5*365)

    # Column label -> symbol; the stock is requested before the oil future.
    closes = {}
    for column, symbol in (('FTI_Close', "TTE"), ('Brent_Close', "BZ=F")):
        history = yf.Ticker(symbol).history(start=window_start, end=window_end)
        closes[column] = history['Close']

    # Keep only the dates on which both series have a close.
    prices = pd.DataFrame(closes).dropna()

    # Period-over-period percentage change of each close.
    for prefix in ('FTI', 'Brent'):
        prices[f'{prefix}_Returns'] = prices[f'{prefix}_Close'].pct_change()

    # Infinite returns become NaN so that one dropna removes them together
    # with the leading NaN produced by pct_change.
    return prices.replace([np.inf, -np.inf], np.nan).dropna()

def calculate_correlations(df):
    """Compute price-level and return-level correlations between the series.

    Args:
        df: DataFrame providing the columns ``FTI_Close``, ``Brent_Close``,
            ``FTI_Returns`` and ``Brent_Returns``.

    Returns:
        A dict with the keys ``pearson_price``, ``pearson_returns``,
        ``spearman_price``, ``spearman_returns`` (scalars) and
        ``rolling_corr`` (a Series: correlation of the two close columns
        over a rolling window of 30 rows).
    """
    stock_close, oil_close = df['FTI_Close'], df['Brent_Close']
    stock_ret, oil_ret = df['FTI_Returns'], df['Brent_Returns']

    # spearmanr returns (coefficient, p-value); only the coefficient is kept.
    rank_corr_price, _ = stats.spearmanr(stock_close, oil_close)
    rank_corr_returns, _ = stats.spearmanr(stock_ret, oil_ret)

    return {
        # Series.corr is used with its default method for the linear measure.
        'pearson_price': stock_close.corr(oil_close),
        'pearson_returns': stock_ret.corr(oil_ret),
        'spearman_price': rank_corr_price,
        'spearman_returns': rank_corr_returns,
        'rolling_corr': stock_close.rolling(window=30).corr(oil_close),
    }

def calculate_beta(df):
    """Return the beta of ``FTI_Returns`` with respect to ``Brent_Returns``.

    Beta is the covariance of the two return columns divided by the variance
    of ``Brent_Returns``.

    Args:
        df: DataFrame providing ``FTI_Returns`` and ``Brent_Returns``.

    Returns:
        The covariance/variance ratio as a scalar.
    """
    oil_returns = df['Brent_Returns']
    return df['FTI_Returns'].cov(oil_returns) / oil_returns.var()

def create_visualization(df, rolling_corr):
    """Build a three-panel figure summarising the two series.

    Panels, top to bottom: both close series rebased to 100 at the first
    row, the supplied rolling correlation, and a seaborn regression scatter
    of ``FTI_Returns`` against ``Brent_Returns``.

    Args:
        df: DataFrame with ``FTI_Close``, ``Brent_Close``, ``FTI_Returns``
            and ``Brent_Returns``.
        rolling_corr: series drawn in the middle panel.

    Returns:
        The matplotlib figure that holds the three axes.
    """
    fig, axes = plt.subplots(3, 1, figsize=(15, 12))
    price_ax, corr_ax, scatter_ax = axes

    # Panel 1: divide each series by its first value so both start at 100.
    for column, legend_label in (
        ('FTI_Close', 'FTI (normalized)'),
        ('Brent_Close', 'Brent Oil (normalized)'),
    ):
        series = df[column]
        price_ax.plot((series / series.iloc[0]) * 100, label=legend_label)
    price_ax.set_title('Normalized Price Comparison (Base = 100)')
    price_ax.legend()
    price_ax.grid(True)

    # Panel 2: the y-axis is pinned to the [-1, 1] range of a correlation.
    corr_ax.plot(rolling_corr)
    corr_ax.set_title('30-Day Rolling Correlation')
    corr_ax.grid(True)
    corr_ax.set_ylim(-1, 1)

    # Panel 3: scatter of the returns with a fitted regression line.
    sns.regplot(x='Brent_Returns', y='FTI_Returns', data=df, ax=scatter_ax)
    scatter_ax.set_title('Daily Returns Scatter Plot with Regression Line')

    plt.tight_layout()
    return fig

def analyze_correlation():
    """Run the whole analysis: fetch, compute, plot, report and save.

    The cleaned data is fetched, the correlations and the beta are computed,
    the figure is created, a text report is printed, and the current figure
    is written to ``fti_brent_correlation.png`` before being closed.

    Returns:
        A tuple ``(df, corr, beta)``: the cleaned DataFrame, the dict from
        ``calculate_correlations`` and the value from ``calculate_beta``.

    Raises:
        Exception: any error raised along the way is printed and re-raised.
    """
    try:
        df = fetch_and_prepare_data()
        print(f"Data shape after cleaning: {df.shape}")

        corr = calculate_correlations(df)
        beta = calculate_beta(df)

        print("\nCreating visualization...")
        fig = create_visualization(df, corr['rolling_corr'])

        print("\nCorrelation Analysis Results:")
        print(f"Number of observations: {len(df)}")

        # One Pearson/Spearman pair for price levels, one for returns.
        for heading, suffix in (
            ("\nPrice Level Correlations:", 'price'),
            ("\nDaily Returns Correlations:", 'returns'),
        ):
            pearson_value = corr['pearson_' + suffix]
            spearman_value = corr['spearman_' + suffix]
            print(heading)
            print(f"Pearson correlation: {pearson_value:.4f}")
            print(f"Spearman correlation: {spearman_value:.4f}")

        print(f"\nBeta (sensitivity to oil price movements): {beta:.4f}")

        # describe() output for each return column.
        print("\nSummary Statistics:")
        for heading, column in (
            ("\nFTI Returns:", 'FTI_Returns'),
            ("\nBrent Oil Returns:", 'Brent_Returns'),
        ):
            print(heading)
            print(df[column].describe())

        # savefig/close act on pyplot's current figure.
        plt.savefig('fti_brent_correlation.png')
        plt.close()

        return df, corr, beta

    except Exception as exc:
        print(f"An error occurred: {exc!s}")
        raise

# Run the full analysis at cell level and keep its three results (cleaned
# data, correlation dict, beta) as notebook-level variables.
df, correlations, beta = analyze_correlation()

Data shape after cleaning: (1257, 4)

Creating visualization...

Correlation Analysis Results:
Number of observations: 1257

Price Level Correlations:
Pearson correlation: 0.6085
Spearman correlation: 0.6604

Daily Returns Correlations:
Pearson correlation: 0.4596
Spearman correlation: 0.4742

Beta (sensitivity to oil price movements): 0.3602

Summary Statistics:

FTI Returns:
count    1257.000000
mean        0.000608
std         0.021754
min        -0.178208
25%        -0.010309
50%         0.001295
75%         0.010983
max         0.152756
Name: FTI_Returns, dtype: float64

Brent Oil Returns:
count    1257.000000
mean        0.000522
std         0.027755
min        -0.244036
25%        -0.011824
50%         0.002265
75%         0.014519
max         0.210186
Name: Brent_Returns, dtype: float64


In [13]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Fetch price history for the symbol "FTI".
# NOTE(review): the symbol is passed exactly as written, without the ".L"
# suffix that marks London Stock Exchange listings in yfinance, so this is
# presumably not the London listing -- confirm which instrument is intended.
ticker = "FTI"
end_date = datetime.now()
start_date = end_date - timedelta(days=365)  # Last year of data

# Fetch the data; `df` is the frame the indicator code below works on.
stock = yf.Ticker(ticker)
df = stock.history(start=start_date, end=end_date)

def calculate_all_indicators(df):
    """Add trend, momentum, volatility and volume indicator columns to ``df``.

    The frame is modified in place and the same object is returned.

    Args:
        df: DataFrame with ``Close``, ``High``, ``Low`` and ``Volume``.

    Returns:
        ``df`` with these columns appended, in order: ``SMA_20``, ``SMA_50``,
        ``SMA_200``, ``EMA_12``, ``EMA_26``, ``RSI``, ``K_percent``,
        ``D_percent``, ``MACD``, ``MACD_Signal``, ``MACD_Histogram``,
        ``BB_Middle``, ``BB_Upper``, ``BB_Lower``, ``BB_Width``, ``ATR``,
        ``OBV``, ``VPT`` and ``MFI``.
    """
    def rolling_mean(series, periods):
        return series.rolling(window=periods).mean()

    def exp_mean(series, periods):
        return series.ewm(span=periods, adjust=False).mean()

    close, high, low, volume = df['Close'], df['High'], df['Low'], df['Volume']
    prev_close = close.shift()

    # --- Trend: simple and exponential moving averages of the close ---
    for span in (20, 50, 200):
        df[f'SMA_{span}'] = rolling_mean(close, span)
    for span in (12, 26):
        df[f'EMA_{span}'] = exp_mean(close, span)

    # --- Momentum: RSI from 14-row simple averages of gains and losses ---
    change = close.diff()
    # where(..., 0) also turns the leading NaN of diff() into 0.
    avg_gain = change.where(change > 0, 0).rolling(window=14).mean()
    avg_loss = (-change.where(change < 0, 0)).rolling(window=14).mean()
    strength = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + strength))

    # Stochastic oscillator: position of the close inside the 14-row range.
    floor_14 = low.rolling(window=14).min()
    ceiling_14 = high.rolling(window=14).max()
    df['K_percent'] = 100 * ((close - floor_14) / (ceiling_14 - floor_14))
    df['D_percent'] = df['K_percent'].rolling(window=3).mean()

    # MACD line, its 9-span EMA signal, and their difference.
    macd_line = df['EMA_12'] - df['EMA_26']
    signal_line = exp_mean(macd_line, 9)
    df['MACD'] = macd_line
    df['MACD_Signal'] = signal_line
    df['MACD_Histogram'] = macd_line - signal_line

    # --- Volatility: Bollinger Bands, 20-row mean +/- 2 standard deviations ---
    middle = rolling_mean(close, 20)
    band = close.rolling(window=20).std() * 2
    df['BB_Middle'] = middle
    df['BB_Upper'] = middle + band
    df['BB_Lower'] = middle - band
    df['BB_Width'] = (df['BB_Upper'] - df['BB_Lower']) / middle

    # Average True Range: 14-row mean of the row-wise largest of three ranges.
    range_candidates = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    )
    df['ATR'] = range_candidates.max(axis=1).rolling(window=14).mean()

    # --- Volume: OBV accumulates volume signed by the close's direction ---
    df['OBV'] = (np.sign(change) * volume).cumsum()

    # Volume Price Trend: running sum of volume times the relative change.
    df['VPT'] = (volume * ((close - prev_close) / prev_close)).cumsum()

    # Money Flow Index over 14 rows of typical-price money flow.
    typical = (high + low + close) / 3
    flow = typical * volume
    prev_typical = typical.shift()
    inflow = flow.where(typical > prev_typical, 0).rolling(window=14).sum()
    outflow = flow.where(typical < prev_typical, 0).rolling(window=14).sum()
    df['MFI'] = 100 - (100 / (1 + inflow / outflow))

    return df

# Add the indicator columns to the price history.
df_with_indicators = calculate_all_indicators(df)

# The most recent row supplies every value shown in the summary.
latest_data = df_with_indicators.iloc[-1]

print("\nFTI Stock Analysis Summary")
print(f"Date: {latest_data.name.strftime('%Y-%m-%d')}")

# Report layout: (section heading, rows), where each row is
# (text printed before the value, column name, format spec).
# NOTE(review): price rows are printed with a pound sign; confirm this
# matches the currency the fetched symbol is actually quoted in.
_summary_sections = (
    ("\nPrice Information:", (
        ("Close: £", 'Close', '.2f'),
        ("Volume: ", 'Volume', ',.0f'),
    )),
    ("\nTrend Indicators:", (
        ("SMA (20): £", 'SMA_20', '.2f'),
        ("SMA (50): £", 'SMA_50', '.2f'),
        ("SMA (200): £", 'SMA_200', '.2f'),
    )),
    ("\nMomentum Indicators:", (
        ("RSI: ", 'RSI', '.2f'),
        ("Stochastic K%: ", 'K_percent', '.2f'),
        ("Stochastic D%: ", 'D_percent', '.2f'),
        ("MACD: ", 'MACD', '.4f'),
        ("MACD Signal: ", 'MACD_Signal', '.4f'),
    )),
    ("\nVolatility Indicators:", (
        ("Bollinger Upper: £", 'BB_Upper', '.2f'),
        ("Bollinger Middle: £", 'BB_Middle', '.2f'),
        ("Bollinger Lower: £", 'BB_Lower', '.2f'),
        ("BB Width: ", 'BB_Width', '.4f'),
        ("ATR: ", 'ATR', '.4f'),
    )),
    ("\nVolume Indicators:", (
        ("OBV: ", 'OBV', ',.0f'),
        ("MFI: ", 'MFI', '.2f'),
    )),
)

for _heading, _rows in _summary_sections:
    print(_heading)
    for _prefix, _column, _spec in _rows:
        print(f"{_prefix}{latest_data[_column]:{_spec}}")

# Show the last 5 rows of data with all indicators
print("\nLast 5 Days of Technical Analysis:")
print(df_with_indicators.tail())


FTI Stock Analysis Summary
Date: 2024-11-01

Price Information:
Close: £26.28
Volume: 2,408,800

Trend Indicators:
SMA (20): £26.43
SMA (50): £26.12
SMA (200): £24.99

Momentum Indicators:
RSI: 42.04
Stochastic K%: 65.57
Stochastic D%: 57.28
MACD: -0.0534
MACD Signal: -0.0587

Volatility Indicators:
Bollinger Upper: £27.88
Bollinger Middle: £26.43
Bollinger Lower: £24.99
BB Width: 0.1094
ATR: 0.7514

Volume Indicators:
OBV: 95,427,800
MFI: 46.88

Last 5 Days of Technical Analysis:
                                Open   High        Low      Close   Volume  \
Date                                                                         
2024-10-28 00:00:00-04:00  25.799999  26.16  25.650000  26.080000  5341300   
2024-10-29 00:00:00-04:00  26.340000  26.42  25.799999  26.100000  3038000   
2024-10-30 00:00:00-04:00  26.240000  26.52  25.950001  26.090000  3224900   
2024-10-31 00:00:00-04:00  26.290001  26.99  26.200001  26.690001  3882800   
2024-11-01 00:00:00-04:00  26.709999  27.01  

In [32]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Correlate the daily returns of two Yahoo Finance symbols over the last year.
#
# NOTE: the "fti_"/"brent_" variable prefixes and the 'FTI_*'/'Brent_*' column
# names are kept unchanged because calculate_all_indicators in this cell reads
# the 'FTI_Close' column. They are only labels: the instruments actually
# downloaded are the two symbols assigned below ("DIS" and "TTE"), so the
# report line is built from those symbols rather than from fixed asset names.
# TODO(review): if a stock-versus-Brent comparison was intended, the first
# cell of this notebook uses "BZ=F" as the Brent symbol -- adjust the symbols.
fti_ticker = "DIS"    # first series, stored in the 'FTI_*' columns
brent_ticker = "TTE"  # second series, stored in the 'Brent_*' columns
end_date = datetime.now()
start_date = end_date - timedelta(days=365)  # Last year of data

# Fetch the price history of the first symbol
fti_data = yf.Ticker(fti_ticker).history(start=start_date, end=end_date)

# Fetch the price history of the second symbol
brent_data = yf.Ticker(brent_ticker).history(start=start_date, end=end_date)

# Keep only the closes, give each a distinct column name, and keep the dates
# present in both histories (inner join on the index).
fti_data = fti_data[['Close']].rename(columns={'Close': 'FTI_Close'})
brent_data = brent_data[['Close']].rename(columns={'Close': 'Brent_Close'})
data = pd.merge(fti_data, brent_data, left_index=True, right_index=True, how='inner')

# Calculate daily returns
data['FTI_Returns'] = data['FTI_Close'].pct_change()
data['Brent_Returns'] = data['Brent_Close'].pct_change()

# Drop rows with NaN values created by the pct_change
data = data.dropna()

# Correlation of the two return series (Series.corr default method)
correlation = data['FTI_Returns'].corr(data['Brent_Returns'])

# Name the symbols that were really fetched so the label cannot drift away
# from the data when the tickers above are edited.
print(f"Correlation between {fti_ticker} and {brent_ticker} (based on daily returns): {correlation:.4f}")

# For completeness, the technical indicators are computed for the merged frame.
def calculate_all_indicators(df, close_col='FTI_Close', high_col='High',
                             low_col='Low', volume_col='Volume'):
    """Add technical-indicator columns to ``df`` (in place) and return it.

    Real high/low/volume data is used whenever the frame provides it. When
    it does not (as with a frame that only carries closes), the function
    falls back to close-only stand-ins so existing callers get the same
    numbers as before:

    * high and low are replaced by the close (stochastic, MFI typical price);
    * the true range becomes the absolute close-to-close change;
    * the close is used as the weight in place of volume for ``OBV`` and
      ``MFI``. In that mode these two columns are price-weighted proxies,
      not genuine volume indicators.

    Args:
        df: DataFrame containing at least ``close_col``.
        close_col: name of the close-price column.
        high_col: name of the high-price column; used only if both it and
            ``low_col`` are present.
        low_col: name of the low-price column; used only if both it and
            ``high_col`` are present.
        volume_col: name of the volume column; used only if present.

    Returns:
        ``df`` with these columns appended, in order: ``SMA_20``, ``SMA_50``,
        ``SMA_200``, ``EMA_12``, ``EMA_26``, ``RSI``, ``K_percent``,
        ``D_percent``, ``MACD``, ``MACD_Signal``, ``MACD_Histogram``,
        ``BB_Middle``, ``BB_Upper``, ``BB_Lower``, ``BB_Width``, ``ATR``,
        ``OBV`` and ``MFI``.

    Raises:
        KeyError: if ``close_col`` is not a column of ``df``.
    """
    def SMA(series, periods):
        return series.rolling(window=periods).mean()

    def EMA(series, periods):
        return series.ewm(span=periods, adjust=False).mean()

    close = df[close_col]
    has_range = high_col in df.columns and low_col in df.columns
    high = df[high_col] if has_range else close
    low = df[low_col] if has_range else close
    # Legacy fallback: the close doubles as the weight when no volume exists.
    volume = df[volume_col] if volume_col in df.columns else close
    prev_close = close.shift()

    # Trend Indicators
    df['SMA_20'] = SMA(close, 20)
    df['SMA_50'] = SMA(close, 50)
    df['SMA_200'] = SMA(close, 200)
    df['EMA_12'] = EMA(close, 12)
    df['EMA_26'] = EMA(close, 26)

    # Momentum Indicators
    # RSI from 14-row simple averages; where(..., 0) also zeroes the leading
    # NaN of diff(), so the first RSI value appears at the 14th row.
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # Stochastic oscillator over the 14-row low/high range
    low_14 = low.rolling(window=14).min()
    high_14 = high.rolling(window=14).max()
    df['K_percent'] = 100 * ((close - low_14) / (high_14 - low_14))
    df['D_percent'] = df['K_percent'].rolling(window=3).mean()

    # MACD
    df['MACD'] = df['EMA_12'] - df['EMA_26']
    df['MACD_Signal'] = EMA(df['MACD'], 9)
    df['MACD_Histogram'] = df['MACD'] - df['MACD_Signal']

    # Volatility Indicators
    # Bollinger Bands: 20-row mean +/- 2 standard deviations
    df['BB_Middle'] = SMA(close, 20)
    std_dev = close.rolling(window=20).std()
    df['BB_Upper'] = df['BB_Middle'] + (std_dev * 2)
    df['BB_Lower'] = df['BB_Middle'] - (std_dev * 2)
    df['BB_Width'] = (df['BB_Upper'] - df['BB_Lower']) / df['BB_Middle']

    # Average True Range
    if has_range:
        ranges = pd.concat(
            [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
            axis=1,
        )
        true_range = ranges.max(axis=1)
    else:
        # Close-only data: the only measurable range is the absolute
        # close-to-close move (NaN on the first row).
        true_range = (close - prev_close).abs()
    df['ATR'] = true_range.rolling(window=14).mean()

    # On-Balance Volume: running sum of the weight signed by price direction
    df['OBV'] = (np.sign(delta) * volume).cumsum()

    # Money Flow Index over 14 rows
    typical_price = (high + low + close) / 3
    money_flow = typical_price * volume
    positive_flow = money_flow.where(typical_price > typical_price.shift(), 0).rolling(window=14).sum()
    negative_flow = money_flow.where(typical_price < typical_price.shift(), 0).rolling(window=14).sum()
    money_ratio = positive_flow / negative_flow
    df['MFI'] = 100 - (100 / (1 + money_ratio))

    return df

# Add the indicator columns to the merged frame, then show its last 5 rows
# under a heading (one print call, newline-separated).
data_with_indicators = calculate_all_indicators(data)
print("\nLast 5 days with indicators:", data_with_indicators.tail(), sep="\n")


Correlation between FTI share price and Brent Crude Oil price (based on daily returns): 0.1526

Last 5 days with indicators:
                           FTI_Close  Brent_Close  FTI_Returns  Brent_Returns  \
Date                                                                            
2024-10-28 00:00:00-04:00  96.199997    64.699997     0.012312      -0.004922   
2024-10-29 00:00:00-04:00  96.129997    64.029999    -0.000728      -0.010355   
2024-10-30 00:00:00-04:00  95.080002    63.570000    -0.010923      -0.007184   
2024-10-31 00:00:00-04:00  96.199997    62.560001     0.011780      -0.015888   
2024-11-01 00:00:00-04:00  95.809998    62.470001    -0.004054      -0.001439   

                              SMA_20   SMA_50     SMA_200     EMA_12  \
Date                                                                   
2024-10-28 00:00:00-04:00  94.929500  92.7212  100.789928  95.612933   
2024-10-29 00:00:00-04:00  95.033500  92.8274  100.820902  95.692481   
2024-10-30 00:00:00

In [35]:
import yfinance as yf
import pandas as pd
import numpy as np
from scipy.stats import pearsonr

# Download historical data for Disney (DIS) and TotalEnergies (TTE)
tickers = ["DIS", "TTE"]
# auto_adjust=False is passed explicitly: the 'Adj Close' column only exists
# when prices are not auto-adjusted, and the library default for this flag
# differs between yfinance releases. The .copy() gives an independent frame,
# so the column assignments below do not write into a selection of the
# downloaded frame (which can trigger pandas' SettingWithCopyWarning).
data = yf.download(
    tickers, start="2020-01-01", end="2023-01-01", auto_adjust=False
)['Adj Close'].copy()

# Calculate the daily returns for each stock
for symbol in tickers:
    data[f'{symbol}_Returns'] = data[symbol].pct_change()

# 1. Simple Moving Average (SMA), 20-row window
for symbol in tickers:
    data[f'{symbol}_SMA_20'] = data[symbol].rolling(window=20).mean()

# 2. Exponential Moving Average (EMA), span 20
for symbol in tickers:
    data[f'{symbol}_EMA_20'] = data[symbol].ewm(span=20, adjust=False).mean()

# 3. Moving Average Convergence Divergence (MACD): span-12 EMA minus span-26 EMA
for symbol in tickers:
    fast_ema = data[symbol].ewm(span=12, adjust=False).mean()
    slow_ema = data[symbol].ewm(span=26, adjust=False).mean()
    data[f'{symbol}_MACD'] = fast_ema - slow_ema

# 4. Relative Strength Index (RSI)
def calculate_rsi(series, window=14):
    """Return the RSI of ``series`` from simple rolling means.

    Gains and losses are the positive and negative parts of the row-to-row
    difference; each is averaged with a plain rolling mean over ``window``
    rows, and their ratio is mapped to ``100 - 100 / (1 + ratio)``.

    Args:
        series: price Series.
        window: number of rows in the rolling mean (default 14).

    Returns:
        A Series aligned with ``series``; the first ``window - 1`` entries
        are NaN.
    """
    change = series.diff()
    # where(..., 0) also replaces the leading NaN of diff() by 0.
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)
    relative_strength = ups.rolling(window).mean() / downs.rolling(window).mean()
    return 100 - (100 / (1 + relative_strength))

# RSI column for each symbol (DIS first, then TTE).
for symbol in ("DIS", "TTE"):
    data[f'{symbol}_RSI'] = calculate_rsi(data[symbol])

# 5. Bollinger Bands (20-day SMA + 2 standard deviations)
for symbol in ("DIS", "TTE"):
    # Band half-width: twice the 20-row rolling standard deviation,
    # computed once and applied on both sides of the SMA.
    half_width = 2 * data[symbol].rolling(window=20).std()
    data[f'{symbol}_BB_Upper'] = data[f'{symbol}_SMA_20'] + half_width
    data[f'{symbol}_BB_Lower'] = data[f'{symbol}_SMA_20'] - half_width

# 6. On-Balance Volume (OBV)
def calculate_obv(price, volume):
    """Return the running total of volume signed by the price direction.

    A row adds its volume when the price rose against the previous row,
    subtracts it when the price fell, and contributes nothing otherwise
    (including the first row, which has no previous price).

    Args:
        price: price Series.
        volume: volume Series sharing the index of ``price``.

    Returns:
        The cumulative sum as a Series.
    """
    change = price.diff()
    rose = change.gt(0).astype(int)
    fell = change.lt(0).astype(int)
    signed_volume = volume * rose - volume * fell
    return signed_volume.cumsum()

# Volumes for the same symbols and date range, used for the OBV columns.
volume_data = yf.download(tickers, start="2020-01-01", end="2023-01-01")['Volume']
for symbol in ("DIS", "TTE"):
    data[f'{symbol}_OBV'] = calculate_obv(data[symbol], volume_data[symbol])

# Drop any NaN values created by rolling windows
data = data.dropna()

# Pearson correlation between the DIS and TTE column of each indicator;
# pearsonr returns (coefficient, p-value) and only the coefficient is kept.
indicators = ['Returns', 'SMA_20', 'EMA_20', 'MACD', 'RSI', 'BB_Upper', 'BB_Lower', 'OBV']
correlations = {
    indicator: pearsonr(data[f'DIS_{indicator}'], data[f'TTE_{indicator}'])[0]
    for indicator in indicators
}

# Display the correlation results, one row per indicator
correlations_df = pd.DataFrame.from_dict(correlations, orient='index', columns=['Correlation'])
print(correlations_df)


[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed


          Correlation
Returns      0.512045
SMA_20      -0.084323
EMA_20      -0.096531
MACD         0.439789
RSI          0.364173
BB_Upper    -0.158339
BB_Lower     0.030055
OBV         -0.341693
