In [2]:
import requests
import pandas as pd
import datetime
import yfinance as yf
import numpy as np
import os
from scipy.signal import savgol_filter

from download_SNP500_OHLCV import get_SNP500_stock_OHLCV, download_stock_OHLCV, plot_OHLC

In [2]:
# Fetch the "MMM" OHLCV table through the helper imported from download_SNP500_OHLCV
# (whether it reads a local cache or downloads is not visible in this notebook).
MMM_OHLCV = get_SNP500_stock_OHLCV("MMM")

# Plot the table with the imported plotting helper, labelled with the ticker.
plot_OHLC(MMM_OHLCV, "MMM")

In [3]:
def smooth_values(values, window_length=3, polyorder=1):
    """
    Smooth a series of values using a Savitzky-Golay filter.

    Parameters:
    values (pandas.Series): Series containing values to be smoothed.
    window_length (int): Length of the filter window handed to ``savgol_filter``.
        Defaults to 3, the value that was previously hard-coded.
    polyorder (int): Order of the polynomial fitted inside each window.
        Defaults to 1, the value that was previously hard-coded.

    Returns:
    pandas.Series: Series containing smoothed values, carrying the same index
    and name as ``values``.
    """
    # Use a distinct local name so the function's own name is not shadowed inside its body.
    smoothed = savgol_filter(values, window_length, polyorder)

    # savgol_filter returns a plain array; re-attach the original index and name.
    return pd.Series(smoothed, index=values.index, name=values.name)

# Smooth only the 'Close' column of the MMM table.
smooth_MMM_C = smooth_values(MMM_OHLCV['Close'])

# NOTE(review): plot_OHLC receives a single Series here, whereas the earlier call passed
# the whole OHLCV table — confirm the helper supports both inputs.
plot_OHLC(smooth_MMM_C, "MMM")

In [4]:
def compute_return(prices):
    """
    Derive simple and logarithmic period-over-period returns from prices.

    Parameters:
    prices (pandas.Series): Price series. (The result is assembled column-by-column
        from this input; DataFrame input is not exercised in this notebook.)

    Returns:
    pandas.DataFrame: Frame indexed like ``prices`` with two columns,
    'Return' (percentage change) and 'Log Return' (log of 1 + 'Return').
    The first row is NaN because it has no preceding price.
    """
    simple_ret = prices.pct_change()

    return pd.DataFrame(
        {
            'Return': simple_ret,
            'Log Return': np.log(1 + simple_ret),
        },
        index=prices.index,
    )
    
# Returns of the smoothed MMM close; the first row is NaN (no previous price to compare with).
smooth_MMM_RL = compute_return(smooth_MMM_C)

smooth_MMM_RL.head(2)

Unnamed: 0_level_0,Return,Log Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-12-01,,
2015-12-02,-0.00803,-0.008062


In [8]:
# Closing prices for the "^GSPC" ticker, kept as the S&P 500 series that the
# return-difference computation further down compares against.
SNP500_Close = download_stock_OHLCV("^GSPC")['Close']

SNP500_Close.head(2)

^GSPC downloaded


Date
2015-12-01    2102.629883
2015-12-02    2079.510010
Name: Close, dtype: float64

In [10]:
# Quick look at the index returns for the first two dates.
compute_return(SNP500_Close).head(2)

Unnamed: 0_level_0,Return,Log Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-12-01,,
2015-12-02,-0.010996,-0.011057


In [9]:
def add_SNP500_return_diff(Return_df, SNP500_prices):
    """
    Add a column representing the difference between stock return and S&P 500 return.

    The S&P 500 prices are aligned on the stock's index (left join), gaps are
    forward-filled with the last known price, and the resulting percentage change
    is subtracted from the stock's 'Return' column.

    Parameters:
    Return_df (pandas.DataFrame): DataFrame containing return data for a stock.
        Must have a 'Return' column; it is not modified.
    SNP500_prices (pandas.Series): Series with S&P 500 prices. It is not modified
        (in particular its ``name`` is left untouched).

    Returns:
    pandas.DataFrame: New DataFrame with a 'SNP500_return_diff' column added.
    """
    # rename() returns a new Series, so the caller's object keeps its original name
    # (assigning to .name would silently relabel the caller's Series).
    snp_prices = SNP500_prices.rename("SNP500_prices")

    df = Return_df.merge(snp_prices, how='left', left_index=True, right_index=True)

    # Assign the filled column back instead of calling ffill(inplace=True) on a
    # column selection: the chained in-place form does not update `df` when pandas
    # Copy-on-Write is active.
    df['SNP500_prices'] = df['SNP500_prices'].ffill()

    df['SNP500_return_diff'] = df['Return'] - df['SNP500_prices'].pct_change()

    # The aligned prices were only an intermediate; do not expose them to callers.
    return df.drop(columns=['SNP500_prices'])

# Attach the stock-minus-index return difference to the smoothed MMM returns.
smooth_MMM_RLSNPDiff = add_SNP500_return_diff(smooth_MMM_RL, SNP500_Close)

smooth_MMM_RLSNPDiff.head(2)

Unnamed: 0_level_0,Return,Log Return,SNP500_return_diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-12-01,,,
2015-12-02,-0.00803,-0.008062,0.002966


In [12]:
def save_SNP500_stock_smooth_Ret_SNPDiff(ret_SNPDiff_df, symbol,
                                         data_dir="C:/Users/loris/Desktop/td/data/SNP500_smooth_Ret_SNPDiff/"):
    """
    Save smoothed returns and S&P 500 return differences for a specific stock symbol to a CSV file.

    Parameters:
    ret_SNPDiff_df (pandas.DataFrame): DataFrame containing smoothed returns and S&P 500 return differences.
    symbol (str): Stock symbol used as part of the CSV filename (``<symbol>.csv``).
    data_dir (str): Directory the CSV is written to. Defaults to the location that
        was previously hard-coded, so existing calls behave as before; pass another
        directory to run the notebook on a different machine.
    """
    # Create the target directory on first use instead of failing inside to_csv.
    os.makedirs(data_dir, exist_ok=True)

    ret_SNPDiff_df.to_csv(os.path.join(data_dir, symbol + ".csv"))

# Write the MMM frame to "MMM.csv" so it can be reloaded without recomputing.
save_SNP500_stock_smooth_Ret_SNPDiff(smooth_MMM_RLSNPDiff, "MMM")

def get_SNP500_stock_smooth_Ret_SNPDiff(symbol,
                                        data_dir="C:/Users/loris/Desktop/td/data/SNP500_smooth_Ret_SNPDiff/"):
    """
    Load smoothed returns and S&P 500 return differences for a specific stock symbol from a CSV file.

    Parameters:
    symbol (str): Stock symbol used to identify the CSV file (``<symbol>.csv``).
    data_dir (str): Directory the CSV is read from. Defaults to the location that
        was previously hard-coded, so existing calls behave as before.

    Returns:
    pandas.DataFrame: DataFrame containing smoothed returns and S&P 500 return differences
    for the specified stock symbol, indexed by the parsed 'Date' column.
    """
    csv_path = os.path.join(data_dir, symbol + ".csv")

    # 'Date' is both parsed as datetime and promoted to the index.
    return pd.read_csv(csv_path, index_col='Date', parse_dates=['Date'])

# Round-trip check: reload the "MMM" file and show its first two rows.
get_SNP500_stock_smooth_Ret_SNPDiff("MMM").head(2)

Unnamed: 0_level_0,Return,Log Return,SNP500_return_diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-12-01,,,
2015-12-02,-0.00803,-0.008062,0.002966


In [20]:
def compute_RetDiff_stats(ret_SNPDiff_df):
    """
    Compute summary statistics on the stock-vs-S&P 500 return difference.

    Parameters:
    ret_SNPDiff_df (pandas.DataFrame): DataFrame with a 'SNP500_return_diff' column,
        ordered so that the last row is the most recent observation.

    Returns:
    dict: ``{"First_day_better_than_SNP": bool}`` — True when the last value of
    'SNP500_return_diff' is positive and the one before it is negative.
    """

    def first_day_better_than_SNP(ret_SNPDiff_df):
        """Return True if the last difference is > 0 and the previous one is < 0."""
        diff = ret_SNPDiff_df['SNP500_return_diff']

        # Two observations are needed to detect a sign flip.
        if len(diff) < 2:
            return False

        # .iloc makes the positional lookup explicit; `series[-1]` is deprecated
        # for positions and would be treated as a label on an integer index.
        return bool(diff.iloc[-1] > 0 and diff.iloc[-2] < 0)

    # TODO: add the consecutive-same-sign-days statistic. The previous draft declared a
    # nested helper with no body (which prevented the cell from compiling) and never
    # used its result, so it is left out until it is actually implemented.
    return {
        "First_day_better_than_SNP" : first_day_better_than_SNP(ret_SNPDiff_df)
    }

# Evaluate the statistics on the smoothed MMM return-difference frame.
compute_RetDiff_stats(smooth_MMM_RLSNPDiff)


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



{'First_day_better_than_SNP': True}

In [37]:
# Inspect the return-difference column (pandas abbreviates the display of long Series).
smooth_MMM_RLSNPDiff['SNP500_return_diff']

Date
2015-12-01         NaN
2015-12-02    0.002966
2015-12-03    0.017200
2015-12-04   -0.016148
2015-12-07    0.012113
                ...   
2023-12-27    0.008966
2023-12-28    0.003335
2023-12-29    0.006671
2024-01-02    0.002177
2024-01-03    0.000654
Name: SNP500_return_diff, Length: 2036, dtype: float64

In [43]:
def consecutive_days_same_sign(ret_SNPDiff_df):
    """
    Label runs of consecutive rows whose 'SNP500_return_diff' has the same sign.

    Parameters:
    ret_SNPDiff_df (pandas.DataFrame): DataFrame with a 'SNP500_return_diff' column.
        The first row is skipped; the input itself is not modified.

    Returns:
    pandas.DataFrame: Copy of the input without its first row, plus three columns:
    'Positive' (difference >= 0), 'Change Signe' (True where 'Positive' differs
    from the previous row; always False on the first kept row) and 'ID same sign'
    (running count of changes, i.e. an identifier shared by each run).
    """
    runs = ret_SNPDiff_df[1:].copy()

    runs['Positive'] = runs['SNP500_return_diff'] >= 0

    # Build the "previous row" flags by repeating the first flag in front, so the
    # first kept row is compared with itself and never counts as a change.
    is_positive = runs['Positive'].values
    previous_flags = pd.concat([runs['Positive'][:1], runs['Positive'][:-1]]).values
    runs['Change Signe'] = is_positive != previous_flags

    runs['ID same sign'] = runs['Change Signe'].cumsum()

    return runs

# Preview the run labelling on the MMM frame.
consecutive_days_same_sign(smooth_MMM_RLSNPDiff)

Unnamed: 0_level_0,Return,Log Return,SNP500_return_diff,Positive,Change Signe,ID same sign
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-12-02,-0.008030,-0.008062,0.002966,True,False,0
2015-12-03,0.002827,0.002823,0.017200,True,False,0
2015-12-04,0.004378,0.004368,-0.016148,False,True,1
2015-12-07,0.005124,0.005111,0.012113,True,True,2
2015-12-08,-0.004590,-0.004601,0.001900,True,False,2
...,...,...,...,...,...,...
2023-12-27,0.010397,0.010343,0.008966,True,False,1104
2023-12-28,0.003706,0.003699,0.003335,True,False,1104
2023-12-29,0.003844,0.003837,0.006671,True,False,1104
2024-01-02,-0.003483,-0.003490,0.002177,True,False,1104


In [45]:
# Scratch cell: builds an empty DataFrame; the result is not assigned to anything.
pd.DataFrame([])