In [43]:
#We import these libraries
import urllib.request, json , time, os, difflib, itertools
import pandas as pd
from multiprocessing.dummy import Pool
from datetime import datetime


In [44]:
#an check if we have a functioning internet connection
try:
    import httplib
except:
    import http.client as httplib

def check_internet():
    conn = httplib.HTTPConnection("www.google.com", timeout=5)
    try:
        conn.request("HEAD", "/")
        conn.close()
        return True
    except:
        conn.close()
        return False

In [45]:
check_internet()

True

In [4]:
# this is a function that is taking in the query url
# and a path for the json and the csv to be stored
def get_historic_price(query_url,json_path,csv_path):
    
    while not check_internet():
        print("Could not connect, trying again in 5 seconds...")
        time.sleep(5)
    #we get the stock id by retrieving it from the query url as the string between symbol= and &period 
    stock_id=query_url.split("&period")[0].split("symbol=")[1]
    # if the csv already exists, we update the data simpy 
    if os.path.exists(csv_path+stock_id+'.csv') and os.stat(csv_path+stock_id+'.csv').st_size != 0:
        print("<<<  Historical data of "+stock_id+" already exists, Updating data...")

    try:
        with urllib.request.urlopen(query_url) as url:
            parsed = json.loads(url.read().decode())
    except:
        print("|||  Historical data of "+stock_id+" doesn't exist")
        return

    #otherwise we remove the json and add a new json dump
    else:
        if os.path.exists(json_path+stock_id+'.json'):
            os.remove(json_path+stock_id+'.json')
        with open(json_path+stock_id+'.json', 'w') as outfile:
            json.dump(parsed, outfile, indent=4)

        try:
            Date=[]
            #for each entry in the parsed json we restructure it into lists
            for i in parsed['chart']['result'][0]['timestamp']:
                Date.append(datetime.utcfromtimestamp(int(i)).strftime('%d-%m-%Y'))
            
            Low=parsed['chart']['result'][0]['indicators']['quote'][0]['low']
            Open=parsed['chart']['result'][0]['indicators']['quote'][0]['open']
            Volume=parsed['chart']['result'][0]['indicators']['quote'][0]['volume']
            High=parsed['chart']['result'][0]['indicators']['quote'][0]['high']
            Close=parsed['chart']['result'][0]['indicators']['quote'][0]['close']
            Adjusted_Close=parsed['chart']['result'][0]['indicators']['adjclose'][0]['adjclose']

            #that we put into an array that we write into a csv

            df=pd.DataFrame(list(zip(Date,Low,Open,Volume,High,Close,Adjusted_Close)),columns =['Date','Low','Open','Volume','High','Close','Adjusted Close'])

            if os.path.exists(csv_path+stock_id+'.csv'):
                os.remove(csv_path+stock_id+'.csv')
            df.to_csv(csv_path+stock_id+'.csv', sep=',', index=None)
            print(">>>  Historical data of "+stock_id+" saved")
            return
        except:
            print(">>>  Historical data of "+stock_id+" exists but has no trading data")

In [5]:
#we write the path for the folder structure which is based on different os dependent notations
json_path = os.getcwd()+os.sep+".."+os.sep+"historic_data"+os.sep+"json"+os.sep
csv_path = os.getcwd()+os.sep+".."+os.sep+"historic_data"+os.sep+"csv"+os.sep


In [6]:
# here we create directories if they do not exist
if not os.path.isdir(json_path):
    os.makedirs(json_path)
if not os.path.isdir(csv_path):
    os.makedirs(csv_path)


In [7]:
#we define some predetermine variables
period1 = -1325583000
period2 = 9999999999
interval = "1d"


In [8]:
#and our custom dictionary of dax 40 companies and their stock id
full_ticker_dict={
"Dow Jones":'^DJI',
    "S&P 500":"^GSPC",
    "Nasdaq":"^NDX",
    "Russel":"^RUT"
 }

In [9]:
query_urls=[]
#now we go through all the ticker stock ids and create urls
for ticker in full_ticker_dict.values():
    query_urls.append("https://query1.finance.yahoo.com/v8/finance/chart/"+ticker+"?symbol="+ticker+"&period1=0&period2=9999999999&interval=1d&includePrePost=true&events=div%2Csplit")

In [10]:
#and then we go through all urls in a pool process
with Pool(processes=len(query_urls)) as pool:
    pool.starmap(get_historic_price, zip(query_urls, itertools.repeat(json_path), itertools.repeat(csv_path)))
print("All downloads completed !")

<<<  Historical data of ^GSPC already exists, Updating data...
<<<  Historical data of ^RUT already exists, Updating data...
<<<  Historical data of ^DJI already exists, Updating data...
<<<  Historical data of ^NDX already exists, Updating data...
>>>  Historical data of ^DJI saved
>>>  Historical data of ^NDX saved
>>>  Historical data of ^RUT saved
>>>  Historical data of ^GSPC saved
All downloads completed !


In [11]:
# we lastly create a translate df to write from the stock id the entire name of the stock
df_translate=pd.DataFrame(full_ticker_dict,index=[0]).T.reset_index()
df_translate.columns=["name","shortener"]
df_translate

Unnamed: 0,name,shortener
0,Dow Jones,^DJI
1,S&P 500,^GSPC
2,Nasdaq,^NDX
3,Russel,^RUT


In [12]:
df_translate.to_csv("../data/translate.csv",index=False)

In [13]:
# this proves it has worked
shortener_name=df_translate[df_translate["name"]=="S&P 500"].shortener.values[0]
SP_yahoo=pd.read_csv("../historic_data/csv/"+shortener_name+".csv")
SP_yahoo=SP_yahoo[["Date","Close"]]
SP_yahoo["Date"]=pd.to_datetime(SP_yahoo["Date"],format="%d-%m-%Y")
SP_yahoo

Unnamed: 0,Date,Close
0,1970-01-02,93.000000
1,1970-01-05,93.459999
2,1970-01-06,92.820000
3,1970-01-07,92.629997
4,1970-01-08,92.680000
...,...,...
13822,2024-10-22,5851.200195
13823,2024-10-23,5797.419922
13824,2024-10-24,5809.859863
13825,2024-10-25,5808.120117


In [14]:
# this proves it has worked
shortener_name=df_translate[df_translate["name"]=="Dow Jones"].shortener.values[0]
dow_yahoo=pd.read_csv("../historic_data/csv/"+shortener_name+".csv")
dow_yahoo=dow_yahoo[["Date","Close"]]
dow_yahoo["Date"]=pd.to_datetime(dow_yahoo["Date"],format="%d-%m-%Y")
dow_yahoo

Unnamed: 0,Date,Close
0,1992-01-02,3172.399902
1,1992-01-03,3201.500000
2,1992-01-06,3200.100098
3,1992-01-07,3204.800049
4,1992-01-08,3203.899902
...,...,...
8262,2024-10-22,42924.890625
8263,2024-10-23,42514.949219
8264,2024-10-24,42374.359375
8265,2024-10-25,42114.398438


In [15]:
further_historic_sp=pd.read_csv("../data/SP500.csv")
further_historic_sp=further_historic_sp[["Date","Close"]]
further_historic_sp["Date"]=pd.to_datetime(further_historic_sp["Date"])

In [16]:
further_historic_dow=pd.read_csv("../data/DJA.csv")
further_historic_dow=further_historic_dow.rename(columns={"Unnamed: 0":"Date"})
further_historic_dow=further_historic_dow[["Date","Close"]]
further_historic_dow["Date"]=pd.to_datetime(further_historic_dow["Date"])

In [18]:
merged_almost_100_years_sp=pd.concat([further_historic_sp,SP_yahoo]).drop_duplicates(keep="first")
merged_almost_100_years_sp

Unnamed: 0,Date,Close
0,1927-12-30,17.660000
1,1928-01-03,17.760000
2,1928-01-04,17.719999
3,1928-01-05,17.549999
4,1928-01-06,17.660000
...,...,...
13822,2024-10-22,5851.200195
13823,2024-10-23,5797.419922
13824,2024-10-24,5809.859863
13825,2024-10-25,5808.120117


In [19]:
merged_almost_100_years_dow=pd.concat([further_historic_dow,dow_yahoo]).drop_duplicates(keep="first")
merged_almost_100_years_dow

Unnamed: 0,Date,Close
0,1885-05-02,31.435000
1,1885-05-04,30.661400
2,1885-05-05,30.508700
3,1885-05-06,30.824000
4,1885-05-07,30.503800
...,...,...
8262,2024-10-22,42924.890625
8263,2024-10-23,42514.949219
8264,2024-10-24,42374.359375
8265,2024-10-25,42114.398438


In [20]:
import datetime


In [21]:
election_days=[]
for year in range(2024,1927,-4):
    print(year)
    weekday_number=0
    day=2
    while weekday_number!=1:
        start_datetime = datetime.datetime(year,11,day,00,00,00)
        weekday_number = start_datetime.date().weekday()
        day+=1

    election_days.append(start_datetime)
election_df=pd.DataFrame({"election_day":election_days})

2024
2020
2016
2012
2008
2004
2000
1996
1992
1988
1984
1980
1976
1972
1968
1964
1960
1956
1952
1948
1944
1940
1936
1932
1928


In [22]:
election_df["year"]=election_df["election_day"].astype(str).str[:4]

In [23]:
election_days=[]
for year in range(2024,1886,-4):
    print(year)
    weekday_number=0
    day=2
    while weekday_number!=1:
        start_datetime = datetime.datetime(year,11,day,00,00,00)
        weekday_number = start_datetime.date().weekday()
        day+=1

    election_days.append(start_datetime)
election_df_2=pd.DataFrame({"election_day":election_days})

2024
2020
2016
2012
2008
2004
2000
1996
1992
1988
1984
1980
1976
1972
1968
1964
1960
1956
1952
1948
1944
1940
1936
1932
1928
1924
1920
1916
1912
1908
1904
1900
1896
1892
1888


In [24]:
election_df_2["year"]=election_df_2["election_day"].astype(str).str[:4]


In [25]:
Unified=pd.read_csv("../data/Unified.csv")
Unified["Year"]=Unified["Congress"].str[-10:-6].astype(int)-1
Unified=Unified[["Year","Party Government"]]
Unified["Year"]=Unified["Year"].astype(str)
Unified

Unnamed: 0,Year,Party Government
0,1856,Unified
1,1858,Divided
2,1860,Unified
3,1862,Unified
4,1864,Unified/ Divided2
...,...,...
79,2014,Divided
80,2016,Unified
81,2018,Divided
82,2020,Unified


In [37]:
election_df['after_election_day'] = election_df['election_day'] +  pd.to_timedelta(1, unit='d')
election_df['after_election_week'] = election_df['election_day'] +  pd.to_timedelta(90, unit='d')
election_df['before_election_day'] = election_df['election_day'] -  pd.to_timedelta(1, unit='d')

election_df['before_election_week'] = election_df['election_day'] -  pd.to_timedelta(7, unit='d')
election_df['before_election_180'] = election_df['election_day'] -  pd.to_timedelta(180, unit='d')

election_df['year'] = election_df['year'].astype(str)

election_df_unified=pd.merge(election_df,Unified,left_on="year",right_on="Year")

In [38]:
election_df_2['after_election_day'] = election_df_2['election_day'] +  pd.to_timedelta(1, unit='d')
election_df_2['after_election_week'] = election_df_2['election_day'] +  pd.to_timedelta(180, unit='d')
election_df_2['before_election_day'] = election_df_2['election_day'] -  pd.to_timedelta(1, unit='d')
election_df_2['before_election_180'] = election_df_2['election_day'] -  pd.to_timedelta(180, unit='d')

election_df_2['before_election_week'] = election_df_2['election_day'] -  pd.to_timedelta(7, unit='d')
election_df_2['year'] = election_df_2['year'].astype(str)

election_df_unified_2=pd.merge(election_df_2,Unified,left_on="year",right_on="Year")

In [39]:
counter=0
year_list=[]
entry_list=[]
unified_list=[]
for index, election in election_df_unified.iterrows():

    t2_start=(election["after_election_day"])
    t2_end=(election["after_election_week"])
    df_t2=merged_almost_100_years_sp[(merged_almost_100_years_sp["Date"]>=t2_start)&(merged_almost_100_years_sp["Date"]<=t2_end)]
    mean_t2=(df_t2["Close"].mean())
    t1_end=(election["before_election_day"])
    t1_start=(election["before_election_week"])
    df_t1=merged_almost_100_years_sp[(merged_almost_100_years_sp["Date"]>=t1_start)&(merged_almost_100_years_sp["Date"]<=t1_end)]
    mean_t1=(df_t1["Close"].mean())
    entry=((mean_t2-mean_t1)/mean_t1)
    comp_end=(election["before_election_day"])
    comp_start=(election["before_election_180"])
    df_comp=merged_almost_100_years_sp[(merged_almost_100_years_sp["Date"]>=comp_start)&(merged_almost_100_years_sp["Date"]<=comp_end)]
    df_comp_mean=(df_comp["Close"].mean())
    year_list.append(election["year"])

    entry_list.append(entry*100)
    unified_list.append(election["Party Government"])
    if((mean_t2-mean_t1)>0):
        counter+=1
print(counter)

17


In [40]:
pd.DataFrame({"gain":entry_list}).mean()

gain    2.512591
dtype: float64

In [41]:
pd.DataFrame({"year":year_list,"gain":entry_list,"unified":unified_list})

Unnamed: 0,year,gain,unified
0,2020,15.970712,Unified
1,2016,9.515544,Unified
2,2012,5.35352,Divided
3,2008,-12.114286,Unified
4,2004,5.395025,Unified
5,2000,-10.344639,Unified / Divided
6,1996,9.170842,Divided
7,1992,4.394233,Unified
8,1988,3.8291,Divided
9,1984,4.092344,Divided


In [42]:
pd.DataFrame({"gain":entry_list,"unified":unified_list}).groupby("unified").mean()

Unnamed: 0_level_0,gain
unified,Unnamed: 1_level_1
Divided,2.959936
Unified,3.131156
Unified / Divided,-10.344639


In [33]:
election_df

Unnamed: 0,election_day,year,after_election_day,after_election_week,before_election_day,before_election_week
0,2024-11-05,2024,2024-11-06,2025-05-04,2024-11-04,2024-10-29
1,2020-11-03,2020,2020-11-04,2021-05-02,2020-11-02,2020-10-27
2,2016-11-08,2016,2016-11-09,2017-05-07,2016-11-07,2016-11-01
3,2012-11-06,2012,2012-11-07,2013-05-05,2012-11-05,2012-10-30
4,2008-11-04,2008,2008-11-05,2009-05-03,2008-11-03,2008-10-28
5,2004-11-02,2004,2004-11-03,2005-05-01,2004-11-01,2004-10-26
6,2000-11-07,2000,2000-11-08,2001-05-06,2000-11-06,2000-10-31
7,1996-11-05,1996,1996-11-06,1997-05-04,1996-11-04,1996-10-29
8,1992-11-03,1992,1992-11-04,1993-05-02,1992-11-02,1992-10-27
9,1988-11-08,1988,1988-11-09,1989-05-07,1988-11-07,1988-11-01


In [34]:
counter=0
for index, election in election_df_2.iterrows():
    print(election["election_day"])
    t2_start=(election["after_election_day"])
    t2_end=(election["after_election_week"])
    df_t2=merged_almost_100_years_dow[(merged_almost_100_years_dow["Date"]>=t2_start)&(merged_almost_100_years_dow["Date"]<=t2_end)]
    mean_t2=(df_t2["Close"].mean())
    t1_end=(election["before_election_day"])
    t1_start=(election["before_election_week"])
    df_t1=merged_almost_100_years_dow[(merged_almost_100_years_dow["Date"]>=t1_start)&(merged_almost_100_years_dow["Date"]<=t1_end)]
    mean_t1=(df_t1["Close"].mean())
    entry=((mean_t2-mean_t1)/mean_t1)
    entry_list.append(entry*100)
    if((mean_t2-mean_t1)>0):
        counter+=1
print(counter)

2024-11-05 00:00:00
2020-11-03 00:00:00
2016-11-08 00:00:00
2012-11-06 00:00:00
2008-11-04 00:00:00
2004-11-02 00:00:00
2000-11-07 00:00:00
1996-11-05 00:00:00
1992-11-03 00:00:00
1988-11-08 00:00:00
1984-11-06 00:00:00
1980-11-04 00:00:00
1976-11-02 00:00:00
1972-11-07 00:00:00
1968-11-05 00:00:00
1964-11-03 00:00:00
1960-11-08 00:00:00
1956-11-06 00:00:00
1952-11-04 00:00:00
1948-11-02 00:00:00
1944-11-07 00:00:00
1940-11-05 00:00:00
1936-11-03 00:00:00
1932-11-08 00:00:00
1928-11-06 00:00:00
1924-11-04 00:00:00
1920-11-02 00:00:00
1916-11-07 00:00:00
1912-11-05 00:00:00
1908-11-03 00:00:00
1904-11-08 00:00:00
1900-11-06 00:00:00
1896-11-03 00:00:00
1892-11-08 00:00:00
1888-11-06 00:00:00
22


In [35]:
pd.DataFrame({"gain":entry_list}).mean()

gain    2.649819
dtype: float64

In [36]:
election_df_2

Unnamed: 0,election_day,year,after_election_day,after_election_week,before_election_day,before_election_week
0,2024-11-05,2024,2024-11-06,2025-05-04,2024-11-04,2024-10-29
1,2020-11-03,2020,2020-11-04,2021-05-02,2020-11-02,2020-10-27
2,2016-11-08,2016,2016-11-09,2017-05-07,2016-11-07,2016-11-01
3,2012-11-06,2012,2012-11-07,2013-05-05,2012-11-05,2012-10-30
4,2008-11-04,2008,2008-11-05,2009-05-03,2008-11-03,2008-10-28
5,2004-11-02,2004,2004-11-03,2005-05-01,2004-11-01,2004-10-26
6,2000-11-07,2000,2000-11-08,2001-05-06,2000-11-06,2000-10-31
7,1996-11-05,1996,1996-11-06,1997-05-04,1996-11-04,1996-10-29
8,1992-11-03,1992,1992-11-04,1993-05-02,1992-11-02,1992-10-27
9,1988-11-08,1988,1988-11-09,1989-05-07,1988-11-07,1988-11-01
