In [17]:
#We import these libraries
import urllib.request, json , time, os, difflib, itertools
import pandas as pd
from multiprocessing.dummy import Pool
from datetime import datetime


In [18]:
# Helper to check whether we have a functioning internet connection
try:
    import httplib
except:
    import http.client as httplib

def check_internet():
    """Probe connectivity by sending a HEAD request to www.google.com.

    Returns:
        bool: True if the request could be sent within the 5 second timeout,
        False if a network or HTTP-level error occurred.
    """
    conn = httplib.HTTPConnection("www.google.com", timeout=5)
    try:
        conn.request("HEAD", "/")
        return True
    except (OSError, httplib.HTTPException):
        # Only connectivity failures mean "offline"; KeyboardInterrupt and
        # SystemExit must keep propagating so the notebook can be interrupted.
        return False
    finally:
        # Release the socket on every path, including unexpected exceptions.
        conn.close()

In [19]:
# Run the connectivity probe once; the cell displays its boolean result
check_internet()

True

In [20]:
def get_historic_price(query_url,json_path,csv_path):
    """Download the chart data behind ``query_url`` and store it as JSON and CSV.

    The stock id is taken from the url (the text between ``symbol=`` and
    ``&period``) and used as the file name in both target folders.

    Args:
        query_url: Chart query url containing ``symbol=<id>&period...``.
        json_path: Folder that receives ``<id>.json`` (the raw parsed response).
        csv_path: Folder that receives ``<id>.csv`` (one row per timestamp).

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Block until the connectivity probe succeeds.
    while not check_internet():
        print("Could not connect, trying again in 5 seconds...")
        time.sleep(5)

    # The stock id is the string between "symbol=" and "&period" in the query url.
    stock_id = query_url.split("&period")[0].split("symbol=")[1]
    # os.path.join accepts the folders with or without a trailing separator.
    json_file = os.path.join(json_path, stock_id + '.json')
    csv_file = os.path.join(csv_path, stock_id + '.csv')

    # A non-empty csv from an earlier run is simply overwritten further down.
    if os.path.exists(csv_file) and os.stat(csv_file).st_size != 0:
        print("<<<  Historical data of "+stock_id+" already exists, Updating data...")

    try:
        with urllib.request.urlopen(query_url) as response:
            parsed = json.loads(response.read().decode())
    except urllib.error.HTTPError as err:
        if err.code == 404:
            print("|||  Historical data of "+stock_id+" doesn't exist")
        else:
            print("|||  Historical data of "+stock_id+" could not be downloaded: "+str(err))
        return
    except Exception as err:
        # Stay best-effort (one failing ticker must not abort the others), but
        # report the real reason instead of claiming that the data doesn't exist.
        print("|||  Historical data of "+stock_id+" could not be downloaded: "+str(err))
        return

    # Opening with 'w' truncates, so an older dump is replaced without deleting it first.
    with open(json_file, 'w') as outfile:
        json.dump(parsed, outfile, indent=4)

    try:
        frame = _chart_to_frame(parsed)
    except (KeyError, IndexError, TypeError, ValueError, OverflowError, OSError):
        # Missing or null sections, or unusable timestamps, in the response.
        print(">>>  Historical data of "+stock_id+" exists but has no trading data")
        return

    try:
        frame.to_csv(csv_file, sep=',', index=None)
    except OSError as err:
        print("|||  Historical data of "+stock_id+" could not be written to "+csv_file+": "+str(err))
        return
    print(">>>  Historical data of "+stock_id+" saved")


def _chart_to_frame(parsed):
    """Restructure a parsed chart response into a DataFrame with one row per timestamp."""
    result = parsed['chart']['result'][0]
    quote = result['indicators']['quote'][0]

    # Timestamps are epoch seconds; format them as UTC calendar dates.
    dates = [time.strftime('%d-%m-%Y', time.gmtime(int(stamp))) for stamp in result['timestamp']]

    rows = zip(
        dates,
        quote['low'],
        quote['open'],
        quote['volume'],
        quote['high'],
        quote['close'],
        result['indicators']['adjclose'][0]['adjclose'],
    )
    return pd.DataFrame(
        list(rows),
        columns=['Date', 'Low', 'Open', 'Volume', 'High', 'Close', 'Adjusted Close'],
    )

In [23]:
# Storage folders next to the working directory, joined with the OS-specific separator.
# The empty last element yields the trailing separator the paths end with.
json_path = os.sep.join([os.getcwd(), "..", "historic_data", "json", ""])
csv_path = os.sep.join([os.getcwd(), "..", "historic_data", "csv", ""])


In [24]:
# Create the storage directories if they do not exist yet.
# exist_ok=True replaces the separate isdir() check, so there is no window in which
# another process can create the folder between the check and the makedirs call.
os.makedirs(json_path, exist_ok=True)
os.makedirs(csv_path, exist_ok=True)


In [25]:
# Predetermined query settings.
# NOTE(review): period1/period2 presumably bound the requested time range in epoch
# seconds (the chart timestamps are parsed as epoch seconds) — confirm against the API.
period1 = 0
period2 = 9999999999
# Sampling interval of the requested series.
interval = "1d"
# NOTE(review): country_name is not referenced by any other visible cell — confirm it is still needed.
country_name = "germany"


In [27]:
# Custom mapping of the DAX 40 companies: company name -> stock id
full_ticker_dict = {
    "Adidas": "ADS.DE",
    "Airbus": "AIR.PA",
    "Allianz": "ALV.DE",
    "BASF": "BAS.DE",
    "Bayer": "BAYN.DE",
    "Beiersdorf": "BEI.DE",
    "BMW": "BMW.DE",
    "Brenntag": "BNR.DE",
    "Continental": "CON.DE",
    "Covestro": "1COV.DE",
    "Daimler Truck": "DTG.DE",
    "Delivery Hero": "DHER.DE",
    "Deutsche Börse": "DB1.DE",
    "Deutsche Bank": "DBK.DE",
    "Deutsche Post": "DHL.DE",
    "Deutsche Telekom": "DTE.DE",
    "E.ON": "EOAN.DE",
    "Fresenius": "FRE.DE",
    "Fresenius Medical Care": "FME.DE",
    "Hannover Rück": "HNR1.DE",
    "HeidelbergCement": "HEI.DE",
    "Hello Fresh": "HFG.DE",
    "Henkel": "HEN3.DE",
    "Infineon": "IFX.DE",
    "Mercedes Benz": "MBG.DE",
    "Merck": "MRK.DE",
    "MTU Aero Engines": "MTX.DE",
    "Münchner Rück": "MUV2.DE",
    "Porsche": "P911.DE",
    "Puma": "PUM.DE",
    "QIAGEN": "QIA.DE",
    "RWE": "RWE.DE",
    "SAP": "SAP.DE",
    "Sartorius": "SRT3.DE",
    "Siemens": "SIE.DE",
    "Siemens Healthineers": "SHL.DE",
    "Symrise": "SY1.DE",
    "Volkswagen": "VOW3.DE",
    "Vonovia": "VNA.DE",
    "Zalando": "ZAL.DE",
}

In [29]:
# Build one chart query url per stock id. The time range and the sampling interval are
# taken from the period1 / period2 / interval settings so that changing them takes
# effect here instead of being shadowed by values repeated inside the url.
query_urls = [
    "https://query1.finance.yahoo.com/v8/finance/chart/" + ticker
    + "?symbol=" + ticker
    + "&period1=" + str(period1)
    + "&period2=" + str(period2)
    + "&interval=" + interval
    + "&includePrePost=true&events=div%2Csplit"
    for ticker in full_ticker_dict.values()
]

In [30]:
# Download every url concurrently, one worker per url (multiprocessing.dummy.Pool is
# backed by threads). max(1, ...) keeps the pool valid when query_urls is empty, since
# a pool with zero workers raises ValueError.
with Pool(processes=max(1, len(query_urls))) as pool:
    pool.starmap(get_historic_price, zip(query_urls, itertools.repeat(json_path), itertools.repeat(csv_path)))
print("All downloads completed !")

MTX.DEBAYN.DE
ZAL.DE
ALV.DE
MBG.DE
SHL.DE
FRE.DE
BAS.DE
1COV.DE
P911.DE
BEI.DE
ADS.DE

MRK.DE
VOW3.DE
DTE.DE
HEN3.DE
SY1.DE
DBK.DE
SAP.DE
<<<  Historical data of SAP.DE already exists, Updating data...
CON.DE
HFG.DE
<<<  Historical data of CON.DE already exists, Updating data...
DB1.DE
<<<  Historical data of ALV.DE already exists, Updating data...
RWE.DE
DTG.DE
<<<  Historical data of MBG.DE already exists, Updating data...
<<<  Historical data of BEI.DE already exists, Updating data...
<<<  Historical data of FRE.DE already exists, Updating data...
<<<  Historical data of BAS.DE already exists, Updating data...
<<<  Historical data of VOW3.DE already exists, Updating data...
IFX.DE
BNR.DE
SRT3.DE
<<<  Historical data of BNR.DE already exists, Updating data...
VNA.DE
EOAN.DE
<<<  Historical data of VNA.DE already exists, Updating data...
AIR.PA
BMW.DE
<<<  Historical data of BMW.DE already exists, Updating data...
<<<  Historical data of HFG.DE already exists, Updating data...
<<<  Hi

In [31]:
# Lookup table that translates a stock id back to the full company name:
# one row per (name, shortener) pair, in the order of the ticker dictionary
df_translate = pd.DataFrame(list(full_ticker_dict.items()), columns=["name", "shortener"])
df_translate

Unnamed: 0,name,shortener
0,Adidas,ADS.DE
1,Airbus,AIR.PA
2,Allianz,ALV.DE
3,BASF,BAS.DE
4,Bayer,BAYN.DE
5,Beiersdorf,BEI.DE
6,BMW,BMW.DE
7,Brenntag,BNR.DE
8,Continental,CON.DE
9,Covestro,1COV.DE


In [32]:
# Persist the lookup table. The target folder is created first because no visible
# cell creates ../data, and to_csv fails when the directory is missing.
os.makedirs("../data", exist_ok=True)
df_translate.to_csv("../data/translate.csv",index=False)

In [15]:
# Sanity check: look up Puma's stock id in the translate table and load its saved csv
shortener_name = df_translate.loc[df_translate["name"] == "Puma", "shortener"].values[0]
pd.read_csv("../historic_data/csv/"+shortener_name+".csv")

'PUM.DE'