#### Importing Libraries

In [1]:
import difflib
import http.client
import itertools
import json
import os
import time
import urllib.request
from datetime import datetime, timezone
from multiprocessing.dummy import Pool

import pandas as pd

### Function to fetch stock price for given query_url

It saves the stock data as a CSV file named `<ticker>.csv` inside the folder passed in as `csv_path` (this notebook uses "historic_data_2013/csv" and "historic_data_2014/csv").

In [2]:
def get_historic_price(query_url,csv_path):
    """Download the daily price history for one ticker and save it as a CSV file.

    The ticker symbol is taken from the ``symbol=`` query parameter of
    ``query_url`` (everything between ``symbol=`` and the first ``&period``).
    The URL is fetched, decoded as JSON, and the daily rows are written to
    ``csv_path + <ticker> + '.csv'``, overwriting any existing file.

    Parameters
    ----------
    query_url : str
        URL to fetch. Must contain ``symbol=<ticker>``; the response is
        expected to be JSON with the ``chart -> result[0]`` layout read below.
    csv_path : str
        Output directory *including its trailing path separator*; the file
        name is appended by plain string concatenation.

    Returns
    -------
    None
        The outcome is reported through a printed status line:
        ``<<<`` a non-empty file already exists and will be refreshed,
        ``|||`` the URL could not be fetched or decoded,
        ``>>>`` the data was saved, or the response held no usable rows,
        ``!!!`` the CSV file could not be written.

    Raises
    ------
    IndexError
        If ``query_url`` does not contain ``symbol=``.
    """
    stock_id = query_url.split("&period")[0].split("symbol=")[1]
    csv_file = csv_path + stock_id + '.csv'

    if os.path.exists(csv_file) and os.stat(csv_file).st_size != 0:
        print("<<<  Historical data of "+stock_id+" already exists, Updating data...")

    # Fetch and decode.  OSError covers URLError/HTTPError and socket failures,
    # HTTPException covers truncated responses, ValueError covers malformed
    # JSON or text.  KeyboardInterrupt/SystemExit are deliberately NOT caught.
    try:
        with urllib.request.urlopen(query_url) as response:
            parsed = json.loads(response.read().decode())
    except (OSError, http.client.HTTPException, ValueError):
        print("|||  Historical data of "+stock_id+" doesn't exist")
        return

    # Pull the parallel per-day lists out of the payload.  A missing key, a
    # null ``result`` or a null series all mean "nothing to save".
    try:
        result = parsed['chart']['result'][0]
        quote = result['indicators']['quote'][0]

        # Timestamps are epoch seconds; render them as UTC calendar dates.
        dates = [
            datetime.fromtimestamp(int(stamp), tz=timezone.utc).strftime('%d-%m-%Y')
            for stamp in result['timestamp']
        ]

        # zip() truncates to the shortest series, as the row-wise build always did.
        rows = list(zip(
            dates,
            quote['low'],
            quote['open'],
            quote['volume'],
            quote['high'],
            quote['close'],
            result['indicators']['adjclose'][0]['adjclose'],
        ))
        df = pd.DataFrame(rows, columns=['Date','Low','Open','Volume','High','Close','Adjusted Close'])
    except (KeyError, IndexError, TypeError, ValueError, OverflowError, OSError):
        print(">>>  Historical data of "+stock_id+" exists but has no trading data")
        return

    # to_csv() truncates an existing file itself, so no prior os.remove() is
    # needed.  A write failure gets its own message instead of being reported
    # as "no trading data".
    try:
        df.to_csv(csv_file, sep=',', index=False)
    except OSError as write_error:
        print("!!!  Historical data of "+stock_id+" could not be saved: "+str(write_error))
        return

    print(">>>  Historical data of "+stock_id+" saved")

#### Setting output csv file path

In [24]:
# Output folder for the FEB 2013 files: <cwd>/../historic_data_2013/csv/
# The empty last component makes os.path.join append the trailing separator
# that get_historic_price relies on when it concatenates the file name.
csv_path = os.path.join(os.getcwd(), os.pardir, "historic_data_2013", "csv", "")

## Create directory if not already present
# exist_ok=True replaces the separate isdir() check, so there is no window
# between checking and creating in which the directory could appear.
os.makedirs(csv_path, exist_ok=True)

#### Getting Stock Tickers

Now we need to get Tickers info. I found a list of yahoo tickers on this website: https://investexcel.net/all-yahoo-finance-stock-tickers/#google_vignette. 
    
I've saved the downloaded file in main folder as "Yahoo Tickers.xlsx"

In [5]:
# Read the downloaded ticker workbook as-is; the real header row is still
# buried inside the sheet at this point and is fixed up in a later cell.
ticker_file_path = "Yahoo Tickers.xlsx"
temp_df = pd.read_excel(io=ticker_file_path)
print("Total stocks:", temp_df.shape[0])
temp_df.head(n=10)

Total stocks: 106331


Unnamed: 0,Yahoo Stock Tickers,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7
0,http://investexcel.net,,,,,,,
1,,,,,,,,
2,Ticker,Name,Exchange,Category Name,Country,,,
3,OEDV,"Osage Exploration and Development, Inc.",PNK,,USA,,,Samir Khan
4,AAPL,Apple Inc.,NMS,Electronic Equipment,USA,,,simulationconsultant@gmail.com
5,BAC,Bank of America Corporation,NYQ,Money Center Banks,USA,,,
6,AMZN,"Amazon.com, Inc.",NMS,Catalog & Mail Order Houses,USA,,,This ticker symbol list was downloaded from
7,T,AT&T Inc.,NYQ,Telecom Services - Domestic,USA,,,http://investexcel.net/all-yahoo-finance-stock...
8,GOOG,Alphabet Inc.,NMS,Internet Information Providers,USA,,,and was updated on 2nd September 2017
9,MO,"Altria Group, Inc.",NYQ,Cigarettes,USA,,,


We'll update the dataframe:
1. remove columns 5, 6 and 7, as they are not needed
2. remove the top 3 rows (two filler rows plus the header row, which becomes the column names)

In [7]:
# Build the cleaned table from temp_df without rebinding temp_df itself.
# Rebinding made this cell non-idempotent: a second run would look for column
# positions 5-7 in an already-trimmed frame and raise IndexError.
ticker_columns = temp_df.drop(temp_df.columns[[5, 6, 7]], axis=1)

# Row 2 of the sheet holds the real column labels (Ticker, Name, Exchange, ...);
# the data rows start at row 3.
headers = ticker_columns.iloc[2]
df  = pd.DataFrame(ticker_columns.values[3:], columns=headers)
print("Total stocks:",len(df))
df.head(10)

Total stocks: 106328


2,Ticker,Name,Exchange,Category Name,Country
0,OEDV,"Osage Exploration and Development, Inc.",PNK,,USA
1,AAPL,Apple Inc.,NMS,Electronic Equipment,USA
2,BAC,Bank of America Corporation,NYQ,Money Center Banks,USA
3,AMZN,"Amazon.com, Inc.",NMS,Catalog & Mail Order Houses,USA
4,T,AT&T Inc.,NYQ,Telecom Services - Domestic,USA
5,GOOG,Alphabet Inc.,NMS,Internet Information Providers,USA
6,MO,"Altria Group, Inc.",NYQ,Cigarettes,USA
7,DAL,"Delta Air Lines, Inc.",NYQ,Major Airlines,USA
8,AA,Alcoa Corporation,NYQ,Aluminum,USA
9,AXP,American Express Company,NYQ,Credit Services,USA


#### We'll randomly select 1000 rows from this dataframe as we need to scrape Stock Prices for around 1000 companies only

In [8]:
# A fixed seed makes the 1000-row sample, and therefore every ticker downloaded
# from it, identical on each Restart & Run All.
df_subset = df.sample(1000, random_state=42)

In [10]:
### List the distinct Country values in the sample to spot missing (NaN) entries

df_subset['Country'].unique()

array(['USA', 'India', 'Germany', 'Switzerland', 'United Kingdom',
       'South Korea', nan, 'Indonesia', 'Taiwan', 'France', 'Greece',
       'Thailand', 'Canada', 'Brazil', 'Hong Kong', 'China', 'Mexico',
       'Australia', 'Spain', 'Sweden', 'Singapore', 'Israel', 'Malaysia',
       'Argentina', 'Finland', 'Turkey', 'Russia', 'Ireland',
       'Netherlands', 'New Zealand', 'Italy', 'Norway'], dtype=object)

In [11]:
### Keep only the rows whose "Country" value is present (drops NaN countries)

df_subset = df_subset.dropna(subset=['Country'])

In [12]:
### Checking subset Dataframe size: (number of rows, number of columns)

df_subset.shape

(901, 5)

#### Adding companies for which the stocks will be downloaded to a list

In [13]:
# Plain Python list of the company names in the sampled rows.
desired_company_list = df_subset.loc[:, 'Name'].to_list()

In [14]:
# Number of company names queued for ticker lookup
len(desired_company_list)

901

Get the ticker for each company name in `desired_company_list`

In [17]:
# Candidate names are the same for every lookup, so collect them once.
# Non-string entries (e.g. NaN) are skipped because difflib can only compare
# sequences such as strings.
known_company_names = [name for name in df_subset['Name'] if isinstance(name, str)]

ticker_list=[]
for company in desired_company_list:
    try:
        # Best fuzzy match for the requested name; [0] raises IndexError when
        # nothing is close enough.
        exact_company_name = difflib.get_close_matches(company, known_company_names)[0]
        # NOTE: when several rows share a company name, only the first row's
        # ticker is used.
        ticker_for_the_company = df_subset.loc[df_subset['Name'] == exact_company_name, 'Ticker'].iloc[0]
    except (IndexError, TypeError):
        # IndexError: no match found.  TypeError: `company` is not a string.
        # str() keeps this message from failing on a non-string name.
        print("Company name "+str(company)+" not found.")
    else:
        ticker_list.append(ticker_for_the_company)

In [18]:
# Number of tickers resolved from the company names
len(ticker_list)

901

#### Generating URLs for FEB 2013

In [20]:
# One chart-API URL per ticker, requesting daily bars.
# period1=1359657000 is 2013-01-31 18:30:00 UTC and
# period2=1361989800 is 2013-02-27 18:30:00 UTC.
chart_endpoint_2013 = "https://query1.finance.yahoo.com/v8/finance/chart/"
chart_params_2013 = "&period1=1359657000&period2=1361989800&interval=1d&includePrePost=true&events=div%2Csplit"
query_urls = [
    chart_endpoint_2013 + symbol + "?symbol=" + symbol + chart_params_2013
    for symbol in ticker_list
]

In [25]:
# Pool comes from multiprocessing.dummy, so the workers are threads.
# One thread per URL meant ~900 simultaneous requests to the same host;
# cap the pool so the downloads are spread out instead.
MAX_DOWNLOAD_THREADS = 32

# Pool() rejects processes=0, so skip the pool entirely when there is nothing
# to download.
if query_urls:
    with Pool(processes=min(MAX_DOWNLOAD_THREADS, len(query_urls))) as pool:
        pool.starmap(get_historic_price, zip(query_urls, itertools.repeat(csv_path)))
print("All downloads completed !")

>>>  Historical data of PPTG exists but has no trading data
|||  Historical data of HXWWF doesn't exist
|||  Historical data of MAHAXPO.BO doesn't exist
|||  Historical data of AAARF doesn't exist
|||  Historical data of RI4.MU doesn't exist
|||  Historical data of KKB.IL doesn't exist
>>>  Historical data of BIO3.MU saved
>>>  Historical data of KSCL.NS saved
|||  Historical data of EXR1.BE doesn't exist
|||  Historical data of CRM.SW doesn't exist
|||  Historical data of VITANAGRO.BO doesn't exist
>>>  Historical data of 036830.KQ saved
>>>  Historical data of 064960.KS saved
>>>  Historical data of 140410.KQ saved
|||  Historical data of DRACO.BK doesn't exist
|||  Historical data of 722751.TWO doesn't exist
|||  Historical data of 8AP.F doesn't exist
|||  Historical data of GLOW doesn't exist
>>>  Historical data of EVT.DU saved
>>>  Historical data of LGA.F saved
|||  Historical data of EDRA.AT doesn't exist
>>>  Historical data of PNSE.JK saved
>>>  Historical data of OD8.F saved

|||  Historical data of PFIZER6.BO doesn't exist
|||  Historical data of ODDB.DU doesn't exist
|||  Historical data of 722530.TWO doesn't exist
|||  Historical data of QASPD doesn't exist
|||  Historical data of CNLMU doesn't exist
|||  Historical data of MOTOGENFIN-EQ.NS doesn't exist
>>>  Historical data of BECL.BK exists but has no trading data
|||  Historical data of EMERALD.BO doesn't exist
>>>  Historical data of PEM.V exists but has no trading data
>>>  Historical data of IU8.MU saved
>>>  Historical data of 119830.KQ saved
>>>  Historical data of 4EV.BE saved
>>>  Historical data of AOMD.DU saved
|||  Historical data of THCBF doesn't exist
|||  Historical data of RU000A0JUUF0.ME doesn't exist
>>>  Historical data of FPI-R.BK exists but has no trading data
>>>  Historical data of 0846.HK saved
>>>  Historical data of GPI1.DU saved
>>>  Historical data of B1Z.MU saved
|||  Historical data of 3M2.F doesn't exist
>>>  Historical data of TKD.F saved
|||  Historical data of SOG.TA do

|||  Historical data of 66N.BE doesn't exist
>>>  Historical data of GEOO34.SA saved
>>>  Historical data of ROLTA.NS saved
|||  Historical data of 039670.KQ doesn't exist
|||  Historical data of CNS-R.BK doesn't exist
|||  Historical data of OEM.BE doesn't exist
|||  Historical data of SPLK.SW doesn't exist
>>>  Historical data of XEL.V exists but has no trading data
|||  Historical data of MLC.IR doesn't exist
|||  Historical data of MLNGF doesn't exist
|||  Historical data of TAXA75.SA doesn't exist
>>>  Historical data of ARP.ST saved
>>>  Historical data of BYRA.SG saved
>>>  Historical data of AUTO.ST exists but has no trading data
|||  Historical data of 7TO.BE doesn't exist
|||  Historical data of PM7A.MU doesn't exist
|||  Historical data of 3KH.F doesn't exist
|||  Historical data of AC4.SG doesn't exist
|||  Historical data of URH.F doesn't exist
|||  Historical data of 722760.TWO doesn't exist
|||  Historical data of TTTMW doesn't exist
|||  Historical data of SPICEMOBI.NS 

>>>  Historical data of FLI.NZ exists but has no trading data
>>>  Historical data of HEAR saved
>>>  Historical data of ZB1.F saved
|||  Historical data of IX1.SG doesn't exist
>>>  Historical data of COROMANDEL.BO saved
>>>  Historical data of GOLCA.BO saved
>>>  Historical data of SGGKY saved
|||  Historical data of VW-V.SW doesn't exist
>>>  Historical data of EDC.DU saved
|||  Historical data of RSRZD doesn't exist
>>>  Historical data of BMA saved
>>>  Historical data of PREMIER.NS saved
|||  Historical data of HUP.BE doesn't exist
>>>  Historical data of ULKER.IS saved
>>>  Historical data of NAZ saved
>>>  Historical data of BRSR3.SA saved
>>>  Historical data of TPO.MU saved
|||  Historical data of 0GA.BE doesn't exist
|||  Historical data of OXFCD doesn't exist
|||  Historical data of AVPFF doesn't exist
|||  Historical data of GNH.F doesn't exist
>>>  Historical data of UNS.AX exists but has no trading data
|||  Historical data of STCINDIA6.BO doesn't exist
|||  Historical d

|||  Historical data of EMA-PF.TO doesn't exist
>>>  Historical data of HDFNFTYINAV.BO exists but has no trading data
|||  Historical data of LACOMERUBC.MX doesn't exist
|||  Historical data of C3B.BE doesn't exist
|||  Historical data of HHI.MU doesn't exist
|||  Historical data of ISHMS.BO doesn't exist
>>>  Historical data of M5D.BE saved
>>>  Historical data of OU6.F saved
|||  Historical data of FQV.F doesn't exist
|||  Historical data of 0LN9.L doesn't exist
|||  Historical data of VODOF doesn't exist
>>>  Historical data of BMSR.JK saved
>>>  Historical data of 3CKN.BE saved
>>>  Historical data of 5178.KL saved
>>>  Historical data of VHM.SG saved
>>>  Historical data of XUN.BE saved
|||  Historical data of VLNBP doesn't exist
|||  Historical data of NYWKF doesn't exist
>>>  Historical data of UNS.TO saved
>>>  Historical data of KAO.BE saved
|||  Historical data of TSE.BK doesn't exist
|||  Historical data of FBMCF doesn't exist
|||  Historical data of EBY.V doesn't exist
||| 

#### Repeating the same steps for FEB 2014

In [26]:
# Output folder for the FEB 2014 files: <cwd>/../historic_data_2014/csv/
# The empty last component makes os.path.join append the trailing separator
# that get_historic_price relies on when it concatenates the file name.
csv_path = os.path.join(os.getcwd(), os.pardir, "historic_data_2014", "csv", "")

## Create directory if not already present
# exist_ok=True replaces the separate isdir() check, so there is no window
# between checking and creating in which the directory could appear.
os.makedirs(csv_path, exist_ok=True)

#### Generating URLs for FEB 2014

In [27]:
# One chart-API URL per ticker, requesting daily bars.
# period1=1391193000 is 2014-01-31 18:30:00 UTC and
# period2=1393525800 is 2014-02-27 18:30:00 UTC.
chart_endpoint_2014 = "https://query1.finance.yahoo.com/v8/finance/chart/"
chart_params_2014 = "&period1=1391193000&period2=1393525800&interval=1d&includePrePost=true&events=div%2Csplit"
query_urls2 = [
    chart_endpoint_2014 + symbol + "?symbol=" + symbol + chart_params_2014
    for symbol in ticker_list
]

In [28]:
# Pool comes from multiprocessing.dummy, so the workers are threads.
# One thread per URL meant ~900 simultaneous requests to the same host;
# cap the pool so the downloads are spread out instead.
MAX_DOWNLOAD_THREADS = 32

# Pool() rejects processes=0, so skip the pool entirely when there is nothing
# to download.
if query_urls2:
    with Pool(processes=min(MAX_DOWNLOAD_THREADS, len(query_urls2))) as pool:
        pool.starmap(get_historic_price, zip(query_urls2, itertools.repeat(csv_path)))
print("All downloads completed !")

>>>  Historical data of PPTG exists but has no trading data|||  Historical data of RI4.MU doesn't exist
|||  Historical data of AAARF doesn't exist

|||  Historical data of MAHAXPO.BO doesn't exist
|||  Historical data of HXWWF doesn't exist
|||  Historical data of KKB.IL doesn't exist
|||  Historical data of EXR1.BE doesn't exist
>>>  Historical data of BIO3.MU saved
>>>  Historical data of KSCL.NS saved
|||  Historical data of EDRA.AT doesn't exist
|||  Historical data of CRM.SW doesn't exist
|||  Historical data of 722751.TWO doesn't exist
|||  Historical data of GLOW doesn't exist
|||  Historical data of VITANAGRO.BO doesn't exist
|||  Historical data of HLBYL doesn't exist
>>>  Historical data of PNSE.JK saved
>>>  Historical data of 140410.KQ saved
>>>  Historical data of MOLG exists but has no trading data
>>>  Historical data of AOT-R.BK exists but has no trading data
>>>  Historical data of EVT.DU saved
>>>  Historical data of 036830.KQ saved
>>>  Historical data of 064960.KS 

|||  Historical data of ECOCQ doesn't exist
>>>  Historical data of 3MU.F saved
>>>  Historical data of PER.MU saved
>>>  Historical data of 0357.HK saved
>>>  Historical data of ALGIL.PA saved
>>>  Historical data of 8107.TWO saved
>>>  Historical data of LLY.BE saved
>>>  Historical data of INDIANHUME.NS saved
|||  Historical data of CXO doesn't exist
|||  Historical data of FRL.NS doesn't exist
|||  Historical data of T97.DU doesn't exist
|||  Historical data of XT4Y.BE doesn't exist
|||  Historical data of 3089.HK doesn't exist
|||  Historical data of NDX1.SW doesn't exist
|||  Historical data of TAE1.DU doesn't exist
|||  Historical data of LUXA.SG doesn't exist
|||  Historical data of TTM.SW doesn't exist
|||  Historical data of MLTPF doesn't exist
|||  Historical data of SC7.F doesn't exist
|||  Historical data of IMCFINA.BO doesn't exist
|||  Historical data of VLLX doesn't exist
|||  Historical data of CEAI doesn't exist
|||  Historical data of RELICAB.BO doesn't exist
|||  Hi

|||  Historical data of AC4.SG doesn't exist
|||  Historical data of BEMG doesn't exist
|||  Historical data of VPO.BK doesn't exist
>>>  Historical data of ETP exists but has no trading data
>>>  Historical data of BVB.BE saved
|||  Historical data of URH.F doesn't exist
|||  Historical data of THD.BE doesn't exist
|||  Historical data of BAM-PFB.TO doesn't exist
>>>  Historical data of BBY.SG saved
|||  Historical data of GILADAFINS.BO doesn't exist
>>>  Historical data of AUTO.ST exists but has no trading data
>>>  Historical data of ACS-D.MC exists but has no trading data
|||  Historical data of MRSKY doesn't exist
>>>  Historical data of CPH.BK saved
|||  Historical data of FCSSOFT-EQ.NS doesn't exist
<<<  Historical data of FGR.DU already exists, Updating data...
|||  Historical data of BRS.HM doesn't exist
>>>  Historical data of NEON saved
>>>  Historical data of 003410.KS saved
>>>  Historical data of TIMETECHNO.BO saved
>>>  Historical data of 0102.HK saved
>>>  Historical da

|||  Historical data of SABINA.BK doesn't exist
|||  Historical data of FBMCF doesn't exist
|||  Historical data of 1599.HK doesn't exist
|||  Historical data of TLVA doesn't exist
|||  Historical data of 0NW8.L doesn't exist
|||  Historical data of 041590.KQ doesn't exist
|||  Historical data of HINDNATGLS.NS doesn't exist
|||  Historical data of VHM.SG doesn't exist
|||  Historical data of QIC-U.V doesn't exist
|||  Historical data of PTG.BK doesn't exist
|||  Historical data of ESSAROIL.NS doesn't exist
|||  Historical data of CLTS doesn't exist
|||  Historical data of 721718.TWO doesn't exist
|||  Historical data of PTOAF doesn't exist
|||  Historical data of LPE.SG doesn't exist
|||  Historical data of DBE.F doesn't exist
|||  Historical data of CVN.V doesn't exist
|||  Historical data of VPCOU doesn't exist
|||  Historical data of EMA-PF.TO doesn't exist
|||  Historical data of XXT.F doesn't exist
|||  Historical data of ISHMS.BO doesn't exist
|||  Historical data of DARSHAN.BO d

### All done!!

Check historic_data_2013 & historic_data_2014 for csv files