# Download Yahoo Finance Data
Downloading stock price data from Yahoo Finance using the yfinance library.

Sources:
* [yfinance library](https://pypi.org/project/yfinance/)
* [Tutorial](https://analyzingalpha.com/yfinance-python)

This notebook contains the code used to get company codes, download data from Yahoo Finance, and merge the datasets into one file. If you want to load the downloaded data and explore it, use the ExploreYahooFinanceData notebook.

In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import os

In [18]:
# Smoke test on a single ticker: request its history with period='max' and
# display the (rows, columns) shape of the returned frame.
bim = yf.Ticker('BIMAS.IS')
bim_history = bim.history(period='max')
bim_history.shape

(4466, 7)

In [2]:
def download_yahoo_data(codes, path='data/yahoo_finance_unmerged'):
    """Download the price history of each company code and save it as a CSV file.

    Parameters
    ----------
    codes : iterable of str
        Company symbols without the exchange suffix; ".IS" is appended
        before the symbol is passed to yfinance.
    path : str
        Directory that receives one ``<code>.csv`` file per symbol.
        It is created (including parents) if it does not exist yet.

    Notes
    -----
    Symbols for which yfinance returns an empty frame are skipped, so no
    file is written for them.
    """
    # to_csv does not create missing parent directories, so make sure the
    # target exists before the first (slow) download starts.
    os.makedirs(path, exist_ok=True)

    for code in codes:
        # Turkish company symbols carry the ".IS" suffix on Yahoo Finance
        # (presumably standing for Istanbul).
        ticker = yf.Ticker(code + '.IS')
        df = ticker.history(period='max')

        if df.empty:
            continue

        # Turn the index into a regular column and lower-case all column names
        df = df.reset_index()
        df.columns = df.columns.str.lower()
        df.to_csv(os.path.join(path, code + '.csv'), index=False)

Company codes

In [3]:
# Load the two stored symbol lists.
# .squeeze() collapses a single-column frame into a Series; this assumes each
# CSV holds exactly one column of symbols (TODO confirm).
wiki_codes = pd.read_csv('data/wiki_company_symbols.csv').squeeze()
bist_codes = pd.read_csv('data/bist100_company_symbols.csv').squeeze()

### Loading company codes from Wikipedia

In [189]:
# # Code to get company symbols from Wikipedia

# url = 'https://en.wikipedia.org/wiki/List_of_companies_listed_on_the_Istanbul_Stock_Exchange'
# wiki_codes = pd.read_html(url)

# wiki_codes = pd.concat(wiki_codes, ignore_index=False)

# wiki_codes = wiki_codes.dropna(subset='Symbol')

# # Removing edit button
# wiki_codes = wiki_codes['Symbol'].str.replace('[edit]', '', regex=False)

# # Removing letter cells
# cond = wiki_codes.map(len) > 1
# wiki_codes = wiki_codes[cond]

# # These are cells with several company wiki_codes separated by a comma and a space
# cond = wiki_codes.map(len) > 5
# multi_wiki_codes = wiki_codes[cond].copy()
# wiki_codes = wiki_codes[~cond]

# # Splitting the multi_wiki_codes and reappending them
# code_list = []
# for code in multi_wiki_codes:
#     code_list += code.split(', ')
# wiki_codes = pd.concat([wiki_codes, pd.Series(code_list)])

# # Removing special letters
# cond = wiki_codes.map(len) < 4
# wiki_codes = wiki_codes[~cond]

# wiki_codes = wiki_codes.reset_index(drop=True)

In [194]:
# Count how many of the Wikipedia symbols also appear in the BIST 100 list.
# 76 out of 100 are in the wiki_codes.
# Maybe some were lost in cleaning or were not read by pandas.read_html.
# Uses bist_codes (loaded above); the former name bist_100 is not defined anywhere
# in this notebook and raised NameError on a fresh kernel.
wiki_codes.isin(bist_codes).sum()

76

Download log (Do not run)

In [89]:
# Some companies were not downloaded for different reasons (see log below)
# download_yahoo_data(codes)

- USDTR.IS: 1d data not available for startTime=-2208994789 and endTime=1669575868. Only 100 years worth of day granularity data are allowed to be fetched per request.
Got error from yahoo api for ticker AKGUV.IS, Error: {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}
- AKGUV.IS: No data found for this date range, symbol may be delisted
Got error from yahoo api for ticker AKFEN.IS, Error: {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}
- AKFEN.IS: No data found for this date range, symbol may be delisted
Got error from yahoo api for ticker AKSEL.IS, Error: {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}
- AKSEL.IS: No data found for this date range, symbol may be delisted
Got error from yahoo api for ticker AKPAZ.IS, Error: {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}
- AKPAZ.IS: No data found for this date range, symbol may be delisted
Got error from yahoo api for

Got error from yahoo api for ticker MRDIN.IS, Error: {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}
- MRDIN.IS: No data found for this date range, symbol may be delisted
Got error from yahoo api for ticker MRTGG.IS, Error: {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}
- MRTGG.IS: No data found for this date range, symbol may be delisted
- NTTUR.IS: 1d data not available for startTime=-2208994789 and endTime=1669576021. Only 100 years worth of day granularity data are allowed to be fetched per request.
- PTOFS.IS: 1d data not available for startTime=-2208994789 and endTime=1669576027. Only 100 years worth of day granularity data are allowed to be fetched per request.
Got error from yahoo api for ticker TKSTL.IS, Error: {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}
- TKSTL.IS: No data found for this date range, symbol may be delisted
Got error from yahoo api for ticker TRCAM.IS, Error: {'code':

Creating a summary of the datasets downloaded

In [105]:
def create_file_summary(path='data/yahoo_finance_unmerged'):
    """Summarise every CSV file found directly inside ``path``.

    Each CSV is expected to contain a ``date`` and a ``close`` column.

    Parameters
    ----------
    path : str
        Directory holding the per-company CSV files.

    Returns
    -------
    dict of str -> list
        Column-oriented summary with one entry per CSV file, in file-name order:
        ``name`` (file name without extension), ``start_date`` and ``end_date``
        (earliest / latest date as 'YYYY-MM-DD' strings), ``size`` (row count)
        and ``na_in_close`` (number of missing values in ``close``).
    """
    data_details = {
        'name': [],
        'start_date': [],
        'end_date': [],
        'size': [],
        'na_in_close': []
    }

    # os.listdir returns entries in arbitrary order; sort for a reproducible summary
    for file in sorted(os.listdir(path)):
        file_path = os.path.join(path, file)
        name, ext = os.path.splitext(file)

        # Skip sub-directories (e.g. .ipynb_checkpoints) and non-CSV files,
        # which pd.read_csv cannot parse as price data.
        if ext.lower() != '.csv' or not os.path.isfile(file_path):
            continue

        df = pd.read_csv(file_path, parse_dates=['date'])

        data_details['name'].append(name)
        data_details['start_date'].append(df['date'].min().strftime('%Y-%m-%d'))
        data_details['end_date'].append(df['date'].max().strftime('%Y-%m-%d'))
        data_details['size'].append(df.shape[0])
        data_details['na_in_close'].append(df['close'].isna().sum())

    return data_details

In [4]:
# Load the cached per-file summary. It is rebuilt from the unmerged CSV files
# only when the cache is missing, so the cell works on a fresh checkout without
# un-commenting code, and an existing summary is never overwritten.
summary_path = 'data/yahoo_finance/data_summary.csv'

if not os.path.exists(summary_path):
    os.makedirs(os.path.dirname(summary_path), exist_ok=True)
    pd.DataFrame.from_dict(create_file_summary()).to_csv(summary_path, index=False)

df = pd.read_csv(summary_path, parse_dates=['start_date', 'end_date'])
df.head()

Unnamed: 0,name,start_date,end_date,size,na_in_close
0,ACSEL,2012-07-06,2022-11-25,2680,0
1,ADEL,2000-05-10,2022-11-25,5792,0
2,ADESE,2011-11-03,2022-11-25,2856,0
3,AEFES,2000-05-10,2022-11-25,5709,0
4,AFYON,2000-05-10,2022-11-25,5774,0


In [5]:
# Distribution of the 'size' column of the summary (row count of each downloaded file)
df['size'].describe()

count     284.000000
mean     4821.961268
std      1299.515768
min        83.000000
25%      3941.250000
50%      5648.000000
75%      5713.000000
max      5824.000000
Name: size, dtype: float64

### Merging files
Do not run this code

In [214]:
# path = 'data/yahoo_finance_unmerged'  # per-company CSVs; 'data/yahoo_finance' holds the summary and the merged output
# files = os.listdir(path)

# df_list = []

# for file in files:
#     file_path = os.path.join(path, file)
#     df = pd.read_csv(file_path)
    
#     # Adding company name column
#     df.insert(0, 'name', file.replace('.csv', ''))
#     df_list.append(df)
    
# merged_df = pd.concat(df_list, ignore_index=True)
# merged_df.to_csv('data/yahoo_finance/merged_yahoo_data.csv', index=False)