#### Notebook to check whether the monthly tick data files are missing any dates on which the exchange was open (trading days)
Assumption: some very small stocks will have days with zero trades, but every trading day should still have bid/ask records (from market makers).

In [1]:
import os
import csv
import pandas_market_calendars as mcal
import calendar

In [4]:
# Trading calendar for the Oslo Stock Exchange, provided by pandas_market_calendars
ose = mcal.get_calendar('OSE')

# Index of the exchange schedule over the period covered by the dataset
# (presumably one entry per trading day -- verify against the library docs)
ose_sched = ose.schedule(
    start_date='2021-08-01',
    end_date='2022-04-01',
).index

In [6]:
# Directory holding the monthly tick files, relative to this notebook.
# Built with os.path.join so the separators match the host OS; the empty last
# component keeps the trailing separator that code concatenating `dir + file` relies on.
# NOTE: `dir` shadows the builtin of the same name; the name is kept because the
# checking loop further down reads it.
dir = os.path.join('..', '..', 'data', 'TICKS', '')

# os.listdir() returns entries in arbitrary order; sort so the report is reproducible
files = sorted(os.listdir(dir))

In [7]:
# Names of the files whose dates do not match the trading calendar; filled by the checking loop
error_files = set()

In [8]:
# Compare the dates found in every monthly tick file with the exchange's trading
# days for that month and report the files where the two sets differ:
#   MISSING DATES - trading days with no row in the file
#   EXTRA DATES   - dates in the file on which the exchange was not open
# Files with a discrepancy are printed and added to `error_files`.

# The calendar is the same for every file, so fetch it once instead of per iteration
ose = mcal.get_calendar('OSE')

# Months the dataset only partially covers: 'YYYY-MM' -> first day of the month covered.
# The dataset only contains the second half of August 2021 (not August of other years).
partial_month_start = {'2021-08': 15}

for file in files:
    try:
        # the second '_'-separated field of the file name is the month, e.g. '2022-02'
        month_year = file.split('_')[1]
        year_str, month_str = month_year.split('-')

        # monthrange() returns (weekday of the 1st, number of days in the month)
        days_in_month = calendar.monthrange(int(year_str), int(month_str))[1]

        first_day = partial_month_start.get(month_year, 1)
        start_date = '{}-{:02d}'.format(month_year, first_day)
        end_date = '{}-{:02d}'.format(month_year, days_in_month)

        # trading dates for the file's month/year as 'YYYY-MM-DD' strings.
        # Kept in its own name so the `ose_sched` index built earlier is not overwritten.
        expected_dates = set(
            ose.schedule(start_date=start_date, end_date=end_date)
            .index.strftime('%Y-%m-%d')
            .to_list()
        )

        # every date that appears in the file; the first 10 characters of 'Time'
        # are taken as the date (assumed to be 'YYYY-MM-DD')
        with open(os.path.join(dir, file), newline='') as csvfile:
            traded_dates = {row['Time'][:10] for row in csv.DictReader(csvfile)}

        missing_dates = expected_dates - traded_dates
        extra_dates = traded_dates - expected_dates
        if missing_dates or extra_dates:
            print(file)
            error_files.add(file)
            # sorted so the output is chronological and identical between runs
            if missing_dates:
                print('MISSING DATES:', sorted(missing_dates))
            if extra_dates:
                print('EXTRA DATES:', sorted(extra_dates))
            print('-----')
    except Exception as exc:
        # Keep going with the remaining files, but say why this one could not be
        # checked (unexpected file name, missing 'Time' column, unreadable file, ...).
        # `Exception` rather than a bare except so Ctrl-C still interrupts the run.
        print('ERROR, file: ', file, '-', repr(exc))


AKVA_2022-02_TRADES.csv
MISSING DATES: {'2022-02-09'}
-----
ALT_2021-10_TRADES.csv
MISSING DATES: {'2021-10-14', '2021-10-27', '2021-10-18'}
-----
ALT_2021-12_TRADES.csv
MISSING DATES: {'2021-12-02', '2021-12-03', '2021-12-06', '2021-12-09', '2021-12-14'}
-----
ALT_2022-01_TRADES.csv
MISSING DATES: {'2022-01-19', '2022-01-10', '2022-01-11', '2022-01-05', '2022-01-12', '2022-01-18', '2022-01-17', '2022-01-25', '2022-01-06'}
-----
ALT_2022-03_TRADES.csv
MISSING DATES: {'2022-03-02'}
-----
BWIDL_2021-08_TRADES.csv
MISSING DATES: {'2021-08-31'}
-----
BWIDL_2021-09_TRADES.csv
MISSING DATES: {'2021-09-17', '2021-09-13', '2021-09-22'}
-----
BWIDL_2021-10_TRADES.csv
MISSING DATES: {'2021-10-15', '2021-10-21'}
-----
BWIDL_2021-11_TRADES.csv
MISSING DATES: {'2021-11-12', '2021-11-30'}
-----
EAM_2021-09_TRADES.csv
MISSING DATES: {'2021-09-29'}
-----
EAM_2022-02_TRADES.csv
MISSING DATES: {'2022-02-18'}
-----
HBC_2021-12_TRADES.csv
MISSING DATES: {'2021-12-21'}
-----
HOC_2021-08_TRADES.csv
MISSING 