In [21]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
from io import StringIO
from nse_old import NSE
from nsepy import get_history
from market import TRADED_DATES
from helpers import SafeHDFStore, clean_file, rename_columns, get_date, get_daily_volatility
import warnings
import time

In [22]:
# Read `symbol_meta` from a fresh NSE instance. At this point `NSE` is the
# class imported from `nse_old` in the imports cell; a later cell re-imports
# `NSE` from `nse`, so re-running this cell after that one would use the
# other class.
symbol_meta = NSE().symbol_meta

In [23]:
from nse import NSE

In [24]:
# Instantiate NSE. In top-to-bottom order the name `NSE` now refers to the
# class imported from `nse` in the preceding cell, not the one from `nse_old`.
nse = NSE()

In [27]:
# Time a full fetch for a single symbol.
# NOTE: `fetch_eod_data` is defined in the cell *below* (executed as In [26],
# before this cell ran as In [27]); on a fresh kernel run that cell first.
%timeit fetch_eod_data('infy')

10.537630319595337 0.17525029182434082
9.243574142456055 0.3253765106201172
10.180083751678467 0.16495561599731445
11.184508085250854 0.11490511894226074
1 loop, best of 3: 9.57 s per loop


In [26]:
def fetch_eod_data(symbol, start=None):
    '''Fetch End of Day (EOD) equity data for one symbol from NSE.

    Downloads the 'EQ' series via ``get_history`` for the window given by
    ``get_date``, normalises the frame (column renaming, de-duplication per
    date) and appends derived spread / return / volatility columns.

    Parameters
    ----------
    symbol : str
        Symbol forwarded to ``get_history`` and stored in the ``symbol``
        index level of the result.
    start : optional
        Forwarded to ``get_date(start, start=True)`` to obtain the start of
        the window; the end of the window is ``get_date(start=False)``.

    Returns
    -------
    pandas.DataFrame
        Float-typed frame indexed by ``(symbol, date)`` with the extra
        columns ``high_low_spread``, ``open_close_spread``,
        ``simple_returns``, ``log_returns`` and ``daily_volatility``;
        ``pct_deliverble`` is multiplied by 100.
        An empty frame is returned (after a warning) when NSE yields no rows
        or when the download / initial clean-up raises.

    Notes
    -----
    Prints the time spent downloading and the time spent post-processing.
    '''
    from_date = get_date(start, start=True)
    to_date = get_date(start=False)

    # Get data from NSE
    t1 = time.time()
    try:
        nse_data = get_history(
            symbol=symbol, start=from_date,
            end=to_date, series='EQ'
        )
        if nse_data.empty:
            warnings.warn(
                'No data recieved from NSE for {0} from {1} to {2}'.
                format(symbol, from_date.date(), to_date.date())
            )
            return nse_data
        # Use the `columns=` keyword: passing the axis positionally was
        # removed in pandas 2.0, and the resulting TypeError would be
        # swallowed by the broad `except` below, returning an empty frame.
        nse_data = nse_data.drop(columns=['Series', 'Deliverable Volume'])
        nse_data.index = pd.to_datetime(nse_data.index)
    except Exception as e:
        warnings.warn(
            'Could not get data for {0} from NSE due to {1}'.format(symbol, e)
        )
        return pd.DataFrame()
    t2 = time.time()

    # `rename_columns` is called for its in-place effect; the lower-case
    # column names used below ('date', 'close', ...) rely on it.
    rename_columns(nse_data)
    nse_data['symbol'] = symbol  # scalar broadcasts to every row
    nse_data = nse_data.reset_index().sort_values(['symbol', 'date', 'close'])
    # Keep one row per (symbol, date); after the sort above that is the row
    # with the highest close.
    nse_data = nse_data.drop_duplicates(
        subset=['symbol', 'date'], keep='last'
    )

    # Missing-traded-date handling.
    # NOTE(review): `nse_data` received a positional index from
    # reset_index() above while TRADED_DATES is sliced by date, so this
    # index-on-index join presumably matches nothing -- confirm, and join on
    # the 'date' column if that is the intent. The filled `total_trades`
    # frame is also never used after this block: the function carries on
    # with `nse_data`. Behaviour is deliberately left unchanged because the
    # intended output (e.g. how to treat traded dates that precede the first
    # quote) cannot be determined from this code.
    stock_specific_traded_dates = TRADED_DATES[from_date:to_date]
    total_trades = stock_specific_traded_dates.join(nse_data).drop(
        ['days_count'], axis=1
    )
    missing_dates = total_trades.index.difference(nse_data.index)
    missing_count = len(missing_dates)

    if missing_count > 0:
        non_null_close = total_trades[['prev_close', 'close']].dropna()
        # Rows whose prev_close differs from the previous row's close; the
        # first row always differs (nothing to shift from), hence `<= 1`.
        breaks = non_null_close[
            non_null_close.close.shift(1) != non_null_close.prev_close
        ]
        if len(breaks) <= 1:
            total_trades[['symbol', 'close']] = total_trades[['symbol', 'close']].ffill()
            total_trades.prev_close = total_trades.prev_close.fillna(total_trades.close)
            total_trades.open = total_trades.open.fillna(total_trades.close)
            total_trades.high = total_trades.high.fillna(total_trades.close)
            total_trades.low = total_trades.low.fillna(total_trades.close)
            total_trades['last'] = total_trades['last'].fillna(total_trades.close)
            total_trades.vwap = total_trades.vwap.fillna(total_trades.close)
            total_trades.volume = total_trades.volume.fillna(0)
            total_trades.turnover = total_trades.turnover.fillna(0)
        else:
            print('Error with data in {0}'.format(symbol))

    # Derived columns
    nse_data = nse_data.set_index(['symbol', 'date'])
    nse_data['high_low_spread'] = (nse_data.high - nse_data.low) / nse_data.low
    nse_data['open_close_spread'] = (nse_data.close - nse_data.open) / nse_data.open
    nse_data['simple_returns'] = (
        (nse_data.close - nse_data.prev_close) / nse_data.prev_close
    )
    nse_data['log_returns'] = np.log(nse_data.close / nse_data.prev_close)
    nse_data['daily_volatility'] = get_daily_volatility(nse_data.log_returns)

    # Adjusting other columns for maintaining integrity
    nse_data['pct_deliverble'] = nse_data['pct_deliverble'] * 100
    # Builtin `float`: the `np.float` alias was removed in NumPy 1.24.
    nse_data = nse_data.astype(float)
    t3 = time.time()
    print(t2-t1, t3-t2)
    return nse_data

In [None]:
# Not executed yet (the cell has no execution count). Calls `force_load_data`
# on the `nse` instance created earlier with the key 'symbol_meta'.
# NOTE(review): presumably forces a refresh of the stored symbol metadata --
# confirm against the `nse` module.
nse.force_load_data('symbol_meta')