In [3]:
from market import Market
import pandas as pd

%matplotlib inline

In [4]:
import warnings
from datetime import datetime
from io import StringIO

import numpy as np
import pandas as pd
import requests
from nsepy import get_history

import concurrent.futures
from helpers import rename_columns

In [24]:
m = Market()
traded_dates = pd.read_hdf('constants.h5', 'traded_dates')

In [32]:
def fetch_historical_data(symbol, start=None):
    '''Fetch all data from NSE and Adj Close from Yahoo Finance'''
    from_date = m.get_date(start, start=True)
    to_date = m.get_date(date='2006-09-01', start=False)
    # Get data from NSE
    try:
        nse_data = get_history(
            symbol=symbol, start=from_date,
            end=to_date, series='EQ'
        )
        nse_data = pd.concat([nse_data_EQ, nse_data_BE, nse_data_BL])
        if nse_data.empty:
            warnings.warn(
                'No data recieved from NSE for {0} from {1} to {2}'.
                format(symbol, from_date.date(), to_date.date())
            )
            return nse_data
        nse_data.drop(
            ['Deliverable Volume'], 1, inplace=True
        )
        nse_data.index = pd.to_datetime(nse_data.index)
    except Exception as e:
        warnings.warn(
            'Could not get data for {0} from NSE due to {1}'.format(symbol, e)
        )
        return pd.DataFrame()

    rename_columns(nse_data)
    nse_data['symbol'] = [symbol for i in range(len(nse_data))]
    nse_data = nse_data.reset_index().sort_values(['date', 'symbol', 'series', 'close'])
    nse_data = nse_data.drop_duplicates(
        subset=['symbol', 'date'], keep='last'
    )
    stock_sprecific_traded_dates = traded_dates[from_date:]
    total_trades = stock_sprecific_traded_dates.join(nse_data).drop(['days_count'], axis=1)
    missing_dates = total_trades.index.difference(nse_data.index)
    missing_count = len(missing_dates)

    if missing_count > 0:
        non_null_close = total_trades[['prev_close', 'close']].dropna()
        if len(non_null_close[non_null_close.close.shift(1) != non_null_close.prev_close]) <= 1:
            total_trades[['symbol', 'close',]] = total_trades[['symbol', 'close',]].ffill()
            total_trades.prev_close = total_trades.prev_close.fillna(total_trades.close)
            total_trades.open = total_trades.open.fillna(total_trades.close)
            total_trades.high = total_trades.high.fillna(total_trades.close)
            total_trades.low = total_trades.low.fillna(total_trades.close)
            total_trades['last'] = total_trades['last'].fillna(total_trades.close)
            total_trades.vwap = total_trades.vwap.fillna(total_trades.close)
            total_trades.volume = total_trades.volume.fillna(0)
            total_trades.turnover = total_trades.turnover.fillna(0)
        else:
            print(missing_dates)
#     nse_data['simple_returns'] = (
#         (nse_data.close - nse_data.prev_close) / nse_data.prev_close
#     )
#     nse_data['log_returns'] = np.log(nse_data.close / nse_data.prev_close)
#     nse_data = m.handle_abnormal_returns(symbol_data=nse_data)
#     nse_data['daily_volatility'] = m.get_daily_volatility(nse_data.log_returns)

#     # Adjusting other columns for maintaining integrity
#     nse_data.volume = nse_data.volume.astype(np.float)
#     nse_data.trades = nse_data.trades.astype(np.float)
#     nse_data['pct_deliverble'] = nse_data['pct_deliverble'] * 100
    return nse_data

In [33]:
data = fetch_historical_data('hindalco', '2006-05-01')

In [27]:
data

Unnamed: 0,date,symbol,series,prev_close,open,high,low,last,close,vwap,volume,turnover,trades,pct_deliverble
0,2006-05-02,hindalco,EQ,224.45,230.00,240.00,230.00,235.90,235.95,235.29,10137998.0,2.385404e+09,,0.5347
1,2006-05-03,hindalco,EQ,235.95,236.00,239.80,227.05,230.95,231.05,230.72,6663868.0,1.537466e+09,,0.5488
2,2006-05-04,hindalco,EQ,231.05,235.00,235.00,222.00,224.70,224.90,228.39,4884911.0,1.115680e+09,,0.4670
3,2006-05-05,hindalco,EQ,224.90,226.50,233.00,225.25,230.00,229.95,229.94,4064947.0,9.347110e+08,,0.3783
4,2006-05-08,hindalco,EQ,229.95,230.00,233.95,227.65,228.75,228.75,230.30,4222862.0,9.725140e+08,,0.5623
5,2006-05-09,hindalco,EQ,228.75,229.00,231.00,222.80,230.85,229.25,226.55,3584834.0,8.121470e+08,,0.4326
6,2006-05-10,hindalco,EQ,229.25,231.10,243.80,231.10,241.50,239.45,235.36,8214673.0,1.933381e+09,,0.5311
7,2006-05-11,hindalco,EQ,239.45,240.50,248.50,237.20,244.10,243.20,244.19,9068591.0,2.214463e+09,,0.4329
8,2006-05-12,hindalco,EQ,243.20,244.00,251.30,241.10,243.30,242.85,246.47,8524482.0,2.101034e+09,,0.4771
9,2006-05-15,hindalco,EQ,242.85,244.00,244.00,205.95,210.00,209.60,222.13,15781775.0,3.505589e+09,,0.4566


In [None]:
data['2010-10':'2010-11'][['series', 'prev_close', 'close']]

In [None]:
data.close.plot()

In [None]:
days = pd.read_hdf('constants.h5', 'traded_dates')

In [None]:
data = days.join(data).drop(['days_count'], axis=1)['2012':]

In [None]:
data.dropna()