In [21]:
import pandas as pd
import numpy as np
from bokeh.io import show, output_notebook, push_notebook
from bokeh.plotting import figure

from bokeh.models import CategoricalColorMapper, HoverTool, ColumnDataSource, Panel
from bokeh.models.widgets import CheckboxGroup, Slider, RangeSlider, Tabs, DateRangeSlider, CheckboxButtonGroup,RadioButtonGroup

from bokeh.layouts import column, row, WidgetBox
from bokeh.palettes import Category20_16

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
import yaml
from bokeh.themes import Theme
from datetime import datetime, timedelta
import time
from time import mktime
import scipy.special

from scipy.stats import gaussian_kde
# Lift pandas' row-display cap so displayed DataFrames are never truncated.
# NOTE(review): this applies to every later cell; prefer .head()/.sample() on large frames.
pd.set_option('display.max_rows', None)
# Send Bokeh output to inline notebook cells.
output_notebook()

In [47]:
# Load the index price history and derive the per-day movement columns that
# the histogram dashboard lets the user choose between.
niftyData = pd.read_csv('NSEI.csv')

# Day-of-week number (as produced by Series.dt.dayofweek) -> label;
# -1 is the pseudo-entry meaning "no day filter".
dayMapping = {-1 : "all_days", 0:'Monday', 1 : 'Tuesday', 2 : 'Wednesday', 3 :'Thursday', 4 : 'Friday', 5 : 'Saturday', 6 : 'Sunday'}

# Close-to-close movement, relative (in percent) and absolute.
niftyData['%change'] = niftyData['Close'].pct_change() * 100
niftyData['prev_abs_change'] = niftyData['Close'].diff()
# Movement within a single session.
niftyData['intraday_abs_change'] = niftyData['Close'] - niftyData['Open']
niftyData['intraday_abs_Range'] = niftyData['High'] - niftyData['Low']

# Parse the whole column in one vectorised call instead of one call per row.
niftyData['Date'] = pd.to_datetime(niftyData['Date'])
niftyData['DayOfWeek'] = niftyData['Date'].dt.dayofweek # start with 0 and thursday is on 3

# Replace missing values (e.g. the first row of the diff-based columns) with 0.
# A scalar fill gives the same result along either axis, so no axis is passed.
niftyData = niftyData.fillna(0)

# One fill colour per selectable day series.
colorPallete = ['#3B0F6F', '#DD4968', '#FD9F6C', '#000003','#FBFCBF', '#8C2980']

In [16]:
# Sessions whose open-to-close move exceeded 300 points in either direction.
filtered = niftyData.loc[niftyData['intraday_abs_change'].abs() > 300.0]
filtered

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,%change,prev_abs_change,intraday_abs_change,intraday_abs_Range,DayOfWeek
86,2008-01-21,5705.0,5705.0,4977.100098,5208.799805,5208.799805,0.0,-8.702435,-496.5,-496.200195,727.899902,0
87,2008-01-22,5203.350098,5203.350098,4448.5,4899.299805,4899.299805,0.0,-5.941868,-309.5,-304.050293,754.850098,1
88,2008-01-23,4903.049805,5328.049805,4891.600098,5203.399902,5203.399902,0.0,6.207011,304.100097,300.350097,436.449707,2
90,2008-01-25,5035.049805,5399.25,5035.049805,5383.350098,5383.350098,0.0,6.951492,349.899903,348.300293,364.200195,4
274,2008-10-24,2936.25,2936.25,2525.050049,2584.0,2584.0,0.0,-12.202909,-359.149902,-352.25,411.199951,4
404,2009-05-18,3673.149902,4384.299805,3673.149902,4323.149902,4323.149902,0.0,17.744066,651.5,650.0,711.149903,0
2256,2016-11-09,8067.5,8476.200195,8002.25,8432.0,8432.0,314300.0,-1.305661,-111.549805,364.5,473.950195,2
2962,2019-09-20,10746.799805,11381.900391,10691.0,11274.200195,11274.200195,1400.0,5.319113,569.40039,527.40039,690.900391,4
3079,2020-03-12,10039.950195,10040.75,9508.0,9590.150391,9590.150391,1343500.0,-8.301939,-868.25,-449.799804,532.75,3
3080,2020-03-13,9107.599609,10159.400391,8555.150391,9955.200195,9955.200195,1388000.0,3.806508,365.049804,847.600586,1604.25,4


In [72]:
def modify_doc(doc):
    """Build the interactive Nifty movement histogram and add it to ``doc``.

    Creates four controls (day-of-week buttons, movement-type radio buttons,
    a bin-width slider and a date-range slider), builds the histogram data
    from the module-level ``niftyData`` frame, and registers callbacks that
    rebuild the data whenever a control changes.

    Parameters
    ----------
    doc : object exposing ``add_root`` -- presumably the Bokeh ``Document``
        supplied by ``FunctionHandler``; verify against the caller.
    """
    # Reference material this dashboard was adapted from:
    #https://github.com/bokeh/bokeh/blob/2.0.2/examples/howto/server_embed/notebook_embed.ipynb
    ##https://towardsdatascience.com/data-visualization-with-bokeh-in-python-part-ii-interactions-a4cf994e2512
    #https://towardsdatascience.com/data-visualization-with-bokeh-in-python-part-iii-a-complete-dashboard-dc6a86aa6e23
    #https://github.com/WillKoehrsen/Bokeh-Python-Visualization/blob/master/interactive/exploration/interactive_development.ipynb

    def style(p):
        """Apply the title, axis-label and tick-label fonts to ``p``; return ``p``."""
        # Title
        p.title.align = 'center'
        p.title.text_font_size = '20pt'
        p.title.text_font = 'serif'

        # Axis titles
        p.xaxis.axis_label_text_font_size = '14pt'
        p.xaxis.axis_label_text_font_style = 'bold'
        p.yaxis.axis_label_text_font_size = '14pt'
        p.yaxis.axis_label_text_font_style = 'bold'

        # Tick labels
        p.xaxis.major_label_text_font_size = '12pt'
        p.yaxis.major_label_text_font_size = '12pt'

        return p

    def make_plot(src):
        """Create the histogram figure whose glyphs read from ``src``.

        ``src`` is the ColumnDataSource returned by ``make_dataset``; the
        glyphs use its 'left', 'right', 'proportion', 'color', 'name' and
        'x' columns, and the hover tool additionally reads 'interval',
        'frequency' and 'pdf'.
        """
        # Blank plot with correct labels
        p = figure(plot_width = 700, plot_height = 700,
                   title = 'Histogram of Nifty Movement with Days',
                   x_axis_label = 'Movement', y_axis_label = 'Proportion')

        # Quad glyphs to create a histogram. `legend_field` groups legend
        # entries by the 'name' column; it replaces the deprecated `legend=`.
        p.quad(source = src, bottom = 0, top = 'proportion', left = 'left', right = 'right',
               color = 'color', fill_alpha = 0.7, hover_fill_color = 'color', legend_field = 'name',
               hover_fill_alpha = 1.0, line_color = 'black')

        # NOTE(review): multi_line expects each row to hold a *list* of
        # coordinates, but 'x' and 'proportion' hold one scalar per row, so
        # this glyph probably draws nothing. Kept as in the original until
        # the intended "average" curve is confirmed.
        p.multi_line('x',  'proportion', color='blue',source = src, line_width=3, alpha=0.7, legend_label="average")

        # Hover tool with vline mode
        hover = HoverTool(tooltips=[('Day', '@name'),
                                    ('Range', '@interval'),
                                    ('No. Of Days', '@frequency'),
                                    ('Fraction', '@proportion'),
                                    ('pdf', '@pdf')],
                          mode='vline')
        p.add_tools(hover)

        # Styling
        return style(p)

    def make_dataset(dataframe, sel_column, selected_days,dateRange,bin_width=10,range_start = -800, range_end = 300):
        """Build per-day histogram data for ``sel_column``.

        Parameters
        ----------
        dataframe : DataFrame with 'Date', 'DayOfWeek' and ``sel_column`` columns.
        sel_column : name of the movement column to histogram.
        selected_days : iterable of day labels; 'all_days' means no day filter.
        dateRange : (start, end) pair; only rows strictly between them are used.
        bin_width : width of each histogram bin.
        range_start, range_end : lower and upper edge of the histogram range.

        Returns
        -------
        ColumnDataSource with one row per (day, bin), sorted by day name and
        left bin edge. Every column is present even when nothing is selected,
        so a later ``src.data.update`` never leaves columns of unequal length.
        """
        columns = ['proportion', 'left', 'right', 'interval', 'name',
                   'color', 'frequency', 'x', 'pdf', 'cdf']
        day_to_number = {"all_days" : -1 ,'Monday' :  0, 'Tuesday' :1 , 'Wednesday': 2 ,
                         'Thursday': 3 , 'Friday': 4 , 'Saturday': 5, 'Sunday' : 6}
        n_bins = int((range_end - range_start) / bin_width)

        start_date = dateRange[0]
        end_date = dateRange[1]
        # Build the mask from the frame being filtered, not from the global
        # niftyData, so the function is correct for any frame passed in.
        in_window = dataframe[(dataframe['Date'] > start_date) & (dataframe['Date'] < end_date)]

        frames = []
        for i, day in enumerate(selected_days):
            if day == 'all_days':
                subset = in_window
            else:
                subset = in_window[in_window['DayOfWeek'] == day_to_number[day]]

            values = subset[sel_column]
            mu = values.mean()
            sigma = values.std()
            hist, edges = np.histogram(values, bins = n_bins, range = [range_start, range_end])

            # Avoid 0/0 (NaN plus a RuntimeWarning) when no value is in range.
            total = hist.sum()
            proportion = hist / total if total else np.zeros(len(hist))

            arr_df = pd.DataFrame({'proportion': proportion, "frequency" : hist,
                                   "left": edges[:-1],
                                   "right": edges[1:]})
            arr_df["interval"] = ["%d to %d" % (left, right)
                                  for left, right in zip(arr_df["left"], arr_df["right"])]
            # Label every bin with its day so legend and hover can show it.
            arr_df['name'] = day
            # Colour each day differently; wrap around if the palette is shorter.
            arr_df['color'] = colorPallete[i % len(colorPallete)]
            # Bin centres, at which the fitted normal curves are evaluated.
            arr_df['x'] = (arr_df['left']+arr_df['right'])/2
            # Normal density and cumulative distribution with the subset's
            # mean and standard deviation.
            arr_df['pdf'] = 1/(sigma * np.sqrt(2*np.pi)) * np.exp(-(arr_df['x']-mu)**2 / (2*sigma**2))
            arr_df['cdf'] = (1+scipy.special.erf((arr_df['x']-mu)/np.sqrt(2*sigma**2)))/2
            frames.append(arr_df)

        # Concatenate once instead of growing a frame inside the loop
        # (DataFrame.append is quadratic here and was removed in pandas 2.0).
        if frames:
            all_data = pd.concat(frames)[columns]
        else:
            all_data = pd.DataFrame(columns=columns)

        all_data = all_data.sort_values(['name', 'left'])
        return ColumnDataSource(all_data)

    def selected_date_range():
        """Return the date slider's (start, end) pair as datetimes.

        Numeric values are treated as epoch milliseconds, as the original
        change handler did; anything else is passed through unchanged.
        """
        value = date_range_slider.value
        if isinstance(value[0], (int, float)):
            return (datetime.fromtimestamp(value[0] / 1000),
                    datetime.fromtimestamp(value[1] / 1000))
        return value

    def current_dataset():
        """Build a data source from the current state of all four controls."""
        chosen_days = [days_selection.labels[i] for i in days_selection.active]
        chosen_type = graph_type_sel_group.labels[graph_type_sel_group.active]
        return make_dataset(dataframe = niftyData, sel_column = chosen_type,
                            selected_days = chosen_days,
                            bin_width = bins_selection.value,
                            range_start = range_start,
                            range_end = range_end,
                            dateRange = selected_date_range())

    def update(attr, old, new):
        """Widget change callback: rebuild the data and push it into ``src``."""
        src.data.update(current_dataset().data)

    available_days = ["all_days" ,'Monday', 'Tuesday', 'Wednesday',
                      'Thursday', 'Friday']
    available_type = ['%change', 'prev_abs_change', 'intraday_abs_change','intraday_abs_Range']
    # Histogram range shared by the initial build and every update.
    range_start = -300
    range_end = 300

    # How many years back the date slider reaches.
    max_years = 13

    days_selection = CheckboxButtonGroup(
        labels=available_days, active=[1,2,3])
    days_selection.on_change('active', update)

    graph_type_sel_group = RadioButtonGroup(
        labels=available_type, active=1)
    graph_type_sel_group.on_change('active', update)

    bins_selection = Slider(start = 1, end = 50,
                            step = 1, value = 25,
                            title = 'Bin Width')
    bins_selection.on_change('value', update)

    # Take "now" once so both ends of the slider derive from the same instant.
    endtime = datetime.now()
    starttime = endtime - timedelta(days=365*max_years)
    print(starttime, endtime, "passed dates")

    date_range_slider = DateRangeSlider(title="Date Range: ",
                                        start=starttime, end=endtime,
                                        value=(starttime,endtime), step=1)
    date_range_slider.on_change('value', update)

    # Initial data and figure, built the same way the callback rebuilds them.
    src = current_dataset()
    p = make_plot(src)
    controls = WidgetBox(days_selection, graph_type_sel_group, bins_selection,date_range_slider)
    # Stack the controls above the plot
    layout = column(controls, p)
    # Make a tab with the layout
    # NOTE(review): the title looks like a leftover from the flight-delay
    # tutorial this was adapted from; consider renaming it.
    tab = Panel(child=layout, title = 'Delay Histogram')
    tabs = Tabs(tabs=[tab])
    doc.add_root(tabs)
# Set up an application: wrap modify_doc in a FunctionHandler and build
# the Bokeh Application from that handler.
handler = FunctionHandler(modify_doc)
app = Application(handler)
    

In [73]:
# Display the Bokeh application built above.
# NOTE(review): on a non-default Jupyter host/port, show() may need notebook_url -- confirm.
show(app)

2007-05-22 23:14:26.011651 2020-05-18 23:14:26.011644 passed dates




In [220]:
# NOTE(review): leftover scratch/debug cell with no role in the analysis; safe to delete.
print("jjfv")

jjfv
