In [65]:
import pandas as pd
import numpy as np
import plotly.express as px
from scipy.stats.mstats import hmean
from functools import reduce
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier 
from sklearn import tree
from scipy.stats import skew
from scipy.stats import kurtosis
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.signal import find_peaks
import operator


In [66]:
## Load the strategy data

df = pd.read_csv('data/df_strategy.csv',
                  dtype={'min': str,
                          'hour': str,
                          'dayofweek': str,
                          'dayofmonth': str,
                          'dayofyear': str,
                          'week': str,
                          'month': str,
                          'quarter': str,
                          'year': str,
                          'quarter': str,
                          'quarter': str,  }  )

## Proces datetime columns

df['starting_time'] = pd.to_datetime(df['starting_time'], format='%Y-%m-%d %H:%M:%S')
df['entry_time'] = pd.to_datetime(df['entry_time'], format='%Y-%m-%d %H:%M:%S')
df['stoploss_time'] = pd.to_datetime(df['stoploss_time'], format='%Y-%m-%d %H:%M:%S')
df['buy_time'] = pd.to_datetime(df['buy_time'], format='%Y-%m-%d %H:%M:%S')
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

df.info()

In [67]:
df.columns

Index(['starting_time', 'entry_time', 'buy_time', 'stoploss_time', 'buy_price',
       'stoploss', 'target', 'target_ratio', 'common_support_val_5min',
       'common_resistance_val_5min', 'length_support_val_5min',
       'price_support_val_5min', 'time_support_val_5min',
       'length_resistance_val_5min', 'price_resistance_val_5min',
       'time_resistance_val_5min', 'common_support_val_1min',
       'common_resistance_val_1min', 'length_support_val_1min',
       'price_support_val_1min', 'time_support_val_1min',
       'length_resistance_val_1min', 'price_resistance_val_1min',
       'time_resistance_val_1min', 'common_support_val_15min',
       'common_resistance_val_15min', 'length_support_val_15min',
       'price_support_val_15min', 'time_support_val_15min',
       'length_resistance_val_15min', 'price_resistance_val_15min',
       'time_resistance_val_15min', 'common_support_val_1hour',
       'common_resistance_val_1hour', 'length_support_val_1hour',
       'price_support_v

In [70]:
## First Trade of the day data
df_1st_trade = df.groupby('date').first().reset_index()

In [71]:
def first_trade_analysis(df_1st_trade,xval,yval,levels=None):

    if len(levels)>1:
        df_1st_trade['key'] = df_1st_trade[levels].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
    else:
        df_1st_trade['key'] = df_1st_trade[levels[0]]
        df_1st_trade['key'] = df_1st_trade['key'].astype(str)
    # print(df_1st_trade.info())
    fig = px.scatter(df_1st_trade,x = xval,y=yval,color = 'key',custom_data=['date','key'], title = 'First trade analysis by {} vs {} Colour by {}'.format(xval,yval,'key'))
    fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))

    fig.update_traces(
    hovertemplate="<br>".join([
        "x: %{x}",
        "y: %{y}",
        "date: %{customdata[0]}",
        "key: %{customdata[1]}"
    ]))

    return fig

In [72]:
fig = first_trade_analysis(df_1st_trade,'gap','target_ratio', levels = ['month'])
fig.show()

In [73]:
fig = first_trade_analysis(df_1st_trade,'gap','target_ratio', levels = ['month','dayofmonth'])
fig.show()

In [154]:
### Filter data based on conditions  and use that in charts

df_1 = df[df['month']==5]

In [155]:
### Traget Ratio Threshold Analysis
def upward_vol(data, interval_levels , volatility_threshold):

    if len(interval_levels)>1:
        data['key'] = data[interval_levels].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
    else:
        data['key'] = data[interval_levels[0]]
        data['key'] = data['key'].astype(str)

    ## Considering required columns only
    data_req = data[['target_ratio','key']]
    
    ## Group and apply aggregartion by defined interval
    data_req_gr_by_intrvl = data_req.groupby( 'key' )['target_ratio'].apply(lambda x: (x > volatility_threshold ).sum()).reset_index()

    fig = px.histogram(data_req_gr_by_intrvl,x = 'key',y = 'target_ratio', title = 'Count of times Traget ratio was higher than {} by {} '.format(volatility_threshold,interval_levels))
    return fig

In [158]:
fig = upward_vol(df,['month','dayofweek'], 2)
fig.show()

In [76]:
### Traget Ratio Threshold Analysis
def downward_vol(data, interval_levels , volatility_threshold):

    if len(interval_levels)>1:
        data['key'] = data[interval_levels].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
    else:
        data['key'] = data[interval_levels[0]]
        data['key'] = data['key'].astype(str)

    ## Considering required columns only
    data_req = data[['target_ratio','key']]
    
    ## Group and apply aggregartion by defined interval
    data_req_gr_by_intrvl = data_req.groupby( 'key' )['target_ratio'].apply(lambda x: (x < volatility_threshold ).sum()).reset_index()

    fig = px.histogram(data_req_gr_by_intrvl,x = 'key',y = 'target_ratio', title = 'Count of times Traget ratio was lower than {} by {} '.format(volatility_threshold,interval_levels))
    return fig

In [77]:
fig = downward_vol(df,['dayofweek'], 0.5)
fig.show()

## DISTRIBUTION PLOTS

In [123]:
def distribution_plot(df,levels, val_col):
    levels.append(val_col)
    levels.append('entry_time')
    
    df_agg = df[levels]
    if len(levels)>1:
        df_agg['key'] = df_agg[levels[:-2]].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
    else:
        df_agg['key'] = df_agg[levels[0]]

    fig = px.violin(df_agg, y = val_col, color = 'key',title='Distribution of {}  by {}'.format(val_col, levels[:-2]),
                    box=True, points="all",
                    hover_data=df_agg.columns)

    return fig
    

In [124]:
## Distribution of target ratio by hour

fig = distribution_plot(df,['hour'], 'target_ratio')
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [125]:
## Distribution of target ratio by dayofweek

fig = distribution_plot(df,['dayofweek'], 'target_ratio')
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [126]:
## Distribution of target ratio by dayofweek, hour

fig = distribution_plot(df,['dayofweek','hour'], 'target_ratio')
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [127]:
## Distribution of target ratio by dayofmonth

fig = distribution_plot(df,['dayofmonth'], 'target_ratio')
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [128]:
## Distribution of target ratio by month

fig = distribution_plot(df,['month'], 'target_ratio')
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [129]:
## Distribution of target ratio by month, year

fig = distribution_plot(df,['month','year'], 'target_ratio')
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [130]:

def timeframe_cluster(df, value_col, interval_eda, value_col_ratio_type):


   #Cap very High & low values (For very high and low target ratio : outliers)
   percentiles = df[value_col].quantile([0.01, 0.99]).values
   df[value_col][ df[value_col] <= percentiles[0] ] = percentiles[0]
   df[value_col][ df[value_col] >= percentiles[1]] = percentiles[1]

   ##Sort the required data by date
   df_sort = df.sort_values(by='entry_time')

   ##Value went above 80 perc & went down below 20 perc
   high_threshold = np.quantile(df_sort[value_col], 0.80)
   low_threshold = np.quantile(df_sort[value_col], 0.20)

   def went_high(x):
      if x > high_threshold:
         return 1
      else:
         return 0
   def went_low(x):
      if x < low_threshold:
         return 1
      else:
         return 0

   df_sort['Went_high'] = df_sort[value_col].apply(went_high)
   df_sort['Went_low'] = df_sort[value_col].apply(went_low)
   
   
   ##############     EDA by interval    #################
   
   

   def harmean(x):
      if ( (x == 0).sum() / len(x) ) >= 0.7:
         return 0
               
      else:
         return hmean( x[x != 0] )
               
            

            

   if value_col_ratio_type == 'Y':
      df_processed_cv = df_sort.groupby(interval_eda)[value_col].apply( lambda x: x.std()/harmean(x) ).reset_index()
      df_processed_cv.columns = df_processed_cv.columns.str.replace(value_col, 'Consistency')     

   else:
      df_processed_cv = df_sort.groupby(interval_eda)[value_col].apply( lambda x: x.std()/x.mean() ).reset_index()
      df_processed_cv.columns = df_processed_cv.columns.str.replace(value_col, 'Consistency')


   df_processed_went_high = df_sort.groupby(interval_eda)['Went_high'].apply( lambda x: x.sum()/x.count() ).reset_index()
   df_processed_went_high.columns = df_processed_went_high.columns.str.replace(value_col, 'Went_high')

   df_processed_went_low = df_sort.groupby(interval_eda)['Went_low'].apply( lambda x: x.sum()/x.count() ).reset_index()
   df_processed_went_low.columns = df_processed_went_low.columns.str.replace(value_col, 'Went_low')

   def IQR(x):

      return np.quantile(x, 0.75) - np.quantile(x, 0.25)
      
   df_processed_iqr = df_sort.groupby(interval_eda)[value_col].apply( lambda x: IQR(x) ).reset_index()
   df_processed_iqr.columns = df_processed_iqr.columns.str.replace(value_col, 'Spread')
   
   df_processed = reduce(lambda df1,df2: pd.merge(df1,df2,on=interval_eda), [df_processed_cv, df_processed_went_high, df_processed_went_low, df_processed_iqr])

   df_processed['key'] = df_processed[interval_eda].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
   df_processed.fillna(0, inplace=True)
   
   #Cluster : Kmeans
   scaler = MinMaxScaler()
   kmeans_fit = KMeans(4).fit( scaler.fit_transform( df_processed[['Consistency', 'Went_high', 'Went_low', 'Spread']] ) )
   identified_clusters = kmeans_fit.fit_predict( scaler.fit_transform( df_processed[['Consistency', 'Went_high', 'Went_low', 'Spread']] ) )
   df_processed['Clusters'] = identified_clusters
   df_processed['Clusters'] = df_processed['Clusters'].astype('str')

   def f(row):
      if row['Clusters'] == '0':
         val = 'red'
      elif row['Clusters'] == '1':
         val = 'green'
      elif row['Clusters'] == '2':
         val = 'blue'
      elif row['Clusters'] == '3':
         val = 'yellow'
      
      return val

   df_processed['cluster_color'] = df_processed.apply(f, axis=1)

   fig = make_subplots(rows=3, cols=1,subplot_titles=("Consistency vs. Went High", "Consistency vs. Went Low", "Consistency vs. Spread"))
   fig.append_trace(go.Scatter(x = df_processed['Consistency'],y = df_processed['Went_high'],mode='markers', marker_color=df_processed['cluster_color'],hovertext=df_processed['Clusters'],
                               hoverinfo="text",),
                               row=1, col=1)
   fig.append_trace(go.Scatter(x = df_processed['Consistency'],y = df_processed['Went_low'], mode='markers', marker_color=df_processed['cluster_color'],hovertext=df_processed['Clusters'],
                               hoverinfo="text",),
                               row=2, col=1)
   fig.append_trace(go.Scatter(x = df_processed['Consistency'],y = df_processed['Spread'], mode='markers', marker_color=df_processed['cluster_color'],hovertext=df_processed['Clusters'],
                               hoverinfo="text",),
                               row=3, col=1)

   fig.update_layout(height=800, width=1200, title_text="Stacked Subplots")
   fig.update_layout(showlegend=False)
   
#  df_processed.plot.scatter('Consistency','Went_high',c = 'Clusters', cmap='rainbow', title = 'Consistency vs. Went High')
#  df_processed.plot.scatter('Consistency','Went_low',c = 'Clusters', cmap='rainbow', title = 'Consistency vs. Went Low')
#  df_processed.plot.scatter('Consistency','Spread',c = 'Clusters', cmap='rainbow', title = 'Consistency vs. Spread')
   
   return df_processed, fig

In [131]:
### Clustering by dayofmonth
df_processed, fig = timeframe_cluster(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['dayofmonth'], 
                                value_col_ratio_type = 'Y')

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [132]:
### Clustering by hour and day of week
df_processed, fig = timeframe_cluster(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['dayofweek','hour'], 
                                value_col_ratio_type = 'Y')

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [133]:
### Clustering by day of year
df_processed, fig = timeframe_cluster(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['dayofyear'], 
                                value_col_ratio_type = 'Y')

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [134]:
### Clustering by year and week
df_processed, fig = timeframe_cluster(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['year','week'], 
                                value_col_ratio_type = 'Y')

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [135]:
### Clustering by week
df_processed, fig = timeframe_cluster(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['year','month','week'], 
                                value_col_ratio_type = 'Y')

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [136]:

def timeframe_cluster_stat(df, value_col, interval_eda):


   #Cap very High & low values (For very high and low target ratio : outliers)
   percentiles = df[value_col].quantile([0.01, 0.99]).values
   df[value_col][ df[value_col] <= percentiles[0] ] = percentiles[0]
   df[value_col][ df[value_col] >= percentiles[1]] = percentiles[1]

   ##Sort the required data by date
   df_sort = df.sort_values(by='entry_time')
     


   df_processed_skew = df_sort.groupby(interval_eda)[value_col].apply( lambda x: x.skew() ).reset_index()
   df_processed_skew.columns = df_processed_skew.columns.str.replace(value_col, 'Skewness')     

   df_processed_kurt = df_sort.groupby(interval_eda)[value_col].apply( lambda x: x.kurtosis() ).reset_index()
   df_processed_kurt.columns = df_processed_kurt.columns.str.replace(value_col, 'Kurtosis')  

   
   df_processed = reduce(lambda df1,df2: pd.merge(df1,df2,on=interval_eda), [df_processed_skew, df_processed_kurt])

   df_processed['key'] = df_processed[interval_eda].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
   df_processed.fillna(0, inplace=True)

   
   #Cluster : Kmeans
   scaler = MinMaxScaler()
   kmeans_fit = KMeans(4).fit( scaler.fit_transform( df_processed[['Skewness', 'Kurtosis']] ) )
   identified_clusters = kmeans_fit.fit_predict( scaler.fit_transform( df_processed[['Skewness', 'Kurtosis']] ) )
   df_processed['Clusters'] = identified_clusters
   df_processed['Clusters'] = df_processed['Clusters'].astype('str')

   fig = px.scatter(df_processed, x = 'Skewness',y = 'Kurtosis', color='Clusters')


   
#  df_processed.plot.scatter('Consistency','Went_high',c = 'Clusters', cmap='rainbow', title = 'Consistency vs. Went High')
#  df_processed.plot.scatter('Consistency','Went_low',c = 'Clusters', cmap='rainbow', title = 'Consistency vs. Went Low')
#  df_processed.plot.scatter('Consistency','Spread',c = 'Clusters', cmap='rainbow', title = 'Consistency vs. Spread')
   
   return df_processed, fig

In [137]:
### Clustering by dayofmonth
df_processed, fig = timeframe_cluster_stat(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['dayofmonth'])

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [138]:
### Clustering by hour and day of week
df_processed, fig = timeframe_cluster_stat(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['dayofweek','hour'])

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [139]:
### Clustering by day of year
df_processed, fig = timeframe_cluster_stat(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['dayofyear'])

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [140]:
### Clustering by year and week
df_processed, fig = timeframe_cluster_stat(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['year','week'])

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [141]:
### Clustering by week
df_processed, fig = timeframe_cluster_stat(df, 
                                value_col = 'target_ratio', 
                                interval_eda = ['year','month','week'])

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [146]:
def datewise_trend_analysis(df, time_col, value_col):


    #Cap very High & low values
    percentiles = df[value_col].quantile([0.01, 0.99]).values
    df[value_col][ df[value_col] <= percentiles[0] ] = percentiles[0]
    df[value_col][ df[value_col] >= percentiles[1]] = percentiles[1]

    df['date'] = df[time_col].dt.strftime('%m/%d/%Y')
    df = df.set_index('date')
    
    #Filter df by dates with min 10 entries
    df_filtered = df[df.index.value_counts() >= 10]
    
    peak_valley_date_list = []
    for d in df_filtered.index.unique():

        df_by_date = df_filtered[(df_filtered.index == d)].sort_values(time_col)
        df_by_date['seasonality'] = seasonal_decompose(df_by_date[value_col], period=5).seasonal.tolist()
        
        # Find indices , Time & count of peaks
        peak_idx = find_peaks(df_by_date['seasonality'])[0]
        peak_time = df_by_date.iloc[peak_idx][time_col].tolist()
        peak_interval = list(map( ( operator.sub ), peak_time[1:], peak_time[:-1]))
        peak_interval_min = list( map(lambda x: x/pd.Timedelta(minutes=1), peak_interval) )

        # Find indices , Time & count of seasonality
        valley_idx = find_peaks(-df_by_date['seasonality'])[0]
        valley_time = df_by_date.iloc[valley_idx][time_col].tolist()
        valley_interval = list(map( ( operator.sub ), valley_time[1:], valley_time[:-1]))
        valley_interval_min = list( map(lambda x: x/pd.Timedelta(minutes=1), valley_interval) )
        
        def weird_division(n, d):
                return n / d if d else 0

        # Final data by each date
        peak_valley_by_date = pd.DataFrame({'Date': d,
                                            'No. of Peaks occured': len(peak_idx), 'Peak occured at': [peak_time], 'Peak Intervals in min': [peak_interval_min], 
                                            'Total peak_intervals in min': sum(peak_interval_min), 'Avg. peak_intervals in min': weird_division( sum(peak_interval_min), len(peak_idx) - 1 ),
                                            'No. of Valleys occured': len(valley_idx), 'Valley occured at': [valley_time], 'Valley Intervals in min': [valley_interval_min],
                                            'Total valley_intervals in min': sum(valley_interval_min), 'Avg. valley_intervals in min': weird_division( sum(valley_interval_min), len(valley_idx) - 1 )})
                                            
        peak_valley_date_list.append(peak_valley_by_date)

    peak_valley_date_df = pd.concat(peak_valley_date_list)


    
    return peak_valley_date_df


In [147]:
peak_valley_date_df =    datewise_trend_analysis(
            df,
            time_col = 'entry_time',                                                            #Input for timecol as timeindex
            value_col= 'target_ratio'                                                        #Input confirming the value column is ratio type variable
             )



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Boolean Series key will be reindexed to match DataFrame index.



In [148]:
peak_valley_date_df

Unnamed: 0,Date,No. of Peaks occured,Peak occured at,Peak Intervals in min,Total peak_intervals in min,Avg. peak_intervals in min,No. of Valleys occured,Valley occured at,Valley Intervals in min,Total valley_intervals in min,Avg. valley_intervals in min
0,07/31/2017,1,[2017-07-31 11:55:00],[],0.0,0.000000,2,"[2017-07-31 10:15:00, 2017-07-31 12:30:00]",[135.0],135.0,135.000000
0,10/09/2017,2,"[2017-10-09 10:20:00, 2017-10-09 12:45:00]",[145.0],145.0,145.000000,1,[2017-10-09 12:00:00],[],0.0,0.000000
0,11/03/2017,4,"[2017-11-03 10:30:00, 2017-11-03 11:10:00, 201...","[40.0, 90.0, 15.0]",145.0,48.333333,4,"[2017-11-03 11:05:00, 2017-11-03 12:00:00, 201...","[55.0, 45.0, 100.0]",200.0,66.666667
0,12/14/2017,2,"[2017-12-14 11:15:00, 2017-12-14 12:50:00]",[95.0],95.0,95.000000,2,"[2017-12-14 11:25:00, 2017-12-14 13:35:00]",[130.0],130.0,130.000000
0,01/19/2018,2,"[2018-01-19 11:20:00, 2018-01-19 14:00:00]",[160.0],160.0,160.000000,2,"[2018-01-19 12:45:00, 2018-01-19 14:45:00]",[120.0],120.0,120.000000
...,...,...,...,...,...,...,...,...,...,...,...
0,08/23/2021,3,"[2021-08-23 11:15:00, 2021-08-23 12:15:00, 202...","[60.0, 75.0]",135.0,67.500000,4,"[2021-08-23 09:45:00, 2021-08-23 11:55:00, 202...","[130.0, 50.0, 125.0]",305.0,101.666667
0,08/31/2021,2,"[2021-08-31 10:20:00, 2021-08-31 12:00:00]",[100.0],100.0,100.000000,1,[2021-08-31 10:55:00],[],0.0,0.000000
0,09/22/2021,4,"[2021-09-22 10:20:00, 2021-09-22 11:25:00, 202...","[65.0, 80.0, 45.0]",190.0,63.333333,3,"[2021-09-22 11:05:00, 2021-09-22 11:30:00, 202...","[25.0, 100.0]",125.0,62.500000
0,09/30/2021,2,"[2021-09-30 12:10:00, 2021-09-30 13:00:00]",[50.0],50.0,50.000000,2,"[2021-09-30 12:20:00, 2021-09-30 13:05:00]",[45.0],45.0,45.000000


In [152]:
col_list = ['buy_price',
       'stoploss', 'target', 'target_ratio', 'common_support_val_5min',
       'common_resistance_val_5min', 'min', 'hour', 'dayofweek', 'dayofmonth',
       'dayofyear', 'week', 'month', 'quarter', 'year', 'gap', 'date']

In [153]:
corr_matrix = df[ col_list ].corr(method='pearson')[['stoploss', 'target', 'target_ratio','buy_price']]

fig = px.imshow(corr_matrix, text_auto=True)
fig.show()