<a href="https://colab.research.google.com/github/Nischal5123/foreCache-interaction/blob/develop/Subset_Statistics_Mann_Kendall_Trend_ForeCache.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from statsmodels.tsa.stattools import adfuller
import pymannkendall as mk

In [None]:
pip install pymannkendall



In [None]:
# Adapted from the state-action analysis notebook:
# https://colab.research.google.com/drive/1n9nJdd84etLey3piTKkRJmttpZlW1gor#scrollTo=bgGiHn-LobHm
#
# Load the state-action CSVs of the NDSI 2D and NDSI 3D tasks and keep only the
# sessions whose file-name suffix appears in BOTH folders, so that
# df_twoD[k], df_threeD[k] and users[k] all describe the same user.
global_drive_path="/content/drive/MyDrive/ForeCache/interactions/stateActionModel/"
path_twoD=os.path.join(global_drive_path, "ndsi-2d")
path_threeD=os.path.join(global_drive_path, "ndsi-3d")

# Both folders are matched on everything after the first 22 characters of the
# file name. Presumably the names look like "taskname_ndsi-2d-task_<user>.csv"
# (the two task prefixes have the same length) -- TODO confirm against the folder.
TASK_PREFIX_LENGTH=len("taskname_ndsi-2d-task_")
CSV_SUFFIX=".csv"

# Only CSV files are considered; the order is whatever os.listdir returns.
fileNames_NDSI_3D=[entry for entry in os.listdir(path_threeD) if entry.endswith(CSV_SUFFIX)]
fileNames_NDSI_2D=[entry for entry in os.listdir(path_twoD) if entry.endswith(CSV_SUFFIX)]

matchingName=[]
df_twoD=[]
df_threeD=[]
for name_2d in fileNames_NDSI_2D:
    for name_3d in fileNames_NDSI_3D:
        if name_2d[TASK_PREFIX_LENGTH:] == name_3d[TASK_PREFIX_LENGTH:]:
            df_twoD.append(pd.read_csv(os.path.join(path_twoD, name_2d), sep=','))
            # `df` stays bound to the most recently read 3D frame because the
            # single-file test cell near the end of the notebook reads it.
            df=pd.read_csv(os.path.join(path_threeD, name_3d), sep=',')
            df_threeD.append(df)
            matchingName.append(name_2d)

#STORE ALL USERS NAME (file name without the task prefix and the ".csv" extension)
users=[matched[TASK_PREFIX_LENGTH:-len(CSV_SUFFIX)] for matched in matchingName]

In [None]:
class StationarityTests:
    """Run a stationarity / trend test on one series and keep the verdict.

    Attributes set by the test methods:
        pValue        p-value of the most recent test.
        isStationary  True when the most recent test judged the series stationary.
        trend         Trend label of the most recent Mann-Kendall test
                      (stays None until that test has been run).
    """

    def __init__(self, significance=.05):
        """Store the significance level used to turn p-values into verdicts."""
        self.SignificanceLevel = significance
        self.pValue = None
        self.isStationary = None
        self.trend= None

    def ADF_Stationarity_Test(self, timeseries, printResults = True):
        """Augmented Dickey-Fuller test (lag order selected by AIC).

        The series is flagged stationary when the p-value is below the
        configured significance level.

        Args:
            timeseries: 1-D sequence of numeric observations.
            printResults: when True, print the statistic, p-value, lag count,
                observation count and critical values.
        """
        adfTest = adfuller(timeseries, autolag='AIC')

        self.pValue = adfTest[1]
        self.isStationary = bool(self.pValue < self.SignificanceLevel)

        if printResults:
            dfResults = pd.Series(adfTest[0:4], index=['ADF Test Statistic','P-Value','# Lags Used','# Observations Used'])
            #Add Critical Values
            for key,value in adfTest[4].items():
                dfResults['Critical Value (%s)'%key] = value
            print('Augmented Dickey-Fuller Test Results:')
            print(dfResults)

    def Mann_Kendall_Stationarity_Test(self, timeseries, printResults = True):
        """Original Mann-Kendall trend test at the configured significance level.

        Sets `trend` to the label reported by pymannkendall and `pValue` to the
        test's p-value. pymannkendall's `h` flag is True when a significant
        trend IS present, so `isStationary` is its negation.

        Args:
            timeseries: 1-D sequence of numeric observations.
            printResults: when True, print every field of the test result.
        """
        mktTest = mk.original_test(timeseries, alpha=self.SignificanceLevel)

        self.trend = mktTest.trend
        self.pValue = mktTest.p
        self.isStationary = not mktTest.h

        if printResults:
            # The result is a flat named tuple of scalars (there is no
            # critical-value mapping as in the ADF result), so print its fields.
            dfResults = pd.Series(mktTest._asdict())
            dfResults['Stationary'] = self.isStationary
            print('Mann Kendall Trend Test:')
            print(dfResults)

In [None]:
def make_numerical_dataframe(df):
  """Encode the 'Action' and 'State' columns of `df` as integer label series.

  Labels are assigned with pd.factorize, i.e. in order of first appearance.
  The input frame is NOT modified, so the function can be called repeatedly
  on the same frame and always sees the original values.

  Args:
    df: DataFrame with 'Action' and 'State' columns.

  Returns:
    A list of three integer arrays, in this order:
    [joint (Action, State) codes, Action codes, State codes].
    A row whose Action or State is missing gets code -1 in the joint series.
  """
  action=df['Action']
  state=df['State']

  # Build the joint key from the string forms with a separator. A plain `+`
  # would turn into integer addition on already-encoded columns and merge
  # different pairs (1+2 == 2+1).
  state_action=action.astype(str) + '\x1f' + state.astype(str)
  # Keep missing values missing so that factorize labels them -1.
  state_action=state_action.where(action.notna() & state.notna())

  state_action_correlated_series=pd.factorize(state_action)[0]
  action_only_series=pd.factorize(action)[0]
  state_only_series=pd.factorize(state)[0]
  options=[state_action_correlated_series,action_only_series,state_only_series]
  return options

In [None]:
#3D Mann Kendall, run separately on consecutive non-overlapping windows of each session
saveDir=(str(global_drive_path) +"StationarityTests/ndsi-3d/MKT_Sliding")
os.makedirs(saveDir, exist_ok=True)  # to_csv does not create missing folders
window=30  # rows per window; a trailing partial window is skipped
# NOTE(review): the first column is labelled 'User' but stores the starting row
# index of the window. The header is left as-is in case other code reads it by name.
result_columns=['User','MKT Stationary_State_Action','MKT Stationary_Action','MKT Stationary_State']
for i in tqdm(range(len(df_threeD))):
  print(users[i])
  # Encode the whole session once with the shared helper; it returns the
  # [state+action, action, state] integer series, which are then cut into windows.
  three_full_series=make_numerical_dataframe(df_threeD[i])
  window_rows=[]
  for window_start in range(0, len(df_threeD[i]) - window + 1, window):
    three_series_results=[window_start]
    for full_series in three_full_series:
      sTest_state = StationarityTests()
      sTest_state.Mann_Kendall_Stationarity_Test(full_series[window_start:window_start + window], printResults = False)
      three_series_results.append(sTest_state.trend)
    window_rows.append(three_series_results)
  # Build the per-user table once and write it once, instead of growing it row
  # by row and rewriting the CSV after every window.
  final_dataframe_3d = pd.DataFrame(window_rows, columns=result_columns)
  if window_rows:  # sessions shorter than one window produce no file
    userfilename= str(users[i]) + "MKT_sliding.csv"
    final_dataframe_3d.to_csv(os.path.join(saveDir,userfilename), index=False)

 12%|█▏        | 2/17 [00:00<00:01, 11.93it/s]

userid_ac5b65d7-f260-4534-9fae-d998b726ed32
userid_da52acdd-3cea-483c-86e5-2c2740d371aa
userid_82316e37-1117-4663-84b4-ddb6455c83b2


 24%|██▎       | 4/17 [00:00<00:01, 12.95it/s]

userid_d6555293-35c7-4c27-b522-38d67d2d1b1a
userid_ff56863b-0710-4a58-ad22-4bf2889c9bc0


 47%|████▋     | 8/17 [00:00<00:00, 12.51it/s]

userid_bc9f9425-e867-4876-a5d9-791229916767
userid_7d014c75-453b-4188-9e4e-fb086984896a
userid_bda49380-37ad-41c5-a109-7fa198a7691a
userid_8bf5b71b-dfd4-403f-8791-06427b26ccaf
userid_cd3ef507-e3e9-4edb-9222-90a69a9bf272


 65%|██████▍   | 11/17 [00:00<00:00, 15.34it/s]

userid_93be0a02-3f7a-46c9-b526-aa956ed00856
userid_6d49fab8-273b-4a91-948b-ecd14556b049
userid_733a1ac5-0b01-485e-9b29-ac33932aa240
userid_e4221e48-0930-40db-af3c-f4de1ed87582


100%|██████████| 17/17 [00:01<00:00, 15.06it/s]

userid_954edb7c-4eae-47ab-9338-5c5c7eccac2d
userid_a6aab5f5-fdb6-41df-9fc6-221d70f8c6e8
userid_8b544d24-3274-4bb0-9719-fd2bccc87b02





In [None]:
#2D Mann Kendall, run separately on consecutive non-overlapping windows of each session
saveDir=(str(global_drive_path) +"StationarityTests/ndsi-2d/MKT_Sliding")
os.makedirs(saveDir, exist_ok=True)  # to_csv does not create missing folders
window=10  # rows per window; a trailing partial window is skipped
# NOTE(review): the first column is labelled 'User' but stores the starting row
# index of the window. The header is left as-is in case other code reads it by name.
result_columns=['User','MKT Stationary_State_Action','MKT Stationary_Action','MKT Stationary_State']
for i in tqdm(range(len(df_twoD))):
  print(users[i])
  # Encode the whole session once with the shared helper; it returns the
  # [state+action, action, state] integer series, which are then cut into windows.
  encoded_series_2d=make_numerical_dataframe(df_twoD[i])
  session_length=len(df_twoD[i])
  rows_2d=[]
  for window_start in range(0, session_length - window + 1, window):
    window_end=window_start + window
    three_series_results=[window_start]
    for encoded in encoded_series_2d:
      sTest_state = StationarityTests()
      sTest_state.Mann_Kendall_Stationarity_Test(encoded[window_start:window_end], printResults = False)
      three_series_results.append(sTest_state.trend)
    rows_2d.append(three_series_results)
  # Build the per-user table once and write it once, instead of growing it row
  # by row and rewriting the CSV after every window.
  final_dataframe_2d = pd.DataFrame(rows_2d, columns=result_columns)
  if rows_2d:  # sessions shorter than one window produce no file
    userfilename= str(users[i]) + "MKT_sliding.csv"
    final_dataframe_2d.to_csv(os.path.join(saveDir,userfilename), index=False)

  0%|          | 0/17 [00:00<?, ?it/s]

userid_ac5b65d7-f260-4534-9fae-d998b726ed32


 12%|█▏        | 2/17 [00:00<00:05,  2.97it/s]

userid_da52acdd-3cea-483c-86e5-2c2740d371aa
userid_82316e37-1117-4663-84b4-ddb6455c83b2
userid_d6555293-35c7-4c27-b522-38d67d2d1b1a


 24%|██▎       | 4/17 [00:01<00:02,  4.71it/s]

userid_ff56863b-0710-4a58-ad22-4bf2889c9bc0


 29%|██▉       | 5/17 [00:01<00:03,  3.88it/s]

userid_bc9f9425-e867-4876-a5d9-791229916767
userid_7d014c75-453b-4188-9e4e-fb086984896a


 41%|████      | 7/17 [00:01<00:02,  4.32it/s]

userid_bda49380-37ad-41c5-a109-7fa198a7691a


 53%|█████▎    | 9/17 [00:02<00:01,  4.36it/s]

userid_8bf5b71b-dfd4-403f-8791-06427b26ccaf
userid_cd3ef507-e3e9-4edb-9222-90a69a9bf272

 59%|█████▉    | 10/17 [00:02<00:01,  4.66it/s]


userid_93be0a02-3f7a-46c9-b526-aa956ed00856


 71%|███████   | 12/17 [00:02<00:01,  4.18it/s]

userid_6d49fab8-273b-4a91-948b-ecd14556b049
userid_733a1ac5-0b01-485e-9b29-ac33932aa240
userid_e4221e48-0930-40db-af3c-f4de1ed87582

 88%|████████▊ | 15/17 [00:03<00:00,  6.49it/s]


userid_954edb7c-4eae-47ab-9338-5c5c7eccac2d
userid_a6aab5f5-fdb6-41df-9fc6-221d70f8c6e8


 94%|█████████▍| 16/17 [00:03<00:00,  6.51it/s]

userid_8b544d24-3274-4bb0-9719-fd2bccc87b02


100%|██████████| 17/17 [00:03<00:00,  4.62it/s]


Unnamed: 0,Action,State,TimeStamp,Angle,State_Action
10,2,2,2016-06-08 18:12:38.558,10.449105,4
11,2,2,2016-06-08 18:12:41.054,354.659732,4
12,2,2,2016-06-08 18:12:41.538,353.731233,4
13,2,2,2016-06-08 18:12:43.919,349.541828,4
14,2,2,2016-06-08 18:12:44.382,345.467115,4


In [None]:
#3D Mann Kendall on each user's complete series (one result row per user)
saveDir=(str(global_drive_path) +"StationarityTests/ndsi-3d")
os.makedirs(saveDir, exist_ok=True)  # to_csv does not create missing folders
summary_rows=[]
# Iterate over the sessions themselves. The previous bound, len(df_threeD[i]),
# was the row count of whichever session a stale `i` from an earlier cell
# pointed at, not the number of sessions.
for i in tqdm(range(len(df_threeD))):
  three_series=make_numerical_dataframe(df_threeD[i])
  three_series_results=[users[i]]
  for series in (three_series):
    sTest_state = StationarityTests()
    sTest_state.Mann_Kendall_Stationarity_Test(series, printResults = False)
    three_series_results.append(sTest_state.trend)
  summary_rows.append(three_series_results)
# Assemble and write the summary once, after all users have been processed.
final_dataframe_3d = pd.DataFrame(summary_rows, columns=['User','MKT Stationary_State_Action','MKT Stationary_Action','MKT Stationary_State'])
if summary_rows:
  final_dataframe_3d.to_csv(os.path.join(saveDir,"MKT.csv"), index=False)

In [None]:
# The following cells run the stationarity tests on a single file.

In [None]:
# `df` is whatever frame was bound last by the cells above -- presumably the
# last 3D session read while loading; verify before interpreting the results.
# The three series are produced by the shared helper rather than by adding the
# 'Action' and 'State' columns directly. Direct addition only works if an
# earlier cell already replaced those columns with integer codes, and summing
# two label codes cannot tell (1, 2) from (2, 1).
state_action_correlated_series, action_only_series, state_only_series = make_numerical_dataframe(df)
options=[state_action_correlated_series,action_only_series,state_only_series]

In [None]:
class StationarityTests:
    """Stationarity / trend tests for the single-file cells.

    This re-declares the class defined earlier in the notebook. It exposes the
    same members (including `trend` and the Mann-Kendall test), so cells that
    use those still work when they are re-run after this cell.
    """

    def __init__(self, significance=.05):
        """Store the significance level used to turn p-values into verdicts."""
        self.SignificanceLevel = significance
        self.pValue = None
        self.isStationary = None
        self.trend = None

    def ADF_Stationarity_Test(self, timeseries, printResults = True):
        """Augmented Dickey-Fuller test (lag order selected by AIC).

        Sets `pValue`, and sets `isStationary` to True when the p-value is
        below the configured significance level. When `printResults` is True
        the statistic, p-value, lag count, observation count and critical
        values are printed.
        """
        adfTest = adfuller(timeseries, autolag='AIC')

        self.pValue = adfTest[1]
        self.isStationary = bool(self.pValue < self.SignificanceLevel)

        if printResults:
            dfResults = pd.Series(adfTest[0:4], index=['ADF Test Statistic','P-Value','# Lags Used','# Observations Used'])
            #Add Critical Values
            for key,value in adfTest[4].items():
                dfResults['Critical Value (%s)'%key] = value
            print('Augmented Dickey-Fuller Test Results:')
            print(dfResults)

    def Mann_Kendall_Stationarity_Test(self, timeseries, printResults = True):
        """Original Mann-Kendall trend test at the configured significance level.

        Sets `trend` and `pValue` from the pymannkendall result. Its `h` flag
        is True when a significant trend is present, so `isStationary` is the
        negation of `h`. When `printResults` is True every field of the result
        is printed.
        """
        mktTest = mk.original_test(timeseries, alpha=self.SignificanceLevel)

        self.trend = mktTest.trend
        self.pValue = mktTest.p
        self.isStationary = not mktTest.h

        if printResults:
            dfResults = pd.Series(mktTest._asdict())
            dfResults['Stationary'] = self.isStationary
            print('Mann Kendall Trend Test:')
            print(dfResults)

In [None]:
# Run the ADF test on each encoded series in turn and report its verdict.
for candidate_series in options:
  adf_runner = StationarityTests()
  adf_runner.ADF_Stationarity_Test(candidate_series, printResults = True)
  verdict = adf_runner.isStationary
  print("Is the time series stationary? {0}".format(verdict))
  print('\n')

Augmented Dickey-Fuller Test Results:
ADF Test Statistic      -2.970378
P-Value                  0.037755
# Lags Used              5.000000
# Observations Used     59.000000
Critical Value (1%)     -3.546395
Critical Value (5%)     -2.911939
Critical Value (10%)    -2.593652
dtype: float64
Is the time series stationary? True


Augmented Dickey-Fuller Test Results:
ADF Test Statistic      -5.476252
P-Value                  0.000002
# Lags Used              0.000000
# Observations Used     64.000000
Critical Value (1%)     -3.536928
Critical Value (5%)     -2.907887
Critical Value (10%)    -2.591493
dtype: float64
Is the time series stationary? True


Augmented Dickey-Fuller Test Results:
ADF Test Statistic      -3.063729
P-Value                  0.029356
# Lags Used              4.000000
# Observations Used     60.000000
Critical Value (1%)     -3.544369
Critical Value (5%)     -2.911073
Critical Value (10%)    -2.593190
dtype: float64
Is the time series stationary? True


