<a href="https://colab.research.google.com/github/Nischal5123/foreCache-interaction/blob/main/Subset_Statistics_Mann_Kendall_Trend_ForeCache.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [66]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from statsmodels.tsa.stattools import adfuller
import pymannkendall as mk

In [2]:
pip install pymannkendall

Collecting pymannkendall
  Downloading pymannkendall-1.4.2-py3-none-any.whl (12 kB)
Installing collected packages: pymannkendall
Successfully installed pymannkendall-1.4.2


In [67]:
# this is implemented from STATE ACTION ANALYSIS FILE : https://colab.research.google.com/drive/1n9nJdd84etLey3piTKkRJmttpZlW1gor#scrollTo=bgGiHn-LobHm

#GET ALL STATE ACTION MODEL FOR NDSI 2D AND NDSI 3D
global_drive_path="/content/drive/MyDrive/ForeCache/interactions/stateActionModel/"
path=(str(global_drive_path) +"/ndsi-3d")
fileNames_NDSI_3D=[]
for filename in os.listdir(path):
    if filename.endswith(".csv"):
        filePath=(os.path.join(path, filename))
        fileNames_NDSI_3D.append(filename)
    else:
        continue
path=(str(global_drive_path) +"/ndsi-2d")
fileNames_NDSI_2D=[]
for filename in os.listdir(path):
    if filename.endswith(".csv"):
        filePath=(os.path.join(path, filename))
        fileNames_NDSI_2D.append(filename)
    else:
        continue
matchingName=[]
df_twoD=[]
df_threeD=[]
path_twoD=(str(global_drive_path) +"/ndsi-2d")
path_threeD=(str(global_drive_path) +"/ndsi-3d")
for i in range(len(fileNames_NDSI_2D)):
  for j in range(len(fileNames_NDSI_3D)):
  # if fileNames_NDSI_2D[i].removesuffix('taskname_ndsi-2d-task_') == fileNames_NDSI_3D[i].removesuffix('taskname_ndsi-3d-task_'):
   if fileNames_NDSI_2D[i][22:] == fileNames_NDSI_3D[j][22:]:
      #print(os.path.join(path, str(fileNames_NDSI_2D[i])))
      df=pd.read_csv(os.path.join(path_twoD, str(fileNames_NDSI_2D[i])),sep=',')
      df_twoD.append(df)
      df=pd.read_csv(os.path.join(path_threeD, str(fileNames_NDSI_3D[j])),sep=',')
      df_threeD.append(df)
      matchingName.append(fileNames_NDSI_2D[i])
#STORE ALL USERS NAME
users=[]
for name in matchingName:
    users.append(name[22:-4:1])

In [68]:
class StationarityTests:
    def __init__(self, significance=.05):
        self.SignificanceLevel = significance
        self.pValue = None
        self.isStationary = None
        self.trend= None

    def ADF_Stationarity_Test(self, timeseries, printResults = True):
      #Dickey-Fuller test:
      adfTest = adfuller(timeseries, autolag='AIC')
      
      self.pValue = adfTest[1]
      
      if (self.pValue<self.SignificanceLevel):
          self.isStationary = True
      else:
          self.isStationary = False
      
      if printResults:
          dfResults = pd.Series(adfTest[0:4], index=['ADF Test Statistic','P-Value','# Lags Used','# Observations Used'])
          #Add Critical Values
          for key,value in adfTest[4].items():
              dfResults['Critical Value (%s)'%key] = value
          print('Augmented Dickey-Fuller Test Results:')
          print(dfResults)
    
    def Mann_Kendall_Stationarity_Test(self, timeseries, printResults = True):
      #Mann Kendall Trend Test:
      mktTest = mk.original_test(timeseries)
      
      self.pValue = mktTest[2]
      self.trend= mktTest[0]
      self.isStationary = mktTest[1]
      
      if printResults:
          dfResults = pd.Series(mktTest[0:3], index=['Trend Type','Stationarity','P-Value'])
          #Add Critical Values
          for key,value in mktTest[3].items():
              dfResults['Critical Value (%s)'%key] = value
          print('Mann Kendall Trend Test:')
          print(dfResults)

In [69]:
def make_numerical_dataframe(df):
  df['State_Action']=(df['Action']+ df['State'])
 
  df['Action']=pd.factorize(df['Action'])[0]
  df['State']=pd.factorize(df['State'])[0]
  state_action_correlated_series=pd.factorize(df['State_Action'])[0]
 
  action_only_series=df['Action'].values
  state_only_series=df['State'].values
  options=[state_action_correlated_series,action_only_series,state_only_series]
  return options

In [64]:

#3D Mann Kendall
saveDir=(str(global_drive_path) +"StationarityTests/ndsi-3d/MKT_Sliding")
failed_files=[]
for i in tqdm(range(len(df_threeD))):
  final_dataframe_3d = pd.DataFrame(columns=['User','MKT Stationary_State_Action'])
  subtask=0
  length_parsedFile=0


  window_dataframe=(df_threeD[i])
  window_dataframe['State_Action']=(window_dataframe['Action']+ window_dataframe['State'])
  window_dataframe['Action']=pd.factorize(window_dataframe['Action'])[0]
  window_dataframe['State']=pd.factorize(window_dataframe['State'])[0]
  window_dataframe['State_Action']=pd.factorize(window_dataframe['State_Action'])[0]
  subtask_values=window_dataframe["Subtask"].unique()
  original_fileLength=len(window_dataframe)

  print("Starting for user", users[i])
  while ((length_parsedFile < original_fileLength) and (subtask <= len(subtask_values))):
    
    window_dataframe_subset=window_dataframe.loc[window_dataframe['Subtask'] == subtask_values[subtask]]
    length_parsedFile+=len(window_dataframe_subset)
    print("Length of file",len(window_dataframe_subset), " in ","Subtask: ",subtask_values[subtask])
    #creating series
    state_action_correlated_series=window_dataframe_subset['State_Action'].values
    # action_only_series=window_dataframe_subset['Action'].values
    # state_only_series=window_dataframe_subset['State'].values
    three_series=[state_action_correlated_series]
    three_series_results=[subtask_values[subtask]]
    for series in (three_series):
      if len(series)>2:
        sTest_state = StationarityTests()
        sTest_state.Mann_Kendall_Stationarity_Test(series, printResults = False)
        three_series_results.append(sTest_state.trend)
        
      else:
        three_series_results.append("Not Enough Data")
        print("Failed for user", users[i])
        failed_files.append(users[i])
    df_length = len(final_dataframe_3d)
    final_dataframe_3d.loc[df_length] = three_series_results
    userfilename= str(users[i]) + "MKT_sliding.csv"
    final_dataframe_3d.to_csv(os.path.join(saveDir,userfilename), index=False)
    subtask +=1
    

 11%|█         | 2/19 [00:00<00:01, 16.11it/s]

Starting for user userid_ac5b65d7-f260-4534-9fae-d998b726ed32
Length of file 105  in  Subtask:  1
Length of file 2  in  Subtask:  2
Failed for user userid_ac5b65d7-f260-4534-9fae-d998b726ed32
Length of file 71  in  Subtask:  3
Starting for user userid_82316e37-1117-4663-84b4-ddb6455c83b2
Length of file 69  in  Subtask:  1
Length of file 25  in  Subtask:  2
Length of file 35  in  Subtask:  3
Starting for user userid_da52acdd-3cea-483c-86e5-2c2740d371aa
Length of file 171  in  Subtask:  1
Length of file 2  in  Subtask:  2
Failed for user userid_da52acdd-3cea-483c-86e5-2c2740d371aa


 32%|███▏      | 6/19 [00:00<00:00, 14.31it/s]

Starting for user userid_ff56863b-0710-4a58-ad22-4bf2889c9bc0
Length of file 92  in  Subtask:  1
Length of file 32  in  Subtask:  2
Length of file 207  in  Subtask:  3
Starting for user userid_d6555293-35c7-4c27-b522-38d67d2d1b1a
Length of file 42  in  Subtask:  1
Length of file 21  in  Subtask:  2
Starting for user userid_bc9f9425-e867-4876-a5d9-791229916767
Length of file 52  in  Subtask:  1
Length of file 2  in  Subtask:  2
Failed for user userid_bc9f9425-e867-4876-a5d9-791229916767
Starting for user userid_bda49380-37ad-41c5-a109-7fa198a7691a

 42%|████▏     | 8/19 [00:00<00:00, 15.22it/s]


Length of file 89  in  Subtask:  1
Length of file 1  in  Subtask:  2
Failed for user userid_bda49380-37ad-41c5-a109-7fa198a7691a
Starting for user userid_7d014c75-453b-4188-9e4e-fb086984896a
Length of file 91  in  Subtask:  1
Length of file 3  in  Subtask:  2
Starting for user userid_8bf5b71b-dfd4-403f-8791-06427b26ccaf
Length of file 104  in  Subtask:  1
Length of file 2  in  Subtask:  2
Failed for user userid_8bf5b71b-dfd4-403f-8791-06427b26ccaf
Length of file 9  in  Subtask:  3
Starting for user userid_cd3ef507-e3e9-4edb-9222-90a69a9bf272
Length of file 89  in  Subtask:  1


 63%|██████▎   | 12/19 [00:00<00:00, 14.17it/s]

Length of file 2  in  Subtask:  2
Failed for user userid_cd3ef507-e3e9-4edb-9222-90a69a9bf272
Starting for user userid_3abeecbe-327a-441e-be2a-0dd3763c1d45
Length of file 105  in  Subtask:  1
Length of file 2  in  Subtask:  2
Failed for user userid_3abeecbe-327a-441e-be2a-0dd3763c1d45
Starting for user userid_93be0a02-3f7a-46c9-b526-aa956ed00856
Length of file 75  in  Subtask:  1
Length of file 6  in  Subtask:  2
Length of file 3  in  Subtask:  3
Length of file 4  in  Subtask:  4
Length of file 3  in  Subtask:  5
Starting for user userid_e4221e48-0930-40db-af3c-f4de1ed87582
Length of file 85  in  Subtask:  1
Length of file 68  in  Subtask:  2


 89%|████████▉ | 17/19 [00:01<00:00, 18.61it/s]

Length of file 4  in  Subtask:  3
Length of file 49  in  Subtask:  4
Starting for user userid_6d49fab8-273b-4a91-948b-ecd14556b049
Length of file 61  in  Subtask:  1
Length of file 2  in  Subtask:  2
Failed for user userid_6d49fab8-273b-4a91-948b-ecd14556b049
Starting for user userid_733a1ac5-0b01-485e-9b29-ac33932aa240
Length of file 92  in  Subtask:  1
Length of file 1  in  Subtask:  2
Failed for user userid_733a1ac5-0b01-485e-9b29-ac33932aa240
Starting for user userid_a6aab5f5-fdb6-41df-9fc6-221d70f8c6e8
Length of file 89  in  Subtask:  1
Length of file 1  in  Subtask:  2
Failed for user userid_a6aab5f5-fdb6-41df-9fc6-221d70f8c6e8
Starting for user userid_44968286-f204-4ad6-a9b5-d95b38e97866
Length of file 45  in  Subtask:  1
Length of file 2  in  Subtask:  2
Failed for user userid_44968286-f204-4ad6-a9b5-d95b38e97866
Length of file 38  in  Subtask:  3
Starting for user userid_954edb7c-4eae-47ab-9338-5c5c7eccac2d
Length of file 81  in  Subtask:  1
Length of file 2  in  Subtask:  2
F

100%|██████████| 19/19 [00:01<00:00, 15.91it/s]

Length of file 65  in  Subtask:  3
Starting for user userid_8b544d24-3274-4bb0-9719-fd2bccc87b02
Length of file 19  in  Subtask:  1
Length of file 10  in  Subtask:  2
Length of file 26  in  Subtask:  3





In [65]:
failed_files

['userid_ac5b65d7-f260-4534-9fae-d998b726ed32',
 'userid_da52acdd-3cea-483c-86e5-2c2740d371aa',
 'userid_bc9f9425-e867-4876-a5d9-791229916767',
 'userid_bda49380-37ad-41c5-a109-7fa198a7691a',
 'userid_8bf5b71b-dfd4-403f-8791-06427b26ccaf',
 'userid_cd3ef507-e3e9-4edb-9222-90a69a9bf272',
 'userid_3abeecbe-327a-441e-be2a-0dd3763c1d45',
 'userid_6d49fab8-273b-4a91-948b-ecd14556b049',
 'userid_733a1ac5-0b01-485e-9b29-ac33932aa240',
 'userid_a6aab5f5-fdb6-41df-9fc6-221d70f8c6e8',
 'userid_44968286-f204-4ad6-a9b5-d95b38e97866',
 'userid_954edb7c-4eae-47ab-9338-5c5c7eccac2d']

In [70]:
#2D Mann Kendall
saveDir=(str(global_drive_path) +"StationarityTests/ndsi-2d/MKT_Sliding")
failed_files=[]
for i in tqdm(range(len(df_twoD))):
  final_dataframe_3d = pd.DataFrame(columns=['User','MKT Stationary_State_Action'])
  subtask=0
  length_parsedFile=0


  window_dataframe=(df_twoD[i])
  window_dataframe['State_Action']=(window_dataframe['Action']+ window_dataframe['State'])
  window_dataframe['Action']=pd.factorize(window_dataframe['Action'])[0]
  window_dataframe['State']=pd.factorize(window_dataframe['State'])[0]
  window_dataframe['State_Action']=pd.factorize(window_dataframe['State_Action'])[0]
  subtask_values=window_dataframe["Subtask"].unique()
  original_fileLength=len(window_dataframe)

  print("Starting for user", users[i])
  while ((length_parsedFile < original_fileLength) and (subtask <= len(subtask_values))):
    
    window_dataframe_subset=window_dataframe.loc[window_dataframe['Subtask'] == subtask_values[subtask]]
    length_parsedFile+=len(window_dataframe_subset)
    print("Length of file",len(window_dataframe_subset), " in ","Subtask: ",subtask_values[subtask])
    #creating series
    state_action_correlated_series=window_dataframe_subset['State_Action'].values
    # action_only_series=window_dataframe_subset['Action'].values
    # state_only_series=window_dataframe_subset['State'].values
    three_series=[state_action_correlated_series]
    three_series_results=[subtask_values[subtask]]
    for series in (three_series):
      if len(series)>2:
        sTest_state = StationarityTests()
        sTest_state.Mann_Kendall_Stationarity_Test(series, printResults = False)
        three_series_results.append(sTest_state.trend)
        
      else:
        three_series_results.append("Not Enough Data")
        print("Failed for user", users[i])
        failed_files.append(users[i])
    df_length = len(final_dataframe_3d)
    final_dataframe_3d.loc[df_length] = three_series_results
    userfilename= str(users[i]) + "MKT_sliding.csv"
    final_dataframe_3d.to_csv(os.path.join(saveDir,userfilename), index=False)
    subtask +=1
    

  0%|          | 0/19 [00:00<?, ?it/s]

Starting for user userid_ac5b65d7-f260-4534-9fae-d998b726ed32
Length of file 42  in  Subtask:  1


  5%|▌         | 1/19 [00:00<00:15,  1.14it/s]

Length of file 51  in  Subtask:  2
Length of file 19  in  Subtask:  3
Starting for user userid_82316e37-1117-4663-84b4-ddb6455c83b2
Length of file 11  in  Subtask:  1


 11%|█         | 2/19 [00:01<00:10,  1.58it/s]

Length of file 17  in  Subtask:  2
Length of file 21  in  Subtask:  3
Starting for user userid_da52acdd-3cea-483c-86e5-2c2740d371aa
Length of file 57  in  Subtask:  1


 16%|█▌        | 3/19 [00:01<00:08,  1.85it/s]

Length of file 26  in  Subtask:  2
Length of file 28  in  Subtask:  3
Length of file 4  in  Subtask:  4
Length of file 10  in  Subtask:  5
Starting for user userid_ff56863b-0710-4a58-ad22-4bf2889c9bc0
Length of file 91  in  Subtask:  1


 21%|██        | 4/19 [00:02<00:07,  2.12it/s]

Length of file 30  in  Subtask:  2
Length of file 55  in  Subtask:  3
Starting for user userid_d6555293-35c7-4c27-b522-38d67d2d1b1a
Length of file 33  in  Subtask:  1


 26%|██▋       | 5/19 [00:02<00:06,  2.31it/s]

Length of file 11  in  Subtask:  2
Length of file 38  in  Subtask:  3
Starting for user userid_bc9f9425-e867-4876-a5d9-791229916767
Length of file 13  in  Subtask:  1


 32%|███▏      | 6/19 [00:02<00:05,  2.41it/s]

Length of file 1  in  Subtask:  2
Failed for user userid_bc9f9425-e867-4876-a5d9-791229916767
Length of file 18  in  Subtask:  3
Length of file 13  in  Subtask:  4
Starting for user userid_bda49380-37ad-41c5-a109-7fa198a7691a
Length of file 39  in  Subtask:  1


 37%|███▋      | 7/19 [00:03<00:05,  2.38it/s]

Length of file 27  in  Subtask:  2
Length of file 27  in  Subtask:  3
Length of file 36  in  Subtask:  4
Starting for user userid_7d014c75-453b-4188-9e4e-fb086984896a
Length of file 36  in  Subtask:  1


 42%|████▏     | 8/19 [00:03<00:04,  2.40it/s]

Length of file 143  in  Subtask:  2
Length of file 12  in  Subtask:  3
Length of file 4  in  Subtask:  4
Starting for user userid_8bf5b71b-dfd4-403f-8791-06427b26ccaf
Length of file 45  in  Subtask:  1


 47%|████▋     | 9/19 [00:04<00:04,  2.41it/s]

Length of file 25  in  Subtask:  2
Length of file 11  in  Subtask:  3
Starting for user userid_cd3ef507-e3e9-4edb-9222-90a69a9bf272
Length of file 27  in  Subtask:  1


 53%|█████▎    | 10/19 [00:04<00:03,  2.39it/s]

Length of file 17  in  Subtask:  2
Length of file 22  in  Subtask:  3
Starting for user userid_3abeecbe-327a-441e-be2a-0dd3763c1d45
Length of file 27  in  Subtask:  1
Length of file 1  in  Subtask:  2
Failed for user userid_3abeecbe-327a-441e-be2a-0dd3763c1d45
Length of file 9  in  Subtask:  3
Starting for user userid_93be0a02-3f7a-46c9-b526-aa956ed00856
Length of file 28  in  Subtask:  1


 63%|██████▎   | 12/19 [00:04<00:02,  3.09it/s]

Length of file 38  in  Subtask:  2
Length of file 64  in  Subtask:  3
Starting for user userid_e4221e48-0930-40db-af3c-f4de1ed87582
Length of file 14  in  Subtask:  1


 68%|██████▊   | 13/19 [00:05<00:02,  2.81it/s]

Length of file 1  in  Subtask:  2
Failed for user userid_e4221e48-0930-40db-af3c-f4de1ed87582
Length of file 14  in  Subtask:  3
Length of file 18  in  Subtask:  4
Starting for user userid_6d49fab8-273b-4a91-948b-ecd14556b049
Length of file 16  in  Subtask:  1


 74%|███████▎  | 14/19 [00:05<00:01,  2.55it/s]

Length of file 20  in  Subtask:  2
Length of file 35  in  Subtask:  3
Length of file 5  in  Subtask:  4
Starting for user userid_733a1ac5-0b01-485e-9b29-ac33932aa240
Length of file 14  in  Subtask:  1


 79%|███████▉  | 15/19 [00:06<00:01,  2.46it/s]

Length of file 5  in  Subtask:  2
Length of file 24  in  Subtask:  3
Starting for user userid_a6aab5f5-fdb6-41df-9fc6-221d70f8c6e8
Length of file 37  in  Subtask:  1


 84%|████████▍ | 16/19 [00:06<00:01,  2.41it/s]

Length of file 13  in  Subtask:  2
Length of file 10  in  Subtask:  3
Starting for user userid_44968286-f204-4ad6-a9b5-d95b38e97866
Length of file 23  in  Subtask:  1
Length of file 56  in  Subtask:  2
Length of file 3  in  Subtask:  3
Length of file 5  in  Subtask:  4
Starting for user userid_954edb7c-4eae-47ab-9338-5c5c7eccac2d
Length of file 22  in  Subtask:  1


 95%|█████████▍| 18/19 [00:07<00:00,  2.88it/s]

Length of file 17  in  Subtask:  2
Length of file 10  in  Subtask:  3
Length of file 10  in  Subtask:  4
Starting for user userid_8b544d24-3274-4bb0-9719-fd2bccc87b02
Length of file 22  in  Subtask:  1


100%|██████████| 19/19 [00:07<00:00,  2.44it/s]

Length of file 35  in  Subtask:  2
Length of file 2  in  Subtask:  3
Failed for user userid_8b544d24-3274-4bb0-9719-fd2bccc87b02
Length of file 15  in  Subtask:  4





In [71]:
failed_files

['userid_bc9f9425-e867-4876-a5d9-791229916767',
 'userid_3abeecbe-327a-441e-be2a-0dd3763c1d45',
 'userid_e4221e48-0930-40db-af3c-f4de1ed87582',
 'userid_8b544d24-3274-4bb0-9719-fd2bccc87b02']