In [104]:
import glob
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import wilcoxon

pd.options.mode.chained_assignment = None  # default='warn'

In [105]:
# Gather every per-user log matching the U_* pattern, then sort the paths
# lexicographically so users are always processed in the same order.
matched_paths = glob.glob("data/NDSI-2D/U_*")
fileNames2D = np.sort(matched_paths)

In [106]:
def get_probabilities(dataframe, current_phase):
    """Return the share of rows in ``dataframe`` whose ``State`` is ``current_phase``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must expose a ``State`` column; every value has to be one of
        ``'Sensemaking'``, ``'Foraging'`` or ``'Navigation'``.
    current_phase : str
        The state whose relative frequency is requested (one of the three
        names above).

    Returns
    -------
    float
        Number of rows in ``current_phase`` divided by the total number of rows.

    Raises
    ------
    KeyError
        If a row carries a state other than the three known ones, or if
        ``current_phase`` is not one of them.
    ValueError
        If ``dataframe`` has no rows, so no probability can be computed.
    """
    # Named `counts` (not `map`) so the builtin `map` is not shadowed.
    counts = {'Sensemaking': 0, 'Foraging': 0, 'Navigation': 0}
    for state in dataframe['State']:
        counts[state] += 1

    phase_count = counts[current_phase]
    total = counts['Foraging'] + counts['Sensemaking'] + counts['Navigation']
    if total == 0:
        # Fail with an explicit message instead of a bare "division by zero".
        raise ValueError(
            "cannot compute state probabilities: the 'State' column is empty"
        )
    return phase_count / total


In [107]:
class StationarityTests:
    """Paired-sample stationarity check based on the Wilcoxon signed-rank test.

    The outcome of the most recent test is kept on the instance so callers can
    read it back without re-deriving it from the returned result.
    """

    def __init__(self, significance=.05):
        # Threshold the p-value is compared against in Wilcoxon_Test.
        self.SignificanceLevel = significance
        # p-value of the most recent test; None until a test has been run.
        self.pValue = None
        # True when the most recent test did NOT reject the null hypothesis
        # (p-value >= SignificanceLevel); None until a test has been run or
        # when the p-value is NaN.
        self.isStationary = None
        # Not set by any method of this class; kept for callers that read it.
        self.trend= None

    def Wilcoxon_Test(self,series_X, series_Y):
        """Run ``scipy.stats.wilcoxon`` on the paired samples and record the outcome.

        Parameters
        ----------
        series_X, series_Y : array-like
            Paired observations of equal length.

        Returns
        -------
        The unmodified result object returned by ``scipy.stats.wilcoxon``
        (exposes ``statistic`` and ``pvalue``).

        Side effects
        ------------
        Updates ``self.pValue`` and ``self.isStationary``.
        """
        result = wilcoxon(series_X, series_Y)

        self.pValue = float(result.pvalue)
        if np.isnan(self.pValue):
            # An undefined p-value supports neither conclusion.
            self.isStationary = None
        else:
            self.isStationary = bool(self.pValue >= self.SignificanceLevel)

        return result


In [108]:

# For each state, test whether the per-user probability of being in that state
# differs between the first and the second half of the session (paired Wilcoxon
# signed-rank test across users). The p-values are collected in `all_pvalue`,
# in the order of `states`, for the multiple-comparison corrections below.
states = ['Foraging', 'Navigation', 'Sensemaking']
stats_test = StationarityTests()
all_pvalue = []

# Read every user file exactly once and compute the probabilities of all
# states for both halves, instead of re-reading each CSV once per state.
records_by_state = {state: [] for state in states}
for path in fileNames2D:
    df = pd.read_csv(path)

    # User id = file name without directory or extension. Path.stem removes
    # exactly those; str.lstrip/rstrip strip *character sets* and would also
    # eat trailing 'c', 's', 'v' or '.' characters belonging to the id.
    user = Path(path).stem

    # Split the session into two halves. Note: round() rounds half to even,
    # so for an odd number of rows the extra row can land in either half.
    mid = round(len(df) / 2)
    first_half = df.iloc[:mid]
    second_half = df.iloc[mid:]

    for state in states:
        records_by_state[state].append({
            'User': user,
            'First_Half': get_probabilities(first_half, state),
            'Second_Half': get_probabilities(second_half, state),
        })

for state in states:
    print(f"\nProcessing test for state: {state}")

    # Build the per-user table in one go rather than growing it with
    # pd.concat inside a loop.
    test_dfs = pd.DataFrame(
        records_by_state[state],
        columns=['User', 'First_Half', 'Second_Half'],
    )

    # Perform Wilcoxon signed-rank test on the probabilities of the two halves
    result = stats_test.Wilcoxon_Test(test_dfs['First_Half'], test_dfs['Second_Half'])
    print(result)

    # Compare against the configured significance level rather than a
    # hard-coded 0.05.
    print(f"State: {state} Users are Non-stationary {result.pvalue < stats_test.SignificanceLevel}")
    all_pvalue.append(result.pvalue)



Processing test for state: Foraging
WilcoxonResult(statistic=82.0, pvalue=0.6008710115741303)
State: Foraging Users are Non-stationary False

Processing test for state: Navigation
WilcoxonResult(statistic=28.0, pvalue=0.002712249755859375)
State: Navigation Users are Non-stationary True

Processing test for state: Sensemaking
WilcoxonResult(statistic=45.0, pvalue=0.023950576782226562)
State: Sensemaking Users are Non-stationary True


  test_dfs = pd.concat([test_dfs, user_df], ignore_index=True)
  test_dfs = pd.concat([test_dfs, user_df], ignore_index=True)
  test_dfs = pd.concat([test_dfs, user_df], ignore_index=True)


In [111]:
from scipy import stats

# Benjamini-Hochberg adjustment of the per-state p-values; the bare name on
# the last line makes the adjusted values the cell's displayed output.
bh_adjusted_pvalues = stats.false_discovery_control(all_pvalue, method='bh')
bh_adjusted_pvalues

array([0.60087101, 0.00813675, 0.03592587])

In [112]:
from statsmodels.stats.multitest import multipletests

# Bonferroni correction of the per-state p-values at alpha = 0.05; the bare
# name on the last line makes the result tuple the cell's displayed output.
bonferroni_result = multipletests(all_pvalue, alpha=0.05, method='bonferroni')
bonferroni_result

(array([False,  True, False]),
 array([1.        , 0.00813675, 0.07185173]),
 0.016952427508441503,
 0.016666666666666666)