<a href="https://colab.research.google.com/github/AvantiShri/gcp_analysis/blob/main/BulkAnalyze_standardGCPpvals.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Mount Google Drive at /content/drive so that files stored on Drive can be
# read and written through the regular filesystem.
# NOTE: `google.colab` is only importable inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
import glob
import json
import os

import numpy as np
import scipy
import scipy.stats

def get_chisquareminus1(egg_values):
  """Compute per-row z-scores of the summed egg values, and z^2 - 1.

  Samples are first filtered as described in Radin 2023 (Anomalous entropic
  effects in physical systems associated with collective consciousness):
  "All individual samples within a matrix less than 55 or greater than 145
  were set to nan".

  Each remaining sample is treated as a binomial draw with mean 100 and
  variance 200*0.25 = 50, so a row with n valid samples has an expected sum
  of 100*n and a standard deviation of sqrt(50*n).

  Args:
    egg_values: 2D numeric array; the reduction runs over axis 1, giving one
      z-score per row. NaN entries are treated as missing.

  Returns:
    A tuple (z**2 - 1, z) of 1D arrays with one entry per row.
  """
  # Blank out implausible samples in a single pass. Entries that are already
  # NaN fail both comparisons and therefore stay NaN.
  outside_range = (egg_values < 55) | (egg_values > 145)
  masked = np.where(outside_range, np.nan, egg_values)

  # Number of usable eggs in each row.
  eggs_per_row = (~np.isnan(masked)).sum(axis=1)
  # Rows with no usable eggs are intentionally NOT dropped, because removing
  # a row would mess up the temporal spacing; such rows produce a NaN z-score
  # (0/0). The original guard against them was an assert that is disabled.

  # Row totals over the usable eggs only (NaNs are skipped).
  row_totals = np.nansum(masked, axis=1)

  # Standardise each row total as per a binomial distribution.
  expected_total = eggs_per_row*100
  total_std = np.sqrt(eggs_per_row*200*0.25)
  z_scores = (row_totals - expected_total)/total_std

  return np.square(z_scores)-1, z_scores

%cd /content/drive/MyDrive/GCP_data

#first get the chisquare stats for all the events
all_events = [(x.split("_")[0]).split("/")[1] for x in glob.glob("extracted/Event*.json")]

event_to_sumcsm1pval = {}
event_to_sumcsm1z = {}

for event in all_events:
  metadata = json.load(open("extracted/"+event+"_metadata.json"))
  event_name = metadata["Name"]
  egg_values = np.load("extracted/"+event+"_test_eggvalues.npy")
  csm1, z_sum_eggs = get_chisquareminus1(egg_values)
  #do the statistical test...since there are atleast 3600 such variables,
  # their sum is normally distribution
  # a chi-square dist with k deg of freedom has variance 2k
  sum_csm1_z = np.sum(csm1)/(np.sqrt(2*len(csm1)))
  sum_csm1_pval = 1 - scipy.stats.norm.cdf(sum_csm1_z)
  event_to_sumcsm1z[event] = sum_csm1_z
  event_to_sumcsm1pval[event] = sum_csm1_pval

  print(event,event_name,sum_csm1_z,sum_csm1_pval)

/content/drive/MyDrive/GCP_data
Event247 Peru Earthquake 0.70001045615229 0.2419603872603413
Event249 Global OM 0.272611990125704 0.3925757470758531
Event248 Burning Man 1.5233261766563233 0.06383856064317239
Event245 Fire The Grid 1.4989513440871953 0.06694312751333098
Event246 Minneapolis Bridge Collapse -0.35944010384461944 0.6403670610184301
Event243 Palestinian Government Falls 0.7912074343100255 0.21441147696981844
Event244 Live Earth 0.4403533984192942 0.32984058571770636
Event242 Lightning Strikes Giuliani 0.6002151219106447 0.2741814384634136
Event239 Massacre at Virginia Tech 1.2894825913047963 0.09861518234112998
Event240 Tai Chi Chigong Day 0.545176061930644 0.292816216838626
Event238 Solomon Islands Quake 0.7183005909502939 0.23628598294160552
Event319 Pakistan Volleyball Bombing -1.4188463892718581 0.922028097552849
Event317 New Year Mean 2010 0.7788893385180765 0.21802245240766416
Event316 Bomb Attempt Dec 25 0.2860126941457468 0.38743419949325153
Event315 Haj Stoning 0.

In [4]:
def get_meta_pval(pvals):
  """Combine p-values into a single meta p-value using Fisher's method.

  The statistic -2 * sum(ln p_i) is compared against a chi-square
  distribution with 2 * len(pvals) degrees of freedom, and the upper-tail
  probability of that distribution is returned.

  Args:
    pvals: sequence (list or 1D array) of p-values. A p-value of exactly 0
      makes the statistic infinite, and a NaN p-value makes the result NaN.

  Returns:
    The combined (upper-tail) p-value as a float.
  """
  chisquare_stat = -2*np.sum(np.log(pvals))
  deg_freedom = 2*len(pvals)
  # Use the survival function instead of 1 - cdf: once the cdf rounds to 1.0
  # the subtraction returns exactly 0, whereas sf() keeps the tail accurate.
  return scipy.stats.chi2.sf(chisquare_stat, df=deg_freedom)

# Combine the per-event p-values into a single meta p-value; as the last
# expression in the cell, the result is displayed as the cell output.
get_meta_pval(list(event_to_sumcsm1pval.values()))

2.313402247544616e-09

In [6]:
# Write the per-event p-values and z-scores to JSON files in the current
# working directory, one file per result table.
for out_name, per_event_stats in (
    ("event_to_sumcsm1pval.json", event_to_sumcsm1pval),
    ("event_to_sumcsm1z.json", event_to_sumcsm1z),
):
  with open(out_name, 'w') as out_file:
    out_file.write(json.dumps(per_event_stats))