In [1]:
from experiment import Experiment
from utils.behavior_data import BehaviorData
from utils.content import StatesHandler
from visuals import Plotter
import torch
import numpy as np
from utils.state_data import StateData
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

In [2]:
# Display names of the state components; position i names column i of the
# change arrays that are plotted and summarised further down.
elem_map = ["Healthy Intake", "Unhealthy Intake", "Fitness Level", "Knowledge"]

# Demographic filters. Each list holds the codes to keep; the code meanings
# below are the labels calc_graph_name() attaches to them.

# Age: 0 = 18-25, 1 = 26-35, 2 = 35-50, 3 = 51+.
# Other presets used so far: [2, 3] and [0, 1].
ageMatching = list(range(4))

# Gender: 1 = male, 3 = female. Other presets: [1] or [3].
genderMatching = [1, 3]

# Income: 1 = no income, 3 = income. Other presets: [1] or [3].
incomeMatching = [1, 3]

# Education: 0 = some ed., 1 = some higher ed., 2 = degree, 3 = post grad.
# Other presets: [0] or [1, 2, 3].
educationMatching = list(range(4))

# Passed to BehaviorData as top_respond_perc and used in titles/file names.
respondPerc = 1.00

In [3]:
def sub_adj(**kw):
    """Adjust subplot spacing via ``plt.subplots_adjust``.

    ``wspace`` and ``hspace`` default to 0.2; any keyword arguments given
    override or extend those defaults before being forwarded.
    """
    spacing = dict(wspace=0.2, hspace=0.2)
    spacing.update(kw)
    plt.subplots_adjust(**spacing)

def ticks(*aa, x=True, y=True, **kw):
    """Apply tick styling to each of the given axes.

    Parameters
    ----------
    *aa
        Objects exposing ``tick_params`` (the notebook passes matplotlib axes).
    x, y : bool
        Whether to style the x-axis / y-axis ticks.
    **kw
        Extra ``tick_params`` options. They override the default
        ``rotation=0``; previously they were accepted but ignored.
    """
    cf = {"rotation": 0}
    cf.update(kw)
    # Resolve once which axes to touch, preserving the x-then-y call order.
    selected = [axis_name for axis_name, enabled in (("x", x), ("y", y)) if enabled]
    for a in aa:
        for axis_name in selected:
            a.tick_params(axis=axis_name, **cf)
            

def label(ax, i, l, fi=None, **cf):
    """Set a horizontal, right-aligned y-axis label.

    Parameters
    ----------
    ax
        Either a single axes object or an indexable collection of axes.
    i : int
        Index of the target axes inside ``ax``; used when ``fi`` is None.
    l : str
        Label text.
    fi : int, optional
        Explicit index overriding ``i``. A negative value means ``ax`` is
        itself the axes to label rather than a collection.
    **cf
        ``set_ylabel`` options that override the defaults below. Previously
        these were overwritten by the defaults and had no effect.
    """
    style = {
        "rotation": 0,
        "va": "center_baseline",
        "labelpad": 5,
        "fontsize": 12,
        "ha": "right",
    }
    style.update(cf)
    if fi is None:
        fi = i
    target = ax if fi < 0 else ax[fi]
    target.set_ylabel(l, **style)
    
def calc_graph_name(respond_perc=None, genders=None, ages=None, incomes=None, educations=None):
    """Build the file name and figure title describing the active filters.

    Parameters
    ----------
    respond_perc, genders, ages, incomes, educations : optional
        Filter settings to describe. Any argument left as ``None`` falls back
        to the notebook-level ``respondPerc``, ``genderMatching``,
        ``ageMatching``, ``incomeMatching`` and ``educationMatching``.

    Returns
    -------
    (name, title) : tuple of str
        ``name`` is a compact tag ending in ".png"; ``title`` starts with the
        response-rate line followed by one " <label>" fragment per filter that
        is narrower than "everything".
    """
    # Fall back to the notebook configuration for anything not passed in.
    if respond_perc is None:
        respond_perc = respondPerc
    if genders is None:
        genders = genderMatching
    if ages is None:
        ages = ageMatching
    if incomes is None:
        incomes = incomeMatching
    if educations is None:
        educations = educationMatching

    def has(codes, *wanted):
        # True when every wanted code is part of the filter.
        return all(code in codes for code in wanted)

    name = ""
    title = ""

    # Response-rate line: .3 and .47 are special-cased with fixed wording.
    if respond_perc == .3:
        title += ">50% Response Rate\n"
    elif respond_perc == .47:
        title += ">25% Response Rate\n"
    else:
        title += f"Top {respond_perc:.0%} Responders\n"
    name += f"Top{respond_perc}"

    # Gender: nothing is added when both codes are selected.
    if has(genders, 1, 3):
        pass
    elif has(genders, 1):
        name += "M"
        title += " Male"
    else:
        name += "F"
        title += " Female"

    # Age: nothing is added when all four brackets are selected.
    if has(ages, 0, 1, 2, 3):
        pass
    elif has(ages, 0, 1):
        name += "18-35"
        title += " 18-35"
    elif has(ages, 0):
        name += "18-25"
        title += " 18-25"
    elif has(ages, 1):
        name += "26-35"
        title += " 26-35"
    elif has(ages, 2, 3):
        name += "35+"
        title += " 35+"
    elif has(ages, 2):
        name += "35-50"
        title += " 35-50"
    elif has(ages, 3):
        name += "51+"
        title += " 51+"

    # Income.
    if has(incomes, 1, 3):
        pass
    elif has(incomes, 3):
        name += "Inc"
        title += " Income"
    elif has(incomes, 1):
        name += "NoInc"
        title += " No Income"

    # Education.
    if has(educations, 0, 1, 2, 3):
        pass
    elif has(educations, 1, 2, 3):
        name += "MoreEd"
        title += " Some Higher Ed.+"
    elif has(educations, 0):
        name += "SomeEd"
        title += " Some Ed."
    elif has(educations, 1):
        name += "SomeHigherEd"
        # Leading space added so the fragment does not fuse with the previous one.
        title += " Some Higher Ed."
    elif has(educations, 2):
        name += "Degree"
        title += " Degree"
    elif has(educations, 3):
        name += "PostGrad"
        # The human-readable fragment belongs in the title, not the file name.
        title += " Post Grad Ed."

    name += ".png"

    return name, title

def _demographic_suffix():
    """Return ``(name_part, title_part)`` for the notebook's demographic filters.

    Reads ``genderMatching``, ``ageMatching``, ``incomeMatching`` and
    ``educationMatching``. A filter that selects every code contributes
    nothing; otherwise a short tag is added to the file-name part and a
    " <label>" fragment to the title part.
    """
    name = ""
    title = ""

    if 1 in genderMatching and 3 in genderMatching:
        pass
    elif 1 in genderMatching:
        name += "M"
        title += " Male"
    else:
        name += "F"
        title += " Female"

    if all(code in ageMatching for code in (0, 1, 2, 3)):
        pass
    elif 0 in ageMatching and 1 in ageMatching:
        name += "18-35"
        title += " 18-35"
    elif 0 in ageMatching:
        name += "18-25"
        title += " 18-25"
    elif 1 in ageMatching:
        name += "26-35"
        title += " 26-35"
    elif 2 in ageMatching and 3 in ageMatching:
        name += "35+"
        title += " 35+"
    elif 2 in ageMatching:
        name += "35-50"
        title += " 35-50"
    elif 3 in ageMatching:
        name += "51+"
        title += " 51+"

    if 1 in incomeMatching and 3 in incomeMatching:
        pass
    elif 3 in incomeMatching:
        name += "Inc"
        title += " Income"
    elif 1 in incomeMatching:
        name += "NoInc"
        title += " No Income"

    if all(code in educationMatching for code in (0, 1, 2, 3)):
        pass
    elif all(code in educationMatching for code in (1, 2, 3)):
        name += "MoreEd"
        title += " Some Higher Ed.+"
    elif 0 in educationMatching:
        name += "SomeEd"
        title += " Some Ed."
    elif 1 in educationMatching:
        name += "SomeHigherEd"
        # Leading space added so the fragment does not fuse with the previous one.
        title += " Some Higher Ed."
    elif 2 in educationMatching:
        name += "Degree"
        title += " Degree"
    elif 3 in educationMatching:
        name += "PostGrad"
        # The human-readable fragment belongs in the title, not the file name.
        title += " Post Grad Ed."

    return name, title


def plot_state_elem_running_change(data, title="Individual Participant Changes", name="BarChange", aiset=True, dataSubset=None):
    """Save a stacked set of sorted bar charts, one per selected column of ``data``.

    For every selected column the values are sorted in descending order and
    drawn as bars (cyan for positive, red otherwise), together with the mean,
    a zero line, the share of positive / negative entries, and dashed markers
    at the end of the positive run and the start of the negative run.

    Parameters
    ----------
    data : 2-D numpy array
        One row per participant; indexed as ``data[:, elem]``.
    title : str
        First line of the figure title; group and filter descriptions are
        appended to it.
    name : str
        File-name prefix; group and filter tags are appended to it.
    aiset : bool
        True labels the figure with the response-rate line derived from
        ``respondPerc``; False labels it "Non AI Group".
    dataSubset : sequence of int, optional
        Column indices to plot. ``None`` or an empty sequence plots every
        column. Each index must also be a valid index into ``elem_map``.

    The figure is written to ``./imgDet/{name}.png`` and then closed.
    """
    if dataSubset is None or len(dataSubset) == 0:
        dataSubset = np.arange(len(data[0]))
    numFigs = len(dataSubset)
    n_participants = len(data)

    fig, ax = plt.subplots(nrows=numFigs, figsize=(6, 1.5 + 2.1 * numFigs))
    x = np.arange(n_participants)
    for i, elem in enumerate(dataSubset):
        # plt.subplots returns a bare axes (not an array) for a single row.
        thisPlot = ax[i] if numFigs > 1 else ax

        diff = np.sort(data[:, elem])[::-1]
        imp = diff > 0
        dec = diff < 0
        imp_perc = imp.sum() / imp.shape[0]
        dec_perc = dec.sum() / dec.shape[0]
        C = np.where(imp, "c", "r")

        # Boundary markers are only drawn when the corresponding group exists;
        # indexing the empty result used to raise IndexError.
        if imp.any():
            imp_stop = np.flatnonzero(imp)[-1]
            thisPlot.axvline(imp_stop+.4, alpha=0.8, ymin=0.5, linestyle="--", color="c", label="Improvement")
        if dec.any():
            dec_start = np.flatnonzero(dec)[0]
            thisPlot.axvline(dec_start-.4, alpha=0.8, ymax=0.5, linestyle="--", color="r", label="Deterioration")
        thisPlot.axhline(diff.mean(), xmin=0, xmax=1, color="violet", linestyle="-.", label=f"Mean\n={diff.mean():.3f}")
        thisPlot.text(int(imp.sum()*.85), -.45, f"{imp_perc*100:.1f}%")
        thisPlot.text((~dec).sum(), .3, f"{dec_perc*100:.1f}%")
        thisPlot.bar(x, diff, color=C)
        thisPlot.set_ylim((-2, 2))
        label(ax, i if numFigs > 1 else -1, elem_map[elem])
        thisPlot.axhline(0, alpha=0.8, linestyle="--", color="k")

        # Only the bottom panel keeps its x-axis. The original compared the
        # panel index with the number of participants, which hid every axis.
        if i == numFigs - 1:
            thisPlot.set_xlabel("Participant ID")
        else:
            thisPlot.get_xaxis().set_visible(False)
        thisPlot.legend(loc="lower left", ncol=2, columnspacing=1.0)
        ticks(thisPlot)

    sub_adj()

    if aiset:
        if respondPerc == .3:
            title += "\n>50% Response Rate\n"
        elif respondPerc == .47:
            title += "\n>25% Response Rate\n"
        else:
            title += f"\nTop {respondPerc:.0%} Responders\n"
        name += f"Top{respondPerc}"
    else:
        title += f"\nNon AI Group\n"
        name += "Base"

    name_part, title_part = _demographic_suffix()
    name += name_part
    title += title_part

    # Drop the dangling newline left when no demographic fragment was added.
    if title.endswith("\n"):
        title = title[:-1]
    (ax[0] if numFigs > 1 else ax).set_title(title, fontsize=14)
    fig.tight_layout()
    fig.savefig(f"./imgDet/{name}.png")
    # Close rather than clear, so no empty figure stays open after saving.
    plt.close(fig)


def load_questionnaire_states(endline=False, detail=0, aiset=True):
    """Load questionnaire states and split them by AI-participant membership.

    Parameters
    ----------
    endline : bool
        Forwarded to ``StatesHandler``.
    detail : int
        Selects the state map file: >2 "map_questionnaire_final.json",
        2 "map_individual.json", 1 "map_detailed.json", 0 "map.json",
        negative "map_traditional.json".
    aiset : bool
        True returns the respondents found in
        "arogya_content/all_ai_participants.csv"; False returns everyone else.

    Returns
    -------
    dict
        ``aiset=True``: first CSV column (the glific ID per the original
        comment) -> state row.
        ``aiset=False``: whatsapp number as returned by ``compute_states``
        -> state row.

    NOTE(review): assumes ``compute_states`` returns torch tensors for the
    whatsapp numbers and states (``.numpy()`` is called on both) - confirm.
    """
    if detail > 2:
        map_file = "map_questionnaire_final.json"
    elif detail > 1:
        map_file = "map_individual.json"
    elif detail > 0:
        map_file = "map_detailed.json"
    elif detail > -1:
        map_file = "map.json"
    else:
        map_file = "map_traditional.json"
    sh = StatesHandler(map=map_file, endline=endline)
    whatsapps, states, _ = sh.compute_states()

    def modify_whatsapp(x):
        # Keep only the last 10 digits. A plain negative slice also handles
        # numbers shorter than 10 digits, where `len(x) - 10` went negative
        # and cut off leading digits.
        return int(str(x)[-10:])

    participantIDs = torch.tensor(np.loadtxt("arogya_content/all_ai_participants.csv", delimiter=",", skiprows=1, dtype="int64"))
    # Normalise the second CSV column in place.
    participantIDs[:, 1].apply_(modify_whatsapp)

    # Match CSV numbers against respondent numbers; the index arrays point
    # into participantIDs and whatsapps/states respectively.
    isect, idIdxs, stateIdxs = np.intersect1d(participantIDs[:, 1], whatsapps, return_indices=True)
    if aiset:
        # Combine the glific IDs with the matched states into a dictionary.
        return dict(zip(participantIDs[idIdxs, 0].numpy(), states[stateIdxs].numpy()))

    # Non-AI group: everything that did not match the AI participant list.
    stats = np.delete(states.numpy(), stateIdxs, axis=0)
    wapps = np.delete(whatsapps.numpy(), stateIdxs)
    return dict(zip(wapps, stats))
        
# Endline ("post") and baseline ("pre") questionnaire states for the AI
# participant set, using the traditional map (detail=-1).
post = load_questionnaire_states(True, -1, True)
print(len(post))
pre = load_questionnaire_states(False, -1, True)
print(len(pre))


diffs = []
demos = []

bd = BehaviorData(minw=2, maxw=31, include_state=True, include_pid=False, top_respond_perc=respondPerc)

# Hoisted out of the loop: the pid column was converted to an array again
# for every participant.
behavior_pids = bd.data['pid'].to_numpy()

# Keep participants that answered both questionnaires and are present in
# the behaviour data.
for glifid in post.keys():
    if glifid not in pre or glifid not in behavior_pids:
        continue
    diffs.append(post[glifid] - pre[glifid])
    # The last five state entries are kept separately as demographics.
    demos.append(pre[glifid][-5:])

print(len(diffs))

demos = np.array(demos)
# Drop the five trailing demographic columns from the change vectors.
allDiffValsAI = np.array(diffs)[:, :-5]

480
806
0 537
pmsg_sids
paction_sids
pmsg_ids
qids
response
(12888, 70) (12888, 8) 70
['state' 'state' 'state' 'state' 'state' 'state' 'state' 'state' 'state'
 'state' 'state' 'state' 'state' 'state' 'state' 'state' 'state' 'state'
 'state' 'state' 'state' 'state' 'pmsg_sids_last_0_q1'
 'pmsg_sids_last_0_q1' 'pmsg_sids_last_0_q1' 'pmsg_sids_last_0_q1'
 'pmsg_sids_last_0_q1' 'pmsg_sids_last_0_q2' 'pmsg_sids_last_0_q2'
 'pmsg_sids_last_0_q2' 'pmsg_sids_last_0_q2' 'pmsg_sids_last_0_q2'
 'paction_sids_last_0_q1' 'paction_sids_last_0_q1'
 'paction_sids_last_0_q1' 'paction_sids_last_0_q1'
 'paction_sids_last_0_q1' 'paction_sids_last_0_q2'
 'paction_sids_last_0_q2' 'paction_sids_last_0_q2'
 'paction_sids_last_0_q2' 'paction_sids_last_0_q2' 'pmsg_ids_last_0_q1'
 'pmsg_ids_last_0_q1' 'pmsg_ids_last_0_q1' 'pmsg_ids_last_0_q1'
 'pmsg_ids_last_0_q1' 'pmsg_ids_last_0_q1' 'pmsg_ids_last_0_q2'
 'pmsg_ids_last_0_q2' 'pmsg_ids_last_0_q2' 'pmsg_ids_last_0_q2'
 'pmsg_ids_last_0_q2' 'pmsg_ids_last_0_q2' '

In [4]:
# Head-count per demographic code for the rows currently held in `demos`.
# np.unique returns sorted values, so the counts line up with the printed
# labels only when every code is present.
ages, counts = np.unique(demos[:, 0], return_counts=True)
print("Age brackets (18-25, 26-35, 36-50, 51+): ", counts)
genders, counts = np.unique(demos[:, 1], return_counts=True)
print("Gender (M, F): ", counts)
incomes, counts = np.unique(demos[:, 3], return_counts=True)
print("Income (N, Y): ", counts)
edus, counts = np.unique(demos[:, 4], return_counts=True)
print("Education (Some, Some Higher, Degree, PostGrad): ", counts)

# One boolean mask per filter (column 2 of `demos` is not used), combined
# so that a row is kept only if it passes all of them.
ageMatch = np.isin(demos[:, 0], ageMatching)
genderMatch = np.isin(demos[:, 1], genderMatching)
incomeMatch = np.isin(demos[:, 3], incomeMatching)
eduMatch = np.isin(demos[:, 4], educationMatching)
allMatch = ageMatch & genderMatch & incomeMatch & eduMatch

diffValsAI = allDiffValsAI[allMatch]

print(len(diffValsAI))

Age brackets (18-25, 26-35, 36-50, 51+):  [110 165 169  33]
Gender (M, F):  [186 291]
Income (N, Y):  [ 77 400]
Education (Some, Some Higher, Degree, PostGrad):  [230 127  84  36]
477


In [5]:
# Same pre/post comparison for respondents outside the AI participant set
# (aiset=False); no behaviour-data filter is applied here.
post = load_questionnaire_states(True, -1, False)
print(len(post))
pre = load_questionnaire_states(False, -1, False)
print(len(pre))


diffs = []
demos = []

for glifid in post.keys():
    if glifid not in pre:
        continue
    diffs.append(post[glifid] - pre[glifid])
    # The last five state entries are kept separately as demographics.
    demos.append(pre[glifid][-5:])

# Drop the five trailing demographic columns from the change vectors.
allDiffValsNAI = np.array(diffs)[:, :-5]
demos = np.array(demos)
print(len(diffs))

568
807
441


In [6]:
# Head-count per demographic code for the rows currently held in `demos`.
# np.unique returns sorted values, so the counts line up with the printed
# labels only when every code is present.
ages, counts = np.unique(demos[:, 0], return_counts=True)
print("Age brackets (18-25, 26-35, 36-50, 51+): ", counts)
genders, counts = np.unique(demos[:, 1], return_counts=True)
print("Gender (M, F): ", counts)
incomes, counts = np.unique(demos[:, 3], return_counts=True)
print("Income (N, Y): ", counts)
edus, counts = np.unique(demos[:, 4], return_counts=True)
print("Education (Some, Some Higher, Degree, PostGrad): ", counts)

# One boolean mask per filter (column 2 of `demos` is not used), combined
# so that a row is kept only if it passes all of them.
ageMatch = np.isin(demos[:, 0], ageMatching)
genderMatch = np.isin(demos[:, 1], genderMatching)
incomeMatch = np.isin(demos[:, 3], incomeMatching)
eduMatch = np.isin(demos[:, 4], educationMatching)
allMatch = ageMatch & genderMatch & incomeMatch & eduMatch
diffValsNAI = allDiffValsNAI[allMatch]

print(len(diffValsNAI))

Age brackets (18-25, 26-35, 36-50, 51+):  [114 141 155  31]
Gender (M, F):  [165 276]
Income (N, Y):  [ 97 344]
Education (Some, Some Higher, Degree, PostGrad):  [212 118  76  35]
441


In [7]:
# Sorted per-participant bar charts, one saved figure per group.
plot_state_elem_running_change(diffValsAI, aiset=True)
plot_state_elem_running_change(diffValsNAI, aiset=False)

# Presets for other column subsets.
# NOTE(review): these use column indices up to 11, beyond the four names in
# elem_map; presumably meant for a more detailed state map - confirm before
# enabling.
# plot_state_elem_running_change(diffValsAI, aiset=True, dataSubset=[3, 0, 1], name="Consumption")
# plot_state_elem_running_change(diffValsNAI, aiset=False, dataSubset=[3, 0, 1], name="Consumption")

# plot_state_elem_running_change(diffValsAI, aiset=True, dataSubset=[10, 11], name="Knowledge")
# plot_state_elem_running_change(diffValsNAI, aiset=False, dataSubset=[10, 11], name="Knowledge")

# plot_state_elem_running_change(diffValsAI, aiset=True, dataSubset=[3, 8, 9, 10], name="Summary")
# plot_state_elem_running_change(diffValsNAI, aiset=False, dataSubset=[3, 8, 9, 10], name="Summary")

# plot_state_elem_running_change(diffValsAI, aiset=True, dataSubset=[8, 9, 7], name="Exercise")
# plot_state_elem_running_change(diffValsNAI, aiset=False, dataSubset=[8, 9, 7], name="Exercise")

# plot_state_elem_running_change(diffValsAI, aiset=True, dataSubset=[9], name="ExerciseInc")
# plot_state_elem_running_change(diffValsNAI, aiset=False, dataSubset=[9], name="ExerciseInc")

# plot_state_elem_running_change(diffValsAI, aiset=True, dataSubset=[8, 9], name="AvgEx")
# plot_state_elem_running_change(diffValsNAI, aiset=False, dataSubset=[8, 9], name="AvgEx")

# Mean change per component for each group, printed as a small table.
diffMeansAI = diffValsAI.mean(axis=0)
diffMeansNAI = diffValsNAI.mean(axis=0)

print(f'{"Component":50}\t{"Baseline Mean":15}\t{"AI Mean":10}')

print("_____________________________________________________________________________________________")

for x, elem_name in enumerate(elem_map):
    print(f'{elem_name:50}:\t{diffMeansNAI[x]:.5f},\t{diffMeansAI[x]:.5f}')


nameString, titleString = calc_graph_name()


# Box plots: baseline group (left) vs AI group (right), one row per component.
fig, ax = plt.subplots(nrows=len(elem_map), ncols=2, sharey="row", figsize=(8, 25))
for x, elem_name in enumerate(elem_map):
    ax[x, 0].boxplot(diffValsNAI[:, x], meanline=True, showmeans=True)
    ax[x, 0].set(title="Base " + elem_name, ylabel="Change")
    ax[x, 1].boxplot(diffValsAI[:, x], meanline=True, showmeans=True)
    ax[x, 1].set(title="AI " + elem_name, ylabel="Change")
plt.subplots_adjust(hspace=.25, wspace=.5)
fig.suptitle(titleString, fontsize=14, verticalalignment='top')
fig.tight_layout(rect=[0, 0, 1, 0.99])
fig.savefig(f"./imgDet/Box{nameString}")
# Close instead of clear: plt.clf() left an empty figure open after saving.
plt.close(fig)


# Side-by-side histograms; the weights turn counts into fractions of each
# group so the y-axis can be formatted as a percentage.
fig, ax = plt.subplots(nrows=len(elem_map), ncols=2, figsize=(8, 25))
for x, elem_name in enumerate(elem_map):
    base_vals = diffValsNAI[:, x]
    ai_vals = diffValsAI[:, x]
    ax[x, 0].hist(base_vals, weights=np.ones(len(base_vals)) / len(base_vals))
    ax[x, 0].set(title="Base " + elem_name, xlabel="Change")
    ax[x, 0].yaxis.set_major_formatter(PercentFormatter(1))
    ax[x, 1].hist(ai_vals, weights=np.ones(len(ai_vals)) / len(ai_vals))
    ax[x, 1].set(title="AI " + elem_name, xlabel="Change")
    ax[x, 1].yaxis.set_major_formatter(PercentFormatter(1))
plt.subplots_adjust(hspace=.30, wspace=.5)
fig.suptitle(titleString, fontsize=14, verticalalignment='top')
fig.tight_layout(rect=[0, 0, 1, .99])
fig.savefig(f"./imgDet/Hist{nameString}")
plt.close(fig)


# Both groups in the same histogram, one panel per component.
fig, ax = plt.subplots(nrows=len(elem_map), ncols=1, figsize=(4, 25))
for x, elem_name in enumerate(elem_map):
    base_vals = diffValsNAI[:, x]
    ai_vals = diffValsAI[:, x]
    ax[x].hist(
        [base_vals, ai_vals],
        color=['tan', 'blue'],
        label=['Base', 'AI'],
        stacked=False,
        weights=[
            np.ones(len(base_vals)) / len(base_vals),
            np.ones(len(ai_vals)) / len(ai_vals),
        ],
    )
    ax[x].set(title=elem_name, xlabel="Change")
    ax[x].yaxis.set_major_formatter(PercentFormatter(1))
    ax[x].legend()
plt.subplots_adjust(hspace=.40, wspace=.25)
fig.suptitle(titleString, fontsize=14, verticalalignment='top')
fig.tight_layout(rect=[0, 0, 1, 0.99])
fig.savefig(f"./imgDet/BothHist{nameString}")
plt.close(fig)


Component                                         	Baseline Mean  	AI Mean   
_____________________________________________________________________________________________
Healthy Intake                                    :	-0.03039,	-0.03564
Unhealthy Intake                                  :	0.04762,	0.03354
Fitness Level                                     :	0.06803,	0.08574
Knowledge                                         :	0.15629,	0.13611


<Figure size 640x480 with 0 Axes>

<Figure size 600x990 with 0 Axes>

<Figure size 600x990 with 0 Axes>

<Figure size 800x2500 with 0 Axes>

<Figure size 800x2500 with 0 Axes>

<Figure size 400x2500 with 0 Axes>