In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
def summ(num1, *args):
    """Add ``num1`` and every additional positional argument together.

    The values are combined left to right with ``+``, starting from ``num1``,
    so ``summ(1, 2, 3)`` returns ``6`` and ``summ(5)`` returns ``5``.
    """
    from functools import reduce

    return reduce(lambda running, addend: running + addend, args, num1)

In [None]:
# Both sheets live in the same workbook, so read them with a single call.
# Passing a list to `sheet_name` makes pandas return a dict of DataFrames keyed
# by sheet name, so the workbook is opened and parsed once instead of twice.
WORKBOOK_PATH = '../data/TEIS-NSS Project Data 10-2022.xlsx'
ECO_SHEET = 'ECO with Exit21-22'
ELIG_SHEET = 'Elig Timeline Rpt 2018-2022'

workbook_sheets = pd.read_excel(WORKBOOK_PATH, sheet_name=[ECO_SHEET, ELIG_SHEET])
eco_df = workbook_sheets[ECO_SHEET]
elig_df = workbook_sheets[ELIG_SHEET]

In [None]:
# Preview the first rows of the ECO sheet to check which columns were loaded.
eco_df.head()

In [None]:
# Keep only the child id, the entry/exit scale scores and the OC1-OC3 outcome
# columns; every other column of the ECO frame is dropped in place.
eco_keep_columns = [
    'CHILD_ID',
    'Ent SOCIAL_SCALE', 'Ent KNOWLEDGE_SCALE', 'Ent APPROPRIATE_ACTION_SCALE',
    'Exit SOCIAL_SCALE', 'Exit KNOWLEDGE_SCALE', 'Exit APPROPRIATE_ACTION_SCALE',
    'OC1 - a', 'OC1 - b', 'OC1 - c', 'OC1 - d', 'OC1 - e', 'OC1 Data Check',
    'OC2 - a', 'OC2 - b', 'OC2 - c', 'OC2 - d', 'OC2 - e', 'OC2 Data Check',
    'OC3 - a', 'OC3 - b', 'OC3 - c', 'OC3 - d', 'OC3 - e', 'OC3 Data Check',
]
eco_unused_columns = eco_df.columns.difference(eco_keep_columns)
eco_df.drop(columns=eco_unused_columns, inplace=True)
display(eco_df)

In [None]:
# Preview the first rows of the eligibility sheet to check which columns were loaded.
elig_df.head()

In [None]:
# Keep only the child id and the initial eligibility category; every other
# column of the eligibility frame is dropped in place.
elig_keep_columns = ['Child ID', 'Init. Elig. Category']
elig_df.drop(columns=elig_df.columns.difference(elig_keep_columns), inplace=True)
# Use the notebook's rich display, as the matching ECO cell does, instead of
# print(), which renders the frame as plain text.
display(elig_df)

In [None]:
# Drop the rows whose OC2 data check equals 0. Rows with a missing check compare
# as "not equal to 0", so they are still kept by this step.
oc2_check_nonzero = eco_df['OC2 Data Check'].ne(0)
eco_df = eco_df.loc[oc2_check_nonzero]
display(eco_df)

In [None]:
# Keep only the rows that have a (non-missing) OC2 data check value.
has_oc2_check = eco_df['OC2 Data Check'].notnull()
eco_df = eco_df.loc[has_oc2_check]
display(eco_df)

In [None]:
# Give the ECO columns short snake_case names. The frame is renamed in place.
eco_column_names = {
    'CHILD_ID': 'child_id',
    'Ent SOCIAL_SCALE': 'ent_social',
    'Ent KNOWLEDGE_SCALE': 'ent_knowledge',
    'Ent APPROPRIATE_ACTION_SCALE': 'ent_app_action',
    'Exit SOCIAL_SCALE': 'exit_social',
    'Exit KNOWLEDGE_SCALE': 'exit_knowledge',
    'Exit APPROPRIATE_ACTION_SCALE': 'exit_app_action',
    'OC1 - a': 'oc1a',
    'OC1 - b': 'oc1b',
    'OC1 - c': 'oc1c',
    'OC1 - d': 'oc1d',
    'OC1 - e': 'oc1e',
    'OC1 Data Check': 'oc1_data_check',
    'OC2 - a': 'oc2a',
    'OC2 - b': 'oc2b',
    'OC2 - c': 'oc2c',
    'OC2 - d': 'oc2d',
    'OC2 - e': 'oc2e',
    'OC2 Data Check': 'oc2_data_check',
    'OC3 - a': 'oc3a',
    'OC3 - b': 'oc3b',
    'OC3 - c': 'oc3c',
    'OC3 - d': 'oc3d',
    'OC3 - e': 'oc3e',
    'OC3 Data Check': 'oc3_data_check',
}
eco_df.rename(columns=eco_column_names, inplace=True)

In [None]:
# Give the eligibility columns snake_case names; `child_id` matches the key used
# in the ECO frame. The frame is renamed in place.
elig_column_names = {
    'Child ID': 'child_id',
    'Init. Elig. Category': 'init_elig_category',
}
elig_df.rename(columns=elig_column_names, inplace=True)

In [None]:
# Attach the initial eligibility category to the ECO scores. This is an inner
# join on child_id, so only ids present in both frames are kept.
# NOTE(review): if a child_id can appear more than once in elig_df, its ECO row
# is repeated once per match — confirm the eligibility sheet has one row per child.
eco_elig_combined = eco_df.merge(elig_df, how='inner', on='child_id')
eco_elig_combined.info()

In [None]:
# Exit-minus-entry change for each of the three scale scores, kept next to the
# initial eligibility category.
# `assign` builds the new columns on a fresh frame. Adding them to a column
# subset of eco_elig_combined (a slice) triggers SettingWithCopyWarning.
diff_by_elig = eco_elig_combined.assign(
    social_diff=eco_elig_combined['exit_social'] - eco_elig_combined['ent_social'],
    knowledge_diff=eco_elig_combined['exit_knowledge'] - eco_elig_combined['ent_knowledge'],
    app_action_diff=eco_elig_combined['exit_app_action'] - eco_elig_combined['ent_app_action'],
)[['social_diff', 'knowledge_diff', 'app_action_diff', 'init_elig_category']]
display(diff_by_elig)

## Total improvement in each outcome area by eligibility category

In [None]:
# Total change in each score per initial eligibility category, plus the number
# of rows that fall in each category.
rows_by_category = diff_by_elig.groupby('init_elig_category')
sum_improvement = rows_by_category.sum()
sum_improvement['category_count'] = rows_by_category['init_elig_category'].count()
display(sum_improvement)

## Average improvement in each outcome area by eligibility category

In [None]:
# Mean change in each score per initial eligibility category.
diffs_grouped_by_category = diff_by_elig.groupby('init_elig_category')
avg_improvement = diffs_grouped_by_category.mean()
display(avg_improvement)

## Probability of applicant improving 

#### Counts of outcomes for the social category. 

In [None]:
# Count the rows whose social score went up, stayed the same, or went down.
# Vectorised comparisons replace a row loop that used the Series itself as the
# loop target (writing every value back into it) and whose bare `else` counted
# missing differences as losses. A NaN difference (missing entry or exit score)
# matches none of the three comparisons, so it is left out of the counts.
social_outcomes = diff_by_elig['social_diff']
social_gain = int((social_outcomes > 0).sum())
social_same = int((social_outcomes == 0).sum())
social_loss = int((social_outcomes < 0).sum())

#### Counts of outcomes for the knowledge category.

In [None]:
# Count the rows whose knowledge score went up, stayed the same, or went down.
# Vectorised comparisons replace a row loop that used the Series itself as the
# loop target (writing every value back into it) and whose bare `else` counted
# missing differences as losses. A NaN difference (missing entry or exit score)
# matches none of the three comparisons, so it is left out of the counts.
knowledge_outcomes = diff_by_elig['knowledge_diff']
knowledge_gain = int((knowledge_outcomes > 0).sum())
knowledge_same = int((knowledge_outcomes == 0).sum())
knowledge_loss = int((knowledge_outcomes < 0).sum())

#### Counts of outcomes for the app action category.

In [None]:
# Count the rows whose appropriate-action score went up, stayed the same, or
# went down. Vectorised comparisons replace a row loop that used the Series
# itself as the loop target (writing every value back into it) and whose bare
# `else` counted missing differences as losses. A NaN difference (missing entry
# or exit score) matches none of the three comparisons, so it is left out.
app_action_outcomes = diff_by_elig['app_action_diff']
action_gain = int((app_action_outcomes > 0).sum())
action_same = int((app_action_outcomes == 0).sum())
action_loss = int((app_action_outcomes < 0).sum())

#### Data frame with the outcome counts.

In [None]:
# One row per outcome and one column per score area, over all eligibility
# categories.
outcome_data = {
    'outcome': ['gain', 'stayed the same', 'loss'],
    'social': [social_gain, social_same, social_loss],
    'knowledge': [knowledge_gain, knowledge_same, knowledge_loss],
    'app_action': [action_gain, action_same, action_loss],
}
outcome_counts = pd.DataFrame(data=outcome_data)
display(outcome_counts)

#### Data frame with the probability of each outcome.

In [None]:
# Share of rows with each outcome, per score area: every count is divided by the
# total of the three counts for that area.
social_total = summ(social_gain, social_loss, social_same)
knowledge_total = summ(knowledge_gain, knowledge_loss, knowledge_same)
action_total = summ(action_gain, action_loss, action_same)

outcome_probability_data = {
    'outcome': ['gain', 'stay the same', 'loss'],
    'social_probability': [
        count / social_total
        for count in (social_gain, social_same, social_loss)
    ],
    'knowledge_probability': [
        count / knowledge_total
        for count in (knowledge_gain, knowledge_same, knowledge_loss)
    ],
    'app_action_probability': [
        count / action_total
        for count in (action_gain, action_same, action_loss)
    ],
}
outcome_probability = pd.DataFrame(data=outcome_probability_data)
display(outcome_probability)

## Probability of applicant improving based on the eligibility category

### Developmental Evaluation:

In [None]:
# Rows whose initial eligibility category is exactly 'Developmental Evaluation'.
is_dev_eval = diff_by_elig['init_elig_category'].eq('Developmental Evaluation')
diff_by_dev_eval = diff_by_elig.loc[is_dev_eval]
display(diff_by_dev_eval)

#### Counts of social outcomes for the Developmental Evaluation category.

In [None]:
# Count the Developmental Evaluation rows whose social score went up, stayed the
# same, or went down. Vectorised comparisons replace a row loop that used the
# Series itself as the loop target (writing every value back into it) and whose
# bare `else` counted missing differences as losses. A NaN difference matches
# none of the three comparisons, so it is left out of the counts.
social_outcomes_de = diff_by_dev_eval['social_diff']
social_gain_de = int((social_outcomes_de > 0).sum())
social_same_de = int((social_outcomes_de == 0).sum())
social_loss_de = int((social_outcomes_de < 0).sum())

#### Counts of knowledge outcomes for the Developmental Evaluation category.

In [None]:
# Count the Developmental Evaluation rows whose knowledge score went up, stayed
# the same, or went down. Vectorised comparisons replace a row loop that used
# the Series itself as the loop target (writing every value back into it) and
# whose bare `else` counted missing differences as losses. A NaN difference
# matches none of the three comparisons, so it is left out of the counts.
knowledge_outcomes_de = diff_by_dev_eval['knowledge_diff']
knowledge_gain_de = int((knowledge_outcomes_de > 0).sum())
knowledge_same_de = int((knowledge_outcomes_de == 0).sum())
knowledge_loss_de = int((knowledge_outcomes_de < 0).sum())

#### Counts of app action outcomes for the Developmental Evaluation category.

In [None]:
# Count the Developmental Evaluation rows whose appropriate-action score went
# up, stayed the same, or went down. Vectorised comparisons replace a row loop
# that used the Series itself as the loop target (writing every value back into
# it) and whose bare `else` counted missing differences as losses. A NaN
# difference matches none of the three comparisons, so it is left out.
app_action_outcomes_de = diff_by_dev_eval['app_action_diff']
action_gain_de = int((app_action_outcomes_de > 0).sum())
action_same_de = int((app_action_outcomes_de == 0).sum())
action_loss_de = int((app_action_outcomes_de < 0).sum())

#### Data frame with outcome counts for Developmental Evaluation.

In [None]:
# One row per outcome and one column per score area, for the Developmental
# Evaluation rows only.
outcome_data_de = {
    'outcome': ['gain', 'stayed the same', 'loss'],
    'social': [social_gain_de, social_same_de, social_loss_de],
    'knowledge': [knowledge_gain_de, knowledge_same_de, knowledge_loss_de],
    'app_action': [action_gain_de, action_same_de, action_loss_de],
}
dev_eval_counts = pd.DataFrame(data=outcome_data_de)
display(dev_eval_counts)

#### Data frame with the probability for each outcome in the Developmental Evaluation category.

In [None]:
# Share of Developmental Evaluation rows with each outcome, per score area:
# every count is divided by the total of the three counts for that area.
social_total_de = summ(social_gain_de, social_loss_de, social_same_de)
knowledge_total_de = summ(knowledge_gain_de, knowledge_loss_de, knowledge_same_de)
action_total_de = summ(action_gain_de, action_loss_de, action_same_de)

outcome_probability_data_de = {
    'outcome': ['gain', 'stay the same', 'loss'],
    'social_probability': [
        count / social_total_de
        for count in (social_gain_de, social_same_de, social_loss_de)
    ],
    'knowledge_probability': [
        count / knowledge_total_de
        for count in (knowledge_gain_de, knowledge_same_de, knowledge_loss_de)
    ],
    'app_action_probability': [
        count / action_total_de
        for count in (action_gain_de, action_same_de, action_loss_de)
    ],
}
dev_eval_probability = pd.DataFrame(data=outcome_probability_data_de)
display(dev_eval_probability)

### Diagnosed Condition:

In [None]:
# Rows whose initial eligibility category is exactly 'Diagnosed Condition'.
is_dia_con = diff_by_elig['init_elig_category'].eq('Diagnosed Condition')
diff_by_dia_con = diff_by_elig.loc[is_dia_con]
display(diff_by_dia_con)

#### Counts of social outcomes for the Diagnosed Condition category.

In [None]:
# Count the Diagnosed Condition rows whose social score went up, stayed the
# same, or went down. Vectorised comparisons replace a row loop that used the
# Series itself as the loop target (writing every value back into it) and whose
# bare `else` counted missing differences as losses. A NaN difference matches
# none of the three comparisons, so it is left out of the counts.
social_outcomes_dc = diff_by_dia_con['social_diff']
social_gain_dc = int((social_outcomes_dc > 0).sum())
social_same_dc = int((social_outcomes_dc == 0).sum())
social_loss_dc = int((social_outcomes_dc < 0).sum())

#### Counts of knowledge outcomes for the Diagnosed Condition category.

In [None]:
# Count the Diagnosed Condition rows whose knowledge score went up, stayed the
# same, or went down. Vectorised comparisons replace a row loop that used the
# Series itself as the loop target (writing every value back into it) and whose
# bare `else` counted missing differences as losses. A NaN difference matches
# none of the three comparisons, so it is left out of the counts.
knowledge_outcomes_dc = diff_by_dia_con['knowledge_diff']
knowledge_gain_dc = int((knowledge_outcomes_dc > 0).sum())
knowledge_same_dc = int((knowledge_outcomes_dc == 0).sum())
knowledge_loss_dc = int((knowledge_outcomes_dc < 0).sum())

#### Counts of app action outcomes for the Diagnosed Condition category.

In [None]:
# Count the Diagnosed Condition rows whose appropriate-action score went up,
# stayed the same, or went down. Vectorised comparisons replace a row loop that
# used the Series itself as the loop target (writing every value back into it)
# and whose bare `else` counted missing differences as losses. A NaN difference
# matches none of the three comparisons, so it is left out of the counts.
app_action_outcomes_dc = diff_by_dia_con['app_action_diff']
action_gain_dc = int((app_action_outcomes_dc > 0).sum())
action_same_dc = int((app_action_outcomes_dc == 0).sum())
action_loss_dc = int((app_action_outcomes_dc < 0).sum())

#### Data frame with the outcome counts for Diagnosed Condition.

In [None]:
# One row per outcome and one column per score area, for the Diagnosed
# Condition rows only.
outcome_data_dc = {
    'outcome': ['gain', 'stayed the same', 'loss'],
    'social': [social_gain_dc, social_same_dc, social_loss_dc],
    'knowledge': [knowledge_gain_dc, knowledge_same_dc, knowledge_loss_dc],
    'app_action': [action_gain_dc, action_same_dc, action_loss_dc],
}
dia_con_counts = pd.DataFrame(data=outcome_data_dc)
display(dia_con_counts)

#### Data frame with the outcome probabilities in the Diagnosed Condition category.

In [None]:
# Share of Diagnosed Condition rows with each outcome, per score area: every
# count is divided by the total of the three counts for that area.
social_total_dc = summ(social_gain_dc, social_loss_dc, social_same_dc)
knowledge_total_dc = summ(knowledge_gain_dc, knowledge_loss_dc, knowledge_same_dc)
action_total_dc = summ(action_gain_dc, action_loss_dc, action_same_dc)

outcome_probability_data_dc = {
    'outcome': ['gain', 'stay the same', 'loss'],
    'social_probability': [
        count / social_total_dc
        for count in (social_gain_dc, social_same_dc, social_loss_dc)
    ],
    'knowledge_probability': [
        count / knowledge_total_dc
        for count in (knowledge_gain_dc, knowledge_same_dc, knowledge_loss_dc)
    ],
    'app_action_probability': [
        count / action_total_dc
        for count in (action_gain_dc, action_same_dc, action_loss_dc)
    ],
}
dia_con_probability = pd.DataFrame(data=outcome_probability_data_dc)
display(dia_con_probability)

### Both Developmental Evaluation & Diagnosed Condition:

In [None]:
# Rows whose initial eligibility category is exactly the combined label
# 'Diagnosed Condition, Developmental Evaluation'.
is_both = diff_by_elig['init_elig_category'].eq('Diagnosed Condition, Developmental Evaluation')
diff_by_both = diff_by_elig.loc[is_both]
display(diff_by_both)

#### Counts of social outcomes for the "both" category.

In [None]:
# Count the "both" category rows whose social score went up, stayed the same,
# or went down. Vectorised comparisons replace a row loop that used the Series
# itself as the loop target (writing every value back into it) and whose bare
# `else` counted missing differences as losses. A NaN difference matches none
# of the three comparisons, so it is left out of the counts.
social_outcomes_b = diff_by_both['social_diff']
social_gain_b = int((social_outcomes_b > 0).sum())
social_same_b = int((social_outcomes_b == 0).sum())
social_loss_b = int((social_outcomes_b < 0).sum())

#### Counts of knowledge outcomes for the "both" category.

In [None]:
# Count the "both" category rows whose knowledge score went up, stayed the
# same, or went down. Vectorised comparisons replace a row loop that used the
# Series itself as the loop target (writing every value back into it) and whose
# bare `else` counted missing differences as losses. A NaN difference matches
# none of the three comparisons, so it is left out of the counts.
knowledge_outcomes_b = diff_by_both['knowledge_diff']
knowledge_gain_b = int((knowledge_outcomes_b > 0).sum())
knowledge_same_b = int((knowledge_outcomes_b == 0).sum())
knowledge_loss_b = int((knowledge_outcomes_b < 0).sum())

#### Counts of app action outcomes for the "both" category.

In [None]:
# Count the "both" category rows whose appropriate-action score went up, stayed
# the same, or went down. Vectorised comparisons replace a row loop that used
# the Series itself as the loop target (writing every value back into it) and
# whose bare `else` counted missing differences as losses. A NaN difference
# matches none of the three comparisons, so it is left out of the counts.
app_action_outcomes_b = diff_by_both['app_action_diff']
action_gain_b = int((app_action_outcomes_b > 0).sum())
action_same_b = int((app_action_outcomes_b == 0).sum())
action_loss_b = int((app_action_outcomes_b < 0).sum())

#### Data frame with outcome counts for the "both" category.

In [None]:
# One row per outcome and one column per score area, for the "both" category
# rows only.
outcome_data_b = {
    'outcome': ['gain', 'stayed the same', 'loss'],
    'social': [social_gain_b, social_same_b, social_loss_b],
    'knowledge': [knowledge_gain_b, knowledge_same_b, knowledge_loss_b],
    'app_action': [action_gain_b, action_same_b, action_loss_b],
}
both_counts = pd.DataFrame(data=outcome_data_b)
display(both_counts)

#### Data frame with the outcome probabilities for the "both" category.

In [None]:
# Share of "both" category rows with each outcome, per score area: every count
# is divided by the total of the three counts for that area.
social_total_b = summ(social_gain_b, social_loss_b, social_same_b)
knowledge_total_b = summ(knowledge_gain_b, knowledge_loss_b, knowledge_same_b)
action_total_b = summ(action_gain_b, action_loss_b, action_same_b)

outcome_probability_data_b = {
    'outcome': ['gain', 'stay the same', 'loss'],
    'social_probability': [
        count / social_total_b
        for count in (social_gain_b, social_same_b, social_loss_b)
    ],
    'knowledge_probability': [
        count / knowledge_total_b
        for count in (knowledge_gain_b, knowledge_same_b, knowledge_loss_b)
    ],
    'app_action_probability': [
        count / action_total_b
        for count in (action_gain_b, action_same_b, action_loss_b)
    ],
}
both_probability = pd.DataFrame(data=outcome_probability_data_b)
display(both_probability)

## Graphs for the collected data.

In [None]:
# Outcome labels in plotting order, each paired with the color code used for
# its pie wedge.
labels = ['gain', 'stay the same', 'loss']
colors = dict(zip(labels, ('C2', 'C1', 'C3')))

#### Pie chart for Developmental Evaluation probabilities.

In [None]:
# Move the outcome label into the index.
# The guard keeps this cell re-runnable: once 'outcome' has become the index it
# is no longer a column, and calling set_index('outcome') again raises KeyError.
if 'outcome' in dev_eval_probability.columns:
    dev_eval_probability = dev_eval_probability.set_index('outcome')
display(dev_eval_probability)

In [None]:
# One pie per probability column; wedge colors follow the order of `labels`.
wedge_colors = [colors[label] for label in labels]
dev_eval_probability.plot.pie(
    subplots=True,
    figsize=(14, 8),
    autopct='%.0f%%',
    legend=False,
    colors=wedge_colors,
)

#### Pie chart for Diagnosed Condition probabilities.

In [None]:
# Move the outcome label into the index.
# The guard keeps this cell re-runnable: once 'outcome' has become the index it
# is no longer a column, and calling set_index('outcome') again raises KeyError.
if 'outcome' in dia_con_probability.columns:
    dia_con_probability = dia_con_probability.set_index('outcome')
display(dia_con_probability)

In [None]:
# One pie per probability column; wedge colors follow the order of `labels`.
wedge_colors = [colors[label] for label in labels]
dia_con_probability.plot.pie(
    subplots=True,
    figsize=(14, 8),
    autopct='%.0f%%',
    legend=False,
    colors=wedge_colors,
)

#### Pie chart for the probabilities of the "both" category.

In [None]:
# Move the outcome label into the index.
# The guard keeps this cell re-runnable: once 'outcome' has become the index it
# is no longer a column, and calling set_index('outcome') again raises KeyError.
if 'outcome' in both_probability.columns:
    both_probability = both_probability.set_index('outcome')
display(both_probability)

In [None]:
# One pie per probability column; wedge colors follow the order of `labels`.
wedge_colors = [colors[label] for label in labels]
both_probability.plot.pie(
    subplots=True,
    figsize=(14, 8),
    autopct='%.0f%%',
    legend=False,
    colors=wedge_colors,
)