In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

%matplotlib inline
pd.set_option('display.max_columns', None)

In [None]:
# Base hex colours used as the notebook-wide default colour cycle.
colors = [
    "#C37681",
    "#5BAC82",
    "#838385",
    "#89C1DF",
    "#EAE086",
    "#F1B87E",
]
# set_palette resolves its argument through color_palette itself, so the list
# can be handed over directly.
sns.set_palette(colors)

In [None]:
# One "light:<hex>" seaborn palette per domain name, each built from a single
# base hex colour. The order of the hex codes matches the order of the names
# on the left-hand side.
adaptive, cognitive, communication, motor, socialemo = (
    sns.color_palette(f'light:{base_hex}')
    for base_hex in ("#C37681", "#EAE086", "#838385", "#89C1DF", "#5BAC82")
)


In [None]:
# Load the TEIS dataset; the path is relative to the notebook's working directory.
teis = pd.read_csv(filepath_or_buffer='data/teis.csv')

In [None]:
# Peek at a single record to check the column layout.
teis.head(n=1)

In [None]:
# Domain-level percentile ranks, plus the BDI-3 total.
teis_pr_domains = teis[[
    'Adaptive Percentile Rank',
    'Social-Emotional Percentile Rank',
    'Communication Percentile Rank',
    'Motor Percentile Rank',
    'Cognitive Percentile Rank',
    'BDI-3 Total Percentile Rank',
]]
# Subdomain-level percentile ranks, listed grouped by their parent domain prefix.
teis_pr_subdomains = teis[[
    # Adaptive
    'Adaptive-Self Care PR',
    'Adaptive-Personal Responsibility PR',
    # Social Emotional
    'Social Emotional-Adult Interaction PR',
    'Social Emotional-Peer Interaction PR',
    'Social Emotional-Self Concept / Social Role PR',
    # Communication
    'Communication-Receptive Communication PR',
    'Communication-Expressive Communication PR',
    # Motor
    'Motor-Gross Motor PR',
    'Motor-Fine Motor PR',
    'Motor-Perceptual Motor PR',
    # Cognitive
    'Cognitive-Attention and Memory PR',
    'Cognitive-Reasoning / Academic Skills PR',
    'Cognitive-Perception and Concepts PR',
]]

In [None]:
# Box plot of every domain-level percentile-rank column, one box per column.
fig, ax = plt.subplots()
sns.boxplot(data=teis_pr_domains, ax=ax)
# Dashed reference line at the 50th percentile.
ax.axhline(y=50, color='r', linestyle='--', label='50th Percentile')
ax.set_title('Box Plot for Percentile Ranks')
ax.set_ylabel('Percentile Rank')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')  # x-axis labels at a 45-degree angle
ax.legend()
# bbox_inches='tight' expands the saved canvas to fit the rotated tick labels;
# without it the long column names are cut off at the bottom of the PNG even
# though the inline rendering looks complete.
fig.savefig('data/q1_domain_chart.png', bbox_inches='tight')

In [None]:
# Box plot of every subdomain percentile-rank column, using the default palette.
ax = sns.boxplot(data=teis_pr_subdomains)
# Dashed horizontal marker at the 50th percentile.
ax.axhline(50, linestyle='--', color='r', label='50th Percentile')
ax.set(title='Box Plot for Percentile Ranks', ylabel='Percentile Rank')
# Tilt the x tick labels 45 degrees, anchored on their right edge.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend()


In [None]:
# One colour per subdomain box, repeated 2/3/2/3/3 times so that boxes sharing a
# parent-domain prefix in teis_pr_subdomains' column order share a colour.
# Note: this rebinds the notebook-level `colors` name to the 13-entry list.
colors = ["#C37681"] * 2 + ["#5BAC82"] * 3 + ["#838385"] * 2 + ["#89C1DF"] * 3 + ["#EAE086"] * 3

# Create a boxplot using seaborn with custom color assignments
fig, ax = plt.subplots()
sns.boxplot(data=teis_pr_subdomains, palette=colors, ax=ax)

# Add a horizontal line at the 50th percentile mark
ax.axhline(y=50, color='r', linestyle='--', label='50th Percentile')

# Set plot title and labels
ax.set_title('Box Plot for Percentile Ranks')
ax.set_ylabel('Percentile Rank')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')  # x-axis labels at a 45-degree angle
ax.legend()
# bbox_inches='tight' expands the saved canvas to fit the rotated tick labels;
# without it the long subdomain names are clipped in the PNG.
fig.savefig('data/q1_subdomain_chart.png', bbox_inches='tight')


# Drop records whose five domain percentile ranks are all exactly 0; a record
# with at least one non-zero domain rank is kept. The BDI-3 total column is
# not part of the check.
teis_pr_domains = teis_pr_domains.loc[
    lambda frame: ~frame[[
        'Adaptive Percentile Rank',
        'Social-Emotional Percentile Rank',
        'Communication Percentile Rank',
        'Motor Percentile Rank',
        'Cognitive Percentile Rank',
    ]].eq(0).all(axis=1)
]
teis_pr_domains

Use both the `teis_pr_domains` and `teis_pr_subdomains` dataframes. Find the min, max, and mean for each percentile rank, then find the standard deviation for each column. From there, look at the standard deviations across the entire dataframe to determine patterns in standard deviation.

In [None]:
# Work on an independent deep copy so added columns never touch teis_pr_domains.
teis_pr_domains1 = teis_pr_domains.copy(deep=True)

In [None]:
# Row-wise summary statistics across the percentile-rank score columns.
#
# All four statistics are computed from the same fixed set of score columns.
# Computing each one over the whole frame (as was done before) lets every
# newly added summary column leak into the next calculation: 'std' would
# include 'min' and 'max', and 'mean_score' would include 'min', 'max' and
# 'std'. Excluding the summary columns by name also makes the cell safe to
# re-run.
# NOTE(review): the score columns still include 'BDI-3 Total Percentile Rank'
# when it is present in the frame -- confirm whether the total should count
# towards the per-row spread.
summary_cols = {'min', 'max', 'std', 'mean_score'}
score_cols = [col for col in teis_pr_domains1.columns if col not in summary_cols]
scores = teis_pr_domains1[score_cols]

teis_pr_domains1['min'] = scores.min(axis=1)
teis_pr_domains1['max'] = scores.max(axis=1)
teis_pr_domains1['std'] = scores.std(axis=1).round(2)
teis_pr_domains1['mean_score'] = scores.mean(axis=1).round(2)

In [None]:
# Display the domain frame together with its row-wise min / max / std / mean_score columns.
teis_pr_domains1

In [None]:
# Pairwise Pearson correlations between all columns (scores and row-wise summaries), to 3 dp.
teis_pr_domains1.corr(method='pearson').round(decimals=3)

In [None]:
# Column-wise descriptive statistics (count, mean, std, quartiles, extremes), to 2 dp.
teis_pr_domains1.describe().round(decimals=2)

In [None]:
# Independent deep copy that will receive the mean-difference columns.
teis_pr_domains2 = teis_pr_domains1.copy(deep=True)

In [None]:
# For each domain, add "<ABBREV> PR Mean Dif": the record's percentile rank minus
# that column's mean over all records, rounded to 2 dp.
for abbrev, domain_col in (
    ('ADAP', 'Adaptive Percentile Rank'),
    ('SOC', 'Social-Emotional Percentile Rank'),
    ('COM', 'Communication Percentile Rank'),
    ('MOT', 'Motor Percentile Rank'),
    ('COG', 'Cognitive Percentile Rank'),
):
    domain_scores = teis_pr_domains2[domain_col]
    teis_pr_domains2[f'{abbrev} PR Mean Dif'] = (domain_scores - domain_scores.mean()).round(2)


In [None]:
# Reorder so each domain score sits next to its mean-difference column, with the
# row-wise summaries last. Columns not listed here (e.g. the BDI-3 total) are dropped.
teis_pr_domains2 = teis_pr_domains2[[
    'Adaptive Percentile Rank', 'ADAP PR Mean Dif',
    'Social-Emotional Percentile Rank', 'SOC PR Mean Dif',
    'Communication Percentile Rank', 'COM PR Mean Dif',
    'Motor Percentile Rank', 'MOT PR Mean Dif',
    'Cognitive Percentile Rank', 'COG PR Mean Dif',
    'mean_score', 'min', 'max', 'std',
]]
teis_pr_domains2

In [None]:
# Descriptive statistics for every column of teis_pr_domains2, rounded to 2 dp.
teis_q2_data = teis_pr_domains2.describe().round(2)
# describe() stores the statistic names (count, mean, std, min, quartiles, max)
# in the index, so the index must be written out; dropping it leaves eight
# unlabeled rows in the CSV. index_label gives that first column a header.
teis_q2_data.to_csv('data/teis_q2_data.csv', index_label='statistic')
teis_q2_data

In [None]:
# Pairwise Pearson correlations between all columns of the reordered frame, to 3 dp.
teis_pr_domains2.corr(method='pearson').round(decimals=3)

In [None]:
# Records with a wide spread: row minimum below 16 AND row maximum above 81.
# NOTE(review): the cut-offs are said to come from the 75th percentile --
# presumably read off an earlier describe() output; confirm they are still current.
teis_min_max = teis_pr_domains1.loc[
    lambda frame: frame['min'].lt(16) & frame['max'].gt(81)
]
teis_min_max.corr(method='pearson').round(decimals=3)

In [None]:
# Records whose row minimum is 0 or less AND whose row maximum is above 81.
# NOTE(review): the cut-offs are said to come from the 25th and 75th percentiles --
# presumably read off an earlier describe() output; confirm they are still current.
teis_min_max2 = teis_pr_domains1.loc[
    lambda frame: frame['min'].le(0) & frame['max'].gt(81)
]
teis_min_max2.corr(method='pearson').round(decimals=3)

In [None]:
# Records whose row minimum is below 16, regardless of their maximum.
teis_average = teis_pr_domains1.loc[lambda frame: frame['min'].lt(16)]
teis_average.corr(method='pearson').round(decimals=3)

In [None]:
# Records whose row maximum is above 81, regardless of their minimum.
teis_max3 = teis_pr_domains1.loc[lambda frame: frame['max'].gt(81)]
teis_max3.corr(method='pearson').round(decimals=3)