In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

***Creating the Dataframes that will be used for the Notebook***

In [None]:
# Load the BDI-3 evaluation export into the main working frame.
# NOTE(review): absolute, machine-specific path - the notebook only runs where this exact file exists.
bdi3_workbook = r'C:\Users\Thund\NSS_Data_Analytics\Projects\TEIS\teis-concatahoula_leopards\data\BDI3 All Evals for NSS 11-2023.xlsx'
teis_df = pd.read_excel(bdi3_workbook)

In [None]:
# Preview the first rows of the loaded frame.
teis_df.head()

In [None]:
# Summary statistics for the loaded frame.
teis_df.describe()

In [None]:
# Drop exact duplicate rows, then renumber the rows 0..n-1.
# The previous bare `teis_df.reset_index()` discarded its result, so the index
# kept gaps wherever duplicates had been removed; drop=True avoids adding the
# old index back as an extra column.
teis_df = teis_df.drop_duplicates().reset_index(drop=True)
# Shorten the location column name for the group-bys further down.
teis_df = teis_df.rename(columns={'Location - Sub Level 1': 'Location'})

In [None]:
# Data type checking cell: Python type of the 'Adaptive RDI' value stored at row label 2.
type(teis_df.loc[2, 'Adaptive RDI'])

***Cleaning the main dataframe: teis_df***

In [None]:
# Remove the 'Code 1' ... 'Code 10' columns from the working frame.
code_columns = [f'Code {n}' for n in range(1, 11)]
teis_df = teis_df.drop(columns=code_columns)

In [None]:
# Display the working frame as it stands at this point in the notebook.
teis_df

In [None]:
# Show the last 3297 rows of the frame.
# NOTE(review): 3297 is a hard-coded row count - confirm it still matches the data.
teis_df.iloc[-3297:]

#no_ids=teis_df.tail(3291)
#no_ids

In [None]:
# Number of non-null Child IDs recorded for each Location.
child_ids_by_location = teis_df.groupby('Location')['Child ID']
counts = child_ids_by_location.count().reset_index()
counts

In [None]:
# Number of non-null Child IDs recorded under each Program Label.
child_ids_by_program = teis_df.groupby('Program Label')['Child ID']
level_counts = child_ids_by_program.count()
level_counts

In [None]:
# Keep only the identifier, gender and the five domain-level developmental quotients.
domain_columns = [
    'Child ID',
    'Gender',
    'Adaptive Developmental Quotient',
    'Social-Emotional Developmental Quotient',
    'Communication Developmental Quotient',
    'Motor Developmental Quotient',
    'Cognitive Developmental Quotient',
]
domain_df = teis_df[domain_columns]
domain_df

In [None]:
n_bins = 20
fig, axs = plt.subplots(1, 5, sharey=True, figsize=(15, 3))

# One histogram panel per domain: (panel title, source column).
domain_panels = [
    ('Adaptive', 'Adaptive Developmental Quotient'),
    ('Social-Emotional', 'Social-Emotional Developmental Quotient'),
    ('Communication', 'Communication Developmental Quotient'),
    ('Motor', 'Motor Developmental Quotient'),
    ('Cognitive', 'Cognitive Developmental Quotient'),
]
for panel, (panel_title, dq_column) in zip(axs, domain_panels):
    panel.hist(domain_df[dq_column], bins=n_bins, histtype='stepfilled')
    panel.set_title(panel_title)

# Rows whose Gender is recorded as 'U'.
is_u_gender = domain_df['Gender'] == 'U'
u_gender = domain_df[is_u_gender]
u_gender

****Gathering Averages for each column****

In [None]:
# Mean Adaptive DQ per gender, as a one-column frame indexed by Gender.
# .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a FutureWarning;
# the computed values are the same.
adaptive_avg = domain_df.groupby('Gender')['Adaptive Developmental Quotient'].mean().to_frame()
adaptive_avg


In [None]:
# Mean Social-Emotional DQ per gender, as a one-column frame indexed by Gender.
# .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a FutureWarning;
# the computed values are the same.
social_avg = domain_df.groupby('Gender')['Social-Emotional Developmental Quotient'].mean().to_frame()
social_avg

In [None]:
# Start the shared averages frame by joining the first two per-gender averages on Gender.
average_df = pd.merge(adaptive_avg, social_avg, how='outer', on='Gender')
average_df

****Averaging the rest of the columns****

In [None]:
# Mean Communication DQ per gender, as a one-column frame indexed by Gender.
# .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a FutureWarning;
# the computed values are the same.
comm_avg = domain_df.groupby('Gender')['Communication Developmental Quotient'].mean().to_frame()
comm_avg

In [None]:
# Mean Motor DQ per gender, as a one-column frame indexed by Gender.
# .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a FutureWarning;
# the computed values are the same.
motor_avg = domain_df.groupby('Gender')['Motor Developmental Quotient'].mean().to_frame()
motor_avg

In [None]:
# Mean Cognitive DQ per gender, as a one-column frame indexed by Gender.
# .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a FutureWarning;
# the computed values are the same.
cog_avg = domain_df.groupby('Gender')['Cognitive Developmental Quotient'].mean().to_frame()
cog_avg

# Adaptive + Social-Emotional averages, joined on Gender.
average_df = pd.merge(adaptive_avg, social_avg, how='outer', on='Gender')
average_df

# Add the Communication averages to the running frame.
average_df = pd.merge(average_df, comm_avg, how='outer', on='Gender')
average_df

****Merging the averaged columns into 1 dataframe****

In [None]:
# Build the five-domain averages frame directly from the per-domain frames.
# Extending the existing average_df merged comm_avg a second time (it had
# already been merged in), which produced duplicate
# 'Communication Developmental Quotient_x' / '_y' columns, and the result
# changed every time the cell was re-run. Starting from adaptive_avg makes the
# cell idempotent and gives exactly one column per domain.
average_df = (
    adaptive_avg
    .merge(social_avg, how='outer', on='Gender')
    .merge(comm_avg, how='outer', on='Gender')
    .merge(motor_avg, how='outer', on='Gender')
    .merge(cog_avg, how='outer', on='Gender')
)
# Shown with Gender as a regular column; average_df itself keeps Gender as its index.
average_df.reset_index()

In [None]:
# Reshape to long form so each bar is one (domain, gender) average. Plotting the
# wide frame directly draws a single bar per domain (the mean of the per-gender
# means), which does not match the "by Gender" title.
average_long = average_df.reset_index().melt(
    id_vars='Gender', var_name='Domain', value_name='Average DQ'
)
sns.barplot(data=average_long, x='Domain', y='Average DQ', hue='Gender')
plt.title('Average DQ of Domains by Gender')
plt.ylabel('Average DQ')
plt.xlabel('Domain')
plt.xticks(rotation=50);

***Adaptive Sub Domain df***

In [None]:
# Identifier, gender and the two Adaptive sub-domain scaled scores.
adaptive_sub_columns = [
    'Child ID',
    'Gender',
    'Adaptive-Self Care SS',
    'Adaptive-Personal Responsibility SS',
]
adaptive_sub_df = teis_df[adaptive_sub_columns]
# Disabled: adaptive_sub_df.fillna("N/A",inplace=True)
adaptive_sub_df

****Gathering the averages for the sub-domain****

In [None]:
# Mean Self Care scaled score per gender, as a one-column frame indexed by Gender.
# .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a FutureWarning;
# the computed values are the same.
adapt_selfcare_avg = adaptive_sub_df.groupby('Gender')['Adaptive-Self Care SS'].mean().to_frame()
adapt_selfcare_avg

In [None]:
# Mean Personal Responsibility scaled score per gender, as a one-column frame indexed by Gender.
# .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a FutureWarning;
# the computed values are the same.
adapt_pr_avg = adaptive_sub_df.groupby('Gender')['Adaptive-Personal Responsibility SS'].mean().to_frame()
adapt_pr_avg

***Merging the Adaptive sub-domain averages into one dataframe***

In [None]:
# Join the two Adaptive sub-domain averages on Gender.
adapt_sub_avg = pd.merge(adapt_selfcare_avg, adapt_pr_avg, how='outer', on='Gender')
adapt_sub_avg

***Social Emotional***

In [None]:
# Identifier, gender and the three Social-Emotional sub-domain scaled scores.
se_sub_df=teis_df[['Child ID','Gender', 'Social Emotional-Adult Interaction SS','Social Emotional-Peer Interaction SS','Social Emotional-Self Concept / Social Role SS']]
#se_sub_df.fillna("N/A",inplace=True)
se_sub_df

# Per-gender mean of each sub-domain score, each as a one-column frame indexed
# by Gender. .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a
# FutureWarning; the computed values are the same.
se_adult_avg = se_sub_df.groupby('Gender')['Social Emotional-Adult Interaction SS'].mean().to_frame()
se_adult_avg

se_peer_avg = se_sub_df.groupby('Gender')['Social Emotional-Peer Interaction SS'].mean().to_frame()
se_peer_avg

se_self_social_avg = se_sub_df.groupby('Gender')['Social Emotional-Self Concept / Social Role SS'].mean().to_frame()
se_self_social_avg

**Adding Them Together - Social-Emotional**

In [None]:
# Adaptive sub-domain averages joined on Gender.
adapt_sub_avg = pd.merge(adapt_selfcare_avg, adapt_pr_avg, how='outer', on='Gender')
adapt_sub_avg

# Social-Emotional: Adult Interaction + Peer Interaction averages.
se_sub_avg = pd.merge(se_adult_avg, se_peer_avg, how='outer', on='Gender')
se_sub_avg

# Add the Self Concept / Social Role averages to complete the sub-domain frame.
se_sub_avg2 = pd.merge(se_sub_avg, se_self_social_avg, how='outer', on='Gender')
se_sub_avg2

***Adding the Examiner Column to investigate trends later on***

In [None]:
# Attach the Self Care examiner to each row (values align on the shared row index).
# assign() returns a new frame, so this no longer writes a column into a slice
# of teis_df (the source of pandas' SettingWithCopyWarning) and the cell can be
# re-run safely.
domain_df = domain_df.assign(Examiner=teis_df['Adaptive-Self Care Examiner'])
domain_df

In [None]:
n_bins = 20
fig, axs = plt.subplots(1, 5, sharey=True, figsize=(15, 3))

# One histogram panel per domain, driven by a title -> column mapping instead of
# five copy-pasted call pairs. Ending on the loop also keeps the stray Text repr
# of the last set_title() out of the cell output.
dq_column_by_title = {
    'Adaptive': 'Adaptive Developmental Quotient',
    'Social-Emotional': 'Social-Emotional Developmental Quotient',
    'Communication': 'Communication Developmental Quotient',
    'Motor': 'Motor Developmental Quotient',
    'Cognitive': 'Cognitive Developmental Quotient',
}
for ax, (title, column) in zip(axs, dq_column_by_title.items()):
    ax.hist(domain_df[column], bins=n_bins, histtype='stepfilled')
    ax.set_title(title)

***Counting the number of Evaluations by Examiner***

In [None]:
# Count non-null Child IDs per Self Care examiner, largest first.
child_ids_by_examiner = teis_df.groupby('Adaptive-Self Care Examiner')['Child ID']
examiner_df = (
    child_ids_by_examiner
    .count()
    .sort_values(ascending=False)
    .reset_index(name='Number of Evals')
)

# Duplicate the examiner column under the shorter 'Examiner' name used as the merge key later.
examiner_df['Examiner'] = examiner_df['Adaptive-Self Care Examiner']
examiner_df

#examiner_df_max=examiner_df[examiner_df['Number of Evals'] == examiner_df['Number of Evals'].max()]
#print(examiner_df_max)
#examiner_df_min=examiner_df[examiner_df['Number of Evals'] == examiner_df['Number of Evals'].min()]
#print(examiner_df_min)

***Getting Average Score for each examiner in each domain and merging it into one dataframe***

In [None]:
# Mean DQ per examiner for each domain, each as a one-column frame indexed by
# Examiner. .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a
# FutureWarning; the computed values are the same.
examiner_adapt_avg = domain_df.groupby('Examiner')['Adaptive Developmental Quotient'].mean().to_frame()
examiner_social_avg = domain_df.groupby('Examiner')['Social-Emotional Developmental Quotient'].mean().to_frame()
examiner_comm_avg = domain_df.groupby('Examiner')['Communication Developmental Quotient'].mean().to_frame()
examiner_motor_avg = domain_df.groupby('Examiner')['Motor Developmental Quotient'].mean().to_frame()
examiner_cog_avg = domain_df.groupby('Examiner')['Cognitive Developmental Quotient'].mean().to_frame()

In [None]:
# Fold the per-domain examiner averages into one frame, outer-joining on Examiner
# in the order Adaptive, Social-Emotional, Communication, Motor, Cognitive.
examiner_avg_df = examiner_adapt_avg
for domain_avg in (examiner_social_avg, examiner_comm_avg, examiner_motor_avg, examiner_cog_avg):
    examiner_avg_df = examiner_avg_df.merge(domain_avg, how='outer', on='Examiner')


examiner_avg_df

***Adding the counts to the averages for one grand dataframe***

In [None]:
# Combine per-examiner averages with per-examiner evaluation counts, then drop
# the redundant long-named examiner column.
examiner_grand_df = pd.merge(examiner_avg_df, examiner_df, how='inner', on='Examiner')
examiner_grand_df = examiner_grand_df.drop(columns='Adaptive-Self Care Examiner')
# Displayed sorted by workload; examiner_grand_df itself is left in merge order.
examiner_grand_df.sort_values(by='Number of Evals', ascending=False)

In [None]:
# Distribution of the number of evaluations per examiner.
evals_column = examiner_grand_df['Number of Evals']
plt.hist(evals_column, bins=20)

In [None]:
n_bins = 20
fig, axs = plt.subplots(1, 5, sharex=True, sharey=True, figsize=(15, 3))

# Histogram of the per-examiner average DQ, one panel per domain. A single loop
# over (title, column) pairs replaces five copy-pasted call pairs and keeps the
# stray Text repr of the last set_title() out of the cell output.
examiner_panels = (
    ('Adaptive', 'Adaptive Developmental Quotient'),
    ('Social-Emotional', 'Social-Emotional Developmental Quotient'),
    ('Communication', 'Communication Developmental Quotient'),
    ('Motor', 'Motor Developmental Quotient'),
    ('Cognitive', 'Cognitive Developmental Quotient'),
)
for ax, (title, column) in zip(axs, examiner_panels):
    ax.hist(examiner_grand_df[column], bins=n_bins, histtype='stepfilled')
    ax.set_title(title)



In [None]:
# Examiner's average Adaptive DQ against their number of evaluations.
sns.scatterplot(data=examiner_grand_df, x='Adaptive Developmental Quotient', y='Number of Evals')

In [None]:
# Examiner's average Social-Emotional DQ against their number of evaluations.
sns.scatterplot(data=examiner_grand_df, x='Social-Emotional Developmental Quotient', y='Number of Evals')

In [None]:
# Examiner's average Communication DQ against their number of evaluations.
sns.scatterplot(data=examiner_grand_df, x='Communication Developmental Quotient', y='Number of Evals')

In [None]:
# Examiner's average Motor DQ against their number of evaluations.
sns.scatterplot(data=examiner_grand_df, x='Motor Developmental Quotient', y='Number of Evals')

In [None]:
# Examiner's average Cognitive DQ against their number of evaluations.
sns.scatterplot(data=examiner_grand_df, x='Cognitive Developmental Quotient', y='Number of Evals')

In [None]:
fig, axs = plt.subplots(1, 5, sharex=True, sharey=True, figsize=(15, 3))

# One scatter panel per domain: per-examiner average DQ (x) against number of
# evaluations (y). A single loop replaces five copy-pasted call pairs and keeps
# the stray Text repr of the last set_title() out of the cell output.
evals_per_row = examiner_grand_df['Number of Evals']
scatter_panels = (
    ('Adaptive', 'Adaptive Developmental Quotient'),
    ('Social-Emotional', 'Social-Emotional Developmental Quotient'),
    ('Communication', 'Communication Developmental Quotient'),
    ('Motor', 'Motor Developmental Quotient'),
    ('Cognitive', 'Cognitive Developmental Quotient'),
)
for ax, (title, column) in zip(axs, scatter_panels):
    ax.scatter(x=examiner_grand_df[column], y=evals_per_row)
    ax.set_title(title)

***Some preliminary EDA for the created dataframes***

In [None]:
# Summary statistics of the per-examiner domain averages.
examiner_avg_df.describe()

In [None]:
# describe() output for domain_df, with the statistic names moved from the index into a column.
domain_summary = domain_df.describe()
avg_per_domain = domain_summary.reset_index()
avg_per_domain

In [None]:
# The 105 lowest per-examiner Cognitive averages, in ascending order.
cog_by_examiner = examiner_avg_df['Cognitive Developmental Quotient']
low_cog = cog_by_examiner.sort_values().head(105)
low_cog

In [None]:
# Number of non-null Adaptive DQ values recorded for each examiner.
adaptive_dq_by_examiner = domain_df.groupby('Examiner')['Adaptive Developmental Quotient']
examiner_adapt_count = adaptive_dq_by_examiner.count().to_frame()
examiner_adapt_count

In [None]:
# Identifier, gender, and the examiner and scaled score for each Adaptive sub-domain.
adaptive_examiner_columns = [
    'Child ID',
    'Gender',
    'Adaptive-Self Care Examiner',
    'Adaptive-Personal Responsibility Examiner',
    'Adaptive-Self Care SS',
    'Adaptive-Personal Responsibility SS',
]
adaptive_examiner_df = teis_df[adaptive_examiner_columns]
adaptive_examiner_df

In [None]:
# Mean Self Care scaled score per Self Care examiner, with the examiner as a regular column.
# .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a FutureWarning;
# the computed values are the same.
selfcare_examiner_scores = (
    adaptive_examiner_df
    .groupby('Adaptive-Self Care Examiner')['Adaptive-Self Care SS']
    .mean()
    .reset_index()
)
selfcare_examiner_scores

In [None]:
# Mean Personal Responsibility scaled score per Personal Responsibility examiner,
# with the examiner as a regular column.
# .mean() replaces .agg(np.mean), which pandas 2.1+ flags with a FutureWarning;
# the computed values are the same.
pr_examiner_scores = (
    adaptive_examiner_df
    .groupby('Adaptive-Personal Responsibility Examiner')['Adaptive-Personal Responsibility SS']
    .mean()
    .reset_index()
)
pr_examiner_scores