In [None]:
import pandas as pd
import numpy as np
import re
import datetime

In [None]:
# Load the cleaned evaluations table and index it by its 'Index' column.
# 'Child ID' is forced to object dtype so pandas does not infer a numeric type for it.
data = (
    pd.read_csv('../data/all_evals_clean.csv', dtype={'Child ID': object})
      .set_index('Index')
)
data.head(20)

In [None]:
# Keep the describe() summary of `data` in a variable, then display it.
data_desc = data.describe()
data_desc

### We need to make the date columns datetime64 instead of objects

In [None]:
# Every column whose name contains 'Date of' is treated as a date column.
pattern = r'Date of'
date_cols = [name for name in data.columns if re.search(pattern, name)]

In [None]:
# Number of columns whose name matched 'Date of'.
len(date_cols)

In [None]:
# Parse each date column with pd.to_datetime (apply runs it column by column),
# then list every column's dtype to check the result of the conversion.
data[date_cols] = data[date_cols].apply(pd.to_datetime)
list(data.dtypes)

### Let's define a slice function to make sub-tables based on domain

In [None]:
def slice_data(data, domain):
    """Return the sub-table of ``data`` whose column names match ``domain``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to select columns from.
    domain : str
        Regular expression searched for anywhere in each column name.

    Returns
    -------
    pandas.DataFrame
        ``data`` restricted to the matching columns, in their original order.
    """
    matcher = re.compile(domain)
    matching = [name for name in data.columns if matcher.search(name)]
    return data[matching]

In [None]:
# Domain names; each one is passed to slice_data as the pattern for picking that domain's columns.
domains = ['Adaptive', 'Social', 'Communication', 'Motor', 'Cognitive', 'Total']

In [None]:
# Build one sub-table per domain, keyed by the domain name.
df_dict = {}

for domain in domains:
    df = slice_data(data,domain)
    df_dict[domain] = df

In [None]:
# Short aliases for the per-domain sub-tables.
adapt, soc, comm, motor, cogn, total = (
    df_dict[key]
    for key in ('Adaptive', 'Social', 'Communication', 'Motor', 'Cognitive', 'Total')
)

In [None]:
# describe() summary of the Adaptive sub-table.
adapt.describe()

In [None]:
# Display the Adaptive sub-table itself.
adapt

# 4. Does the child's age seem to impact their scoring?

We need to determine how to get the child's age at the time of testing. To do this, we need to subtract their birthday from the date of testing. However, this can be tricky because some testing was done on multiple days.

For now, let's just get a simple case working (that is, we won't worry about the different test dates just yet).

### First, let's get everyone's ages

In [None]:
# Age in days at the time of testing, one entry per row of `data`.
#
# The testing date for a row is the first non-missing value among the candidate
# columns: 'Adaptive-Self Care Date of Testing' first, then the date columns at
# positions 2-13 of `date_cols` as fallbacks. Rows with no birthday, or with no
# testing date in any candidate column, get NaN.
#
# The date columns are datetimes by this point, so the ages are computed with
# datetime arithmetic on whole columns instead of parsing strings row by row;
# this also guarantees exactly one entry per row.
primary_col = 'Adaptive-Self Care Date of Testing'
fallback_cols = [name for name in date_cols[2:14] if name != primary_col]

# pd.to_datetime leaves datetime64 columns unchanged; it only matters if this
# cell is run before the date columns have been converted.
testing_dates = data[[primary_col] + fallback_cols].apply(pd.to_datetime)
birthdays = pd.to_datetime(data['Date of Birth'])

# Back-fill across the columns so that the first column ends up holding each
# row's first available testing date (NaT when every candidate is missing).
first_testing = testing_dates.bfill(axis=1).iloc[:, 0]

# Timedelta -> whole days; NaT differences become NaN.
ages = (first_testing - birthdays).dt.days.tolist()

In [None]:
# Inspect the computed ages (in days).
ages

In [None]:
# Number of age entries; this has to equal the number of rows in `data` for the 'Age' column assignment below to work.
len(ages)

In [None]:
# Convert to a NumPy array so the division below is applied element-wise.
ages = np.asarray(ages)
ages

In [None]:
# Whole years of age: days divided by 365, rounded down.
# NOTE(review): a fixed 365-day year ignores leap days, so the result can be off by one
# for a test date within a few days of a birthday - confirm this is acceptable.
ages_year = np.floor(ages/365)
ages_year

In [None]:
# Attach the age in whole years to the main table as a new 'Age' column, then display the table.
data['Age'] = ages_year
data

### Next, let's get some stats in there!

In [None]:
# Oldest age in whole years. Series.max() skips missing ages, whereas the builtin
# max() gives an order-dependent answer (possibly NaN) when the column contains NaN.
data['Age'].max()

In [None]:
# Mean of every numeric column for each age (in whole years); non-numeric columns are left out.
age_desc = data.groupby('Age').mean(numeric_only = True)
age_desc