# **ExtraLong 2021:** Quality Control
Project:    ExtraLong <br>
Maintainer: Katja Zoner <br>
Updated:    11/02/2021 <br>

## **Inclusion Criteria:**  Euler number > ~~-367~~ -212 
The cutoff was revised from -367 to -212, which is the -2 SD cutoff obtained after removing the individual whose scan had an Euler number of roughly -6000.

## **Pre-QC:** <br> - 814 subjects <br> - 2453 sessions

In [78]:
import pandas as pd
import numpy as np

# Euler-number threshold (the -2SD cutoff): sessions at or below it are excluded.
CUTOFF = -212

In [70]:
# Read in the CSV of FreeQC quality ratings for all sessions in the 2021 datafreeze.
# Only bblid, seslabel and euler_total are kept by the following cell; the
# per-hemisphere CNR, holes and euler columns are discarded there.
fname = "./csv/quality_2021-11-01.csv"
df = pd.read_csv(fname)
# Bare last expression: renders the loaded frame as the cell output.
df

Unnamed: 0,bblid,seslabel,cnr_graycsf_lh,cnr_graycsf_rh,cnr_graywhite_lh,cnr_graywhite_rh,holes_lh,holes_rh,holes_total,euler_lh,euler_rh,euler_total
0,11399,3468,0.509,0.485,1.181,1.013,23,22,45,-44,-42,-86
1,11399,3592,0.454,0.409,0.948,0.723,43,46,89,-84,-90,-174
2,11801,5145,0.522,0.481,0.965,0.834,19,12,31,-36,-22,-58
3,11801,5200,0.449,0.426,0.911,0.712,22,13,35,-42,-24,-66
4,11801,8591,0.487,0.470,0.929,0.813,18,12,30,-34,-22,-56
...,...,...,...,...,...,...,...,...,...,...,...,...
2448,139272,10109,0.508,0.464,1.116,0.987,13,13,26,-24,-24,-48
2449,139490,8461,0.453,0.454,0.914,0.975,179,188,367,-356,-374,-730
2450,139490,10564,0.594,0.610,1.085,1.090,61,42,103,-120,-82,-202
2451,139553,8410,0.473,0.474,1.187,1.114,36,23,59,-70,-44,-114


In [71]:
# Keep only the subject ID, session ID and total Euler number, under short names.
# The columns are renamed through an explicit mapping and then selected by name,
# instead of deleting the other columns one by one and assigning df.columns
# positionally. This keeps the labels correct even if the CSV column order
# changes, and makes the cell safe to re-run: rename() ignores keys that are
# already gone, whereas a repeated `del df[...]` raises KeyError.
# .copy() gives an independent frame so later column assignments are unambiguous.
df = df.rename(
    columns={"bblid": "subid", "seslabel": "sesid", "euler_total": "euler"}
)[["subid", "sesid", "euler"]].copy()

In [79]:
# Sessions that fail QC: Euler number at or below the -2SD cutoff,
# ordered from the most negative Euler number upwards.
poor_quality = df.loc[df["euler"].le(CUTOFF)].sort_values(by="euler")
poor_quality

Unnamed: 0,subid,sesid,euler,exclude
297,20871,11787,-6140,True
1919,117595,5482,-792,True
1253,98585,4905,-788,True
2449,139490,8461,-730,True
1330,100278,9491,-726,True
...,...,...,...,...
650,87737,3961,-214,True
1375,102954,9540,-214,True
387,82232,2706,-214,True
1130,95460,9380,-212,True


In [80]:
# Add a column indicating each session's inclusion/exclusion status.
# The flag is computed row by row from the Euler number itself rather than by
# matching sesid against poor_quality: an isin() lookup on sesid alone would
# also flag any other row that happens to share a session label, and it ties
# this cell to whatever state poor_quality was last left in.
df['exclude'] = df['euler'] <= CUTOFF
df

Unnamed: 0,subid,sesid,euler,exclude
0,11399,3468,-86,False
1,11399,3592,-174,False
2,11801,5145,-58,False
3,11801,5200,-66,False
4,11801,8591,-56,False
...,...,...,...,...
2448,139272,10109,-48,False
2449,139490,8461,-730,True
2450,139490,10564,-202,False
2451,139553,8410,-114,False


# Also exclude subjects with fewer than 2 quality sessions!

In [81]:
# For each subject, count the sessions that passed the Euler cutoff.
# (A stray `count_by_subid.sort_values(...)` whose result was neither stored
# nor displayed has been dropped; it had no effect.)
quality_only = df[~df["exclude"]]
count_by_subid = quality_only.groupby("subid").size().reset_index(name="ntimepoints")

# Subjects left with fewer than 2 quality sessions (n=8 when originally run).
# Subjects with no quality session at all never appear in count_by_subid, but
# every one of their sessions is already flagged for exclusion.
exclude_subids = count_by_subid.loc[count_by_subid["ntimepoints"] < 2, "subid"]

# Exclude every session belonging to those subjects.
df.loc[df["subid"].isin(exclude_subids), "exclude"] = True

In [82]:
# Report the QC outcome: sessions dropped, sessions kept, and distinct subjects kept
dropped_sessions = df[df["exclude"]]
kept_sessions = df[~df["exclude"]]
print(f"Number of excluded sessions: {len(dropped_sessions)}")
print(f"Final session count: {len(kept_sessions)}")
print(f"Final subject count: {len(kept_sessions.subid.unique())}")


Number of excluded sessions: 103
Final session count: 2350
Final subject count: 778


In [83]:
# Turn the numeric IDs into fixed-width, zero-padded strings:
# subid is padded to 6 characters, sesid to 5.
df["subid"] = df["subid"].astype(str).str.zfill(6)
df["sesid"] = df["sesid"].astype(str).str.zfill(5)

In [84]:
# Write the per-session inclusion/exclusion table to CSV, leaving out the row index
fname = "./csv/inclusion_exclusion_datafreeze_2021_revised_11-19.csv"
df.to_csv(fname, index=False)

In [85]:
# Display the final inclusion/exclusion table.
# NOTE(review): the previous comment read "Look at subjects with many sessions",
# but this cell applies no filter or sort -- it simply renders the whole frame.
df

Unnamed: 0,subid,sesid,euler,exclude
0,011399,03468,-86,False
1,011399,03592,-174,False
2,011801,05145,-58,False
3,011801,05200,-66,False
4,011801,08591,-56,False
...,...,...,...,...
2448,139272,10109,-48,False
2449,139490,08461,-730,True
2450,139490,10564,-202,True
2451,139553,08410,-114,False
