In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Lesion-type prefixes used to namespace the per-type statistic columns.
les_type = ['inf','jux','per','sub']

# The six summary statistics recorded for every subject, once over all
# lesions and once more per lesion type (prefixed with the type name).
_stat_fields = [
    "total_lesions",
    "max_lesion_volume",
    "min_lesion_volume",
    "total_volume_of_lesions",
    "average_volume",
    "median_volume",
]

# Column-oriented accumulator: one independent, initially empty list per
# output column. "mse" holds the subject identifier for each row.
analysis = {
    column: []
    for column in (
        ["mse"]
        + _stat_fields
        + ["%s_%s" % (prefix, field) for prefix in les_type for field in _stat_fields]
    )
}

#import table and leave only important values
# pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
# read_csv with index_col=False is the equivalent call: no column is used as
# the index, so there is nothing for from_csv's default date parsing to touch.
df = pd.read_csv("/data/henry1/mahamber/lesion_info_baseline.csv", index_col=False)
# Discard columns by position; reassigning (rather than inplace=True) keeps the
# statement a plain expression on the freshly loaded frame.
# NOTE(review): the positions 0,2,3,4,5,6 depend on the CSV's column layout,
# which is not visible here -- confirm if the file format changes.
df = df.drop(df.columns[[0,2,3,4,5,6]], axis=1)
#organize table by mseID and lesion type
# One row per (mseID, type, lesion); pivot_table's default aggregation (mean)
# collapses any duplicate rows for the same key. Later code addresses the
# resulting value columns by position, so their order matters downstream.
df1 = pd.pivot_table(
    df,
    values=["volume","distance from midbrain","distance from ventricles","distance from gray matter"],
    index=["mseID","type","lesion"],
)
#function that zero-fills missing (level-0, level-1) combinations and gives values as array
def remove_zeros(df):
    """Return ``df``'s values with a row for every level-0 / level-1 pairing.

    Despite its name, this helper does not strip zeros. It moves index
    level 1 into the columns (absent combinations become 0), then stacks that
    level back into the index without dropping all-NaN rows. The result is a
    plain ndarray, so the index labels are discarded; rows follow the order
    of the re-stacked index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least a two-level row index.

    Returns
    -------
    numpy.ndarray
        2-D array with one column per column of ``df``.
    """
    widened = df.unstack(level=1, fill_value=0)
    return widened.stack(dropna=False).values
# Two groupings of the pivoted lesion table:
#   df2 -> by the first two index levels (mseID, type)
#   df3 -> by the first index level only (mseID)
df2 = df1.groupby(level=[0, 1])
df3 = df1.groupby(level=[0])

# Per-(mseID, type) statistics, passed through remove_zeros so that every
# subject gets a zero-filled row for each lesion type it lacks.
count_les, sum_les, max_les, min_les, ave_les, med_les = [
    remove_zeros(getattr(df2, stat)())
    for stat in ("count", "sum", "max", "min", "mean", "median")
]

# Per-subject statistics over all lesion types, as bare arrays.
count_tot, sum_tot, max_tot, min_tot, ave_tot, med_tot = [
    getattr(df3, stat)().values
    for stat in ("count", "sum", "max", "min", "mean", "median")
]
#loop all important information into new table
subjects = df1.index.get_level_values(0).unique()

# The aggregate arrays carry no labels, so both the column and the row of each
# statistic are addressed purely by position. Name the positions and verify the
# layout up front: a mismatch would otherwise silently attribute values to the
# wrong measurement, lesion type, or subject.
VOLUME_COL = 3              # position of "volume" among df1's value columns
N_TYPES = len(les_type)     # rows per subject in the per-type arrays
assert df1.columns[VOLUME_COL] == "volume", (
    "expected 'volume' at column %d of the pivot table, found %r"
    % (VOLUME_COL, df1.columns[VOLUME_COL])
)
assert len(count_les) == len(subjects) * N_TYPES, (
    "per-type arrays have %d rows, expected %d subjects x %d lesion types; "
    "the lesion types present in the data do not match les_type"
    % (len(count_les), len(subjects), N_TYPES)
)

for x in range(len(subjects)):
    # Whole-subject statistics (all lesion types pooled).
    analysis['mse'].append(subjects[x])
    analysis["total_lesions"].append(count_tot[x][VOLUME_COL])
    analysis["max_lesion_volume"].append(max_tot[x][VOLUME_COL])
    analysis["min_lesion_volume"].append(min_tot[x][VOLUME_COL])
    analysis["total_volume_of_lesions"].append(sum_tot[x][VOLUME_COL])
    analysis["average_volume"].append(ave_tot[x][VOLUME_COL])
    analysis["median_volume"].append(med_tot[x][VOLUME_COL])
    for y, prefix in enumerate(les_type):
        # Per-type rows are laid out subject-major: N_TYPES consecutive rows
        # per subject, assumed to be in the same order as les_type.
        # NOTE(review): that ordering relies on the data's type labels sorting
        # as inf, jux, per, sub -- confirm against the CSV's "type" values.
        count = (x * N_TYPES) + y
        analysis["%s_total_lesions" % prefix].append(count_les[count][VOLUME_COL])
        analysis["%s_max_lesion_volume" % prefix].append(max_les[count][VOLUME_COL])
        analysis["%s_min_lesion_volume" % prefix].append(min_les[count][VOLUME_COL])
        analysis["%s_total_volume_of_lesions" % prefix].append(sum_les[count][VOLUME_COL])
        analysis["%s_average_volume" % prefix].append(ave_les[count][VOLUME_COL])
        analysis["%s_median_volume" % prefix].append(med_les[count][VOLUME_COL])

# Assemble the accumulated columns into one table with a fixed column order:
# subject id, the six whole-subject statistics, then the same six statistics
# for each lesion type in les_type order (inf, jux, per, sub).
_summary_fields = [
    "total_lesions",
    "max_lesion_volume",
    "min_lesion_volume",
    "total_volume_of_lesions",
    "average_volume",
    "median_volume",
]
_ordered_columns = ["mse"] + _summary_fields
for _prefix in les_type:
    _ordered_columns.extend("%s_%s" % (_prefix, _field) for _field in _summary_fields)

lesion_info = pd.DataFrame(analysis, columns=_ordered_columns)

In [7]:
#obtain clinical scores (EDSS, disease course, disease duration, MSFC 25FTW trials)
# pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# read_csv with index_col=0 and parse_dates=True reproduces its defaults.
clinical_data = pd.read_csv('/data/henry1/keshavan/lesion_seg/notebooks/demographics.csv',
                            index_col=0, parse_dates=True)

# Boolean row masks: each row of clinical_data carries one metric name in
# 'metric' and that metric's reading in 'value'.
edss = clinical_data['metric'] == 'EDSS'
dc = clinical_data['metric'] == 'DiseaseCourse'
dd = clinical_data['metric'] == 'DiseaseDuration'
msfc251 = clinical_data['metric'] == 'MSFC 25FTW Trial1 Seconds'
msfc252 = clinical_data['metric'] == 'MSFC 25FTW Trial2 Seconds'
# Not used in this cell; kept in case another cell relies on it.
ms = clinical_data['msid'] == 'ms0056'


def _score_table(metric_mask, score_name):
    """Extract one metric from ``clinical_data`` as its own table.

    Parameters
    ----------
    metric_mask : boolean Series
        Row selector over ``clinical_data``.
    score_name : str
        Name given to the 'value' column in the result.

    Returns
    -------
    pandas.DataFrame
        Columns 'mse', 'msid', 'tp' and ``score_name``, sorted by 'mse',
        with a fresh 0..n-1 index.
    """
    selected = clinical_data[metric_mask][['mse','msid','tp','value']]
    return (
        selected
        .rename(columns={'value': score_name})
        .sort_values('mse', axis=0)
        # drop=True discards the old index directly instead of materialising
        # it as an 'index' column and deleting it, which breaks if the index
        # happens to have a name.
        .reset_index(drop=True)
    )


edss_scores = _score_table(edss, 'edss')
dc_scores = _score_table(dc, 'dc')
dd_scores = _score_table(dd, 'dd')
msfc251_scores = _score_table(msfc251, 'msfc 25 1')
msfc252_scores = _score_table(msfc252, 'msfc 25 2')
scores_to_include = [edss_scores,dc_scores,dd_scores,msfc251_scores,msfc252_scores]

# Inner-join the per-metric tables left to right on the visit keys. Written as
# an explicit loop because reduce() is a builtin only on Python 2 and this
# notebook never imports it from functools.
scores = scores_to_include[0]
for _next_scores in scores_to_include[1:]:
    scores = pd.merge(scores, _next_scores, on=['mse','msid','tp'])
scores.columns

Index([u'mse', u'msid', u'tp', u'edss', u'dc', u'dd', u'msfc 25 1',
       u'msfc 25 2'],
      dtype='object')

In [9]:
#merge scores and lesion data and output to CSV
# No join keys are given, so pandas joins on the columns the two frames share
# (an inner join, the default). Remaining missing values are replaced by 0.0.
# NOTE(review): the blanket fill also turns missing clinical scores into 0.0,
# which cannot be told apart from a genuine zero afterwards -- confirm this
# is intended.
scores_lesion = pd.merge(lesion_info, scores, how='inner').fillna(0.0)
scores_lesion.to_csv('/data/henry1/mahamber/clinical_info.csv')

Unnamed: 0,mse,total_lesions,max_lesion_volume,min_lesion_volume,total_volume_of_lesions,average_volume,median_volume,inf_total_lesions,inf_max_lesion_volume,inf_min_lesion_volume,...,sub_total_volume_of_lesions,sub_average_volume,sub_median_volume,msid,tp,edss,dc,dd,msfc 25 1,msfc 25 2
0,mse1,15,429.0,15.0,1661.0,110.733333,72.0,1,53.0,53.0,...,459.0,114.75,66.5,ms0170,1,3.5,RR,35.0,6.3,6.1
1,mse1003,16,4427.0,44.0,9706.0,606.625,108.5,0,0.0,0.0,...,5092.0,727.428571,108.0,ms0817,1,2.0,RR,9.0,4.6,4.7
2,mse1009,6,81.0,40.0,346.0,57.666667,55.5,0,0.0,0.0,...,241.0,60.25,58.5,ms0818,1,2.0,RR,15.0,3.8,3.6
3,mse1011,5,2154.0,23.0,2425.0,485.0,96.0,0,0.0,0.0,...,2329.0,582.25,76.0,ms0820,1,1.0,CIS,3.0,4.3,4.2
4,mse1017,3,123.0,72.0,310.0,103.333333,115.0,0,0.0,0.0,...,72.0,72.0,72.0,ms0821,1,0.0,RR,0.0,4.0,4.2
