# Class definition with 20% JSN and with KL-grade
Code to calculate the progressing-KOA classes, once with the 20% JSN definition and once with the KL-grade definition


author = MV<br>
date = 2021-10-04<br>

_______________________________________

- /srv/Class_def_files/JSN_final.csv    
    - class def: 20% JSN
    - exclusion of only KL 4 at baseline
    
- /srv/Class_def_files/KL_def_final.csv
    - class def: progression from KL 0 or 1 to KL 3 or 4 within <= 4 years

# Imports

In [None]:
# IPython shell escape: run `nvidia-smi` to list the GPUs on this machine.
!nvidia-smi

In [None]:
# Select the GPU for this notebook.
# Both variables are set before TensorFlow is imported further down.
import os

os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "1",
})

In [None]:
# Make the dnn2 framework and the labelbox-connector importable.
import sys

# Insert both directories at position 1 (right after sys.path[0]);
# labelbox-connector ends up in front of dnn-framework2.
sys.path[1:1] = ["/srv/labelbox-connector", "/srv/dnn-framework2"]

In [None]:
#general
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import logging
import cv2
import copy

#tensorflow
import tensorflow as tf


import shutil
import os

import pydicom

from lxml import etree
import xml.etree.ElementTree as ET
from framework.data_objects import BoundingBox, PointList2D, DicomImage
import ast
from framework.inferences import Inference

In [None]:
# Notebook-wide configuration.
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Log line layout: timestamp, logger name (padded to 25 chars),
# level name (padded to 8 chars), message.
logging.basicConfig(format='%(asc' 'time)s %(name)-25s %(level' 'name)-8s %(message)s')
logging.getLogger().setLevel(logging.INFO) # change this to logging.DEBUG to get more logging information

# Definition with 20% JSN

## Calculate JSN/year and Classify into slow and fast progressors

In [None]:
# Input: JSW_all_2.csv -- image pairs (image a, image b) with 1 and 2 year
# interval.  For the medial (MED) and lateral (LAT) compartment a class is
# derived from the minimal joint space width (JSW) of both images:
#   0   = slow progressor
#   1   = fast progressor
#   NaN = not classifiable

# JSW changes smaller than this (in mm) are treated as "no change".
JSW_TOLERANCE_MM = 0.4
# Fraction of the image-a JSW that must be lost to count as fast progression.
FAST_REDUCTION = 0.2


def _jsn_class(jsw_a, jsw_b):
    """Return the progression class for one compartment of one image pair.

    jsw_a / jsw_b are the minimal JSW on image a and image b.  The result is
    0 (slow), 1 (fast) or NaN when the JSW increased by the tolerance or more,
    when the JSW on image a is not positive, or when a value is missing.
    """
    jsw_a = float(jsw_a)
    diff = jsw_a - float(jsw_b)

    # A small apparent widening is within tolerance and counts as no change.
    if -JSW_TOLERANCE_MM < diff < 0:
        diff = 0

    # Written as "not (...)" so that NaN inputs fall through to NaN as well.
    if not (diff >= 0 and jsw_a > 0):
        return np.nan

    # Fast only if the loss is at least 20% of the image-a JSW AND at least
    # the tolerance in absolute terms.
    if diff / jsw_a < FAST_REDUCTION or diff < JSW_TOLERANCE_MM:
        return 0
    return 1


df6 = pd.read_csv('/srv/Class_def_files/JSW_all_2.csv')

# Whole-column assignment instead of per-cell chained assignment
# (df6.class_MED[i] = ...), which is not guaranteed to write into df6.
df6['class_MED'] = [
    _jsn_class(jsw_a, jsw_b)
    for jsw_a, jsw_b in zip(df6['JSW_MIN_MED_a'], df6['JSW_MIN_MED_b'])
]
df6['class_LAT'] = [
    _jsn_class(jsw_a, jsw_b)
    for jsw_a, jsw_b in zip(df6['JSW_MIN_LAT_a'], df6['JSW_MIN_LAT_b'])
]

In [None]:
# Overwrite the input file with the added class columns.  The row index is
# written as well, so it shows up as an 'Unnamed: 0' column when re-read.
df6.to_csv('/srv/Class_def_files/JSW_all_2.csv')

## Exclusion criteria KL 4 (raw KL)

In [None]:
df = pd.read_csv('/srv/Class_def_files/JSW_all_2.csv')

# Keep a knee if at least one compartment could be classified.
df_x = df.dropna(subset=['class_MED', 'class_LAT'], how='all')
print(len(df_x))

# NOTE: an additional exclusion of knees whose KL stays 1 (KL_a == 1 and
# KL_b == 1) existed here as commented-out code and is not applied.

# Exclude knees that are already KL 4 on image a.  The explicit copy makes
# df8 an independent frame, so the column assignments below are well defined
# (no SettingWithCopyWarning, no dependence on view/copy behaviour).
df8 = df_x[df_x.KL_a != 4].copy()

# These KL values are blanked out -- presumably non-grade codes; TODO confirm.
kl_invalid = [1.9, 5, 8, 9]
df8['KL_a'] = df8['KL_a'].replace(kl_invalid, np.nan)
df8['KL_b'] = df8['KL_b'].replace(kl_invalid, np.nan)

# General class: 1 if the medial OR the lateral compartment is class 1,
# otherwise 0 (an unclassified compartment compares as "not 1").
df8['class'] = ((df8['class_MED'] == 1) | (df8['class_LAT'] == 1)).astype(int)

print('Length of Dataset with applied exclusion criteria:', len(df8))
print('Number of right knees:', len(df8[df8.Laterality == 'R']))
print('Number of left knees:', len(df8[df8.Laterality == 'L']))

print("Number of medial slow progressors:", len(df8.loc[df8.class_MED == 0]))
print("Number of medial fast progressors:", len(df8.loc[df8.class_MED == 1]))

print("Number of lateral slow progressors:", len(df8.loc[df8.class_LAT == 0]))
print("Number of lateral fast progressors:", len(df8.loc[df8.class_LAT == 1]))

print('Class 0:', len(df8.loc[df8['class'] == 0]))
print('Class 1:', len(df8.loc[df8['class'] == 1]))



In [None]:
# Save the image pairs (image a / image b) with class_MED, class_LAT and the
# general class, after the exclusion criteria have been applied.
# The row index is written too and becomes 'Unnamed: 0' when re-read.
df8.to_csv('/srv/Class_def_files/JSW_excluded_2.csv')

In [None]:
# Attach the JSN classes to the rows of the master CSV.
df_master = pd.read_csv('/srv/Master_dataset_JSN.csv')

# The pair table stores the ID of image a as 'ID_a'; the master table keys on 'ID'.
df7_tmp = df8.rename(columns={'ID_a': 'ID'})
class_columns = ['ID', 'Laterality', 'coords_bbox_a', 'class_MED', 'class_LAT', 'class']

# Right join: every classified knee is kept, whether or not the master has it.
df_c = pd.merge(
    df_master,
    df7_tmp[class_columns],
    how='right',
    on=['ID', 'Laterality'],
)


In [None]:
# Save the master rows with their classes.  The row index is written as well
# and becomes an 'Unnamed: 0' column when the file is read back.
df_c.to_csv('/srv/Class_def_files/JSN_pred_clf_2.csv')

In [None]:
# Change the image path of all OAI rows to the OAI DICOM store.

df = pd.read_csv('/srv/Class_def_files/JSN_pred_clf_2.csv')

# One masked .loc assignment instead of a row loop with chained assignment
# (df['img_path'][i] = fn), which is not guaranteed to write into df.
is_oai = df['Study'] == 'OAI'
df.loc[is_oai, 'img_path'] = (
    "/mnt/fs/37_OAI/data/dicom/" + df.loc[is_oai, 'ID'].astype(str) + ".dcm"
)

# Written back in place.  The index is written again, so on the next read the
# new index is 'Unnamed: 0' and the previous one becomes 'Unnamed: 0.1'.
df.to_csv('/srv/Class_def_files/JSN_pred_clf_2.csv')

## Eliminate badly segmented images


In [None]:
# c: classified knees, a: knees listed in FailedKOALAxml.csv.
c = pd.read_csv('/srv/Class_def_files/JSN_pred_clf_2.csv')
a = pd.read_csv('/srv/Class_def_files/FailedKOALAxml.csv')

# Align the failure list with c: column 'Side' -> 'Laterality', values l/r -> L/R.
a = a.rename(columns={'Side': 'Laterality'}).replace({'l': 'L', 'r': 'R'})
m = pd.merge(a, c, how='inner', on=['Laterality', 'ID'])

# 'Unnamed: 0' is the index column saved with c; its values are used as the
# row labels to drop (assumes it still equals c's current index -- TODO confirm).
indices = m['Unnamed: 0'].tolist()

d = c.drop(index=indices).reset_index()
e = d.drop(columns=['index', 'Unnamed: 0'])

e.to_csv('/srv/Class_def_files/JSN_pred_clf_2.csv')

## Add column of contralateral knee OA

In [None]:
df = pd.read_csv('/srv/Class_def_files/Master_20JSN_XML_ex014.csv')
# Placeholder column on the full table.  The later merge with df_d therefore
# yields 'other_knee_AKOA_x' / 'other_knee_AKOA_y', which the cleanup cell expects.
df['other_knee_AKOA'] = ''
print(len(df))

# Keep only bilateral images, i.e. IDs that occur on more than one row.
mask = df.duplicated(subset=['ID'], keep=False)
df_d = df[mask].reset_index()
print(len(df_d))

# other_knee_AKOA = 1 if another knee of the same image (same ID) has class 1,
# else 0.  Grouping by ID replaces the former "compare with the next row"
# loop, which relied on a sort whose result was discarded and raised a
# KeyError on the last row (idx + 1 is out of range).
is_fast = (df_d['class'] == 1).astype(int)
fast_per_image = is_fast.groupby(df_d['ID']).transform('sum')
# A knee's own flag is subtracted implicitly: more fast knees in the image
# than this knee contributes means some other knee is fast.
# object dtype keeps the values as plain 0/1 integers after the later left merge.
df_d['other_knee_AKOA'] = (fast_per_image > is_fast).astype(int).astype(object)

df_d = df_d.rename(columns={'img_path': 'dicom_img_path'})


In [None]:
# Left merge: keep every row of df and attach other_knee_AKOA from the
# bilateral subset.  Both frames carry an 'other_knee_AKOA' column, so pandas
# suffixes them with _x (from df) and _y (from df_d).
df_new = df.merge(df_d[['ID','Laterality','other_knee_AKOA']], on = ['ID', 'Laterality'], how = 'left')

In [None]:
# Drop the saved index columns and the empty placeholder column, and keep the
# merged contralateral flag under its final name.
df_new2 = df_new.drop(columns=['Unnamed: 0', 'Unnamed: 0.1', 'other_knee_AKOA_x'])
df_new2 = df_new2.rename(columns={'other_knee_AKOA_y': 'other_knee_AKOA'})

## Add image paths on the Caroline share
# File name pattern: <ID>_<Laterality>.png
cropped_dir = '/mnt/caroline/MV/Cropped_knees'
clahe_dir = '/mnt/caroline/MV/Cropped_knees/Clahe-pngs'
file_names = df_new2['ID'] + '_' + df_new2['Laterality'] + '.png'

# Whole-column assignment instead of a row loop with chained assignment
# (df_new2['img_path'][i] = fn), which is not guaranteed to write into df_new2.
df_new2['img_path'] = [os.path.join(cropped_dir, name) for name in file_names]
df_new2['img_path_pro'] = [os.path.join(clahe_dir, name) for name in file_names]

In [None]:
# Intermediate result.  The row index is written as well and becomes the
# 'Unnamed: 0' column that the implant-elimination cells read.
df_new2.to_csv('/srv/Class_def_files/JSN_tmp.csv')

## Eliminate Implants

In [None]:
## Find the rows that belong to knees with implants (438 images)

imp_df = pd.read_csv('/srv/Implants.csv').replace({'l': 'L', 'r': 'R'})  # l/r -> L/R
orig_df = pd.read_csv('/srv/Class_def_files/JSN_tmp.csv')

# Inner join: only knees present in both the implant list and the dataset.
m = pd.merge(imp_df, orig_df, how='inner', on=['Laterality', 'ID'])
m

In [None]:
# 'Unnamed: 0' is the index column saved with JSN_tmp.csv; its values are used
# as the row labels of orig_df to remove.
indices = m['Unnamed: 0'].tolist()

df_new3 = orig_df.drop(index=indices)

In [None]:
# Final JSN dataset (bad segmentations and implants removed); the row index is written too.
df_new3.to_csv('/srv/Class_def_files/JSN_final.csv')

## Split into training, test and tune sets

In [None]:
# Reload the final JSN dataset and show its columns.
df = pd.read_csv('/srv/Class_def_files/JSN_final.csv')
df.columns

In [None]:
# Reproducible random split: 20% test, then 15% of the remainder as tune set,
# the rest as training set.  All index arrays hold row positions in df.
np.random.seed(137115)
n_rows = len(df)
train_split = int(n_rows * 0.8)
test_split = n_rows - train_split

# First draw: test rows.
all_positions = np.arange(n_rows)
index_test = np.random.choice(all_positions, size=test_split, replace=False)
index_test.sort()

# Everything that is not a test row.
not_test = np.ones(n_rows, dtype=bool)
not_test[index_test] = False
index_train = all_positions[not_test]

# Second draw: tune rows, chosen among the non-test rows.
tune_split = int(len(index_train) * 0.15)
train_positions = np.arange(len(index_train))
tune_pos = np.random.choice(train_positions, size=tune_split, replace=False)
tune_pos.sort()
is_tune = np.zeros(len(index_train), dtype=bool)
is_tune[tune_pos] = True

index_tune = index_train[is_tune]
index_train = index_train[~is_tune]

dfTrain, dfTune, dfTest = (df.iloc[rows] for rows in (index_train, index_tune, index_test))

dfTrain.to_csv('/srv/Class_def_files/JSN_final_train.csv')
dfTest.to_csv('/srv/Class_def_files/JSN_final_test.csv')
dfTune.to_csv('/srv/Class_def_files/JSN_final_tune.csv')

# Definition with KL grade taken from KOaLA (test)

In [None]:
# Reload the JSW table with the JSN classes.
df= pd.read_csv('/srv/Class_def_files/JSW_all_2.csv')

In [None]:
## Pair each image with the same patient's next image, at most 4 visits later

# Columns of the image-pair tables ('a' = first image, 'b' = following image).
# A list (not a set) gives a fixed column order and is accepted by pandas 2.
PAIR_COLUMNS = ['Study', 'Patient ID', 'Image_a', 'Image_b', 'ID_a', 'ID_b',
                'VISIT_a', 'VISIT_b', 'KL_a', 'KL_b']
# Largest accepted difference VISIT_b - VISIT_a.
MAX_VISIT_INTERVAL = 4


def _pair_consecutive_visits(side_df, max_interval=MAX_VISIT_INTERVAL):
    """Pair every row of *side_df* with the row directly after it.

    A pair is kept when both rows have the same 'Patient ID' and
    VISIT_b - VISIT_a is at most *max_interval*.  Only adjacent rows are
    compared, so *side_df* is assumed to be ordered by patient and then by
    visit -- TODO confirm for the master file.

    Returns a DataFrame with the columns in PAIR_COLUMNS, one row per pair.
    """
    rows = side_df.to_dict('records')
    pairs = []
    for first, second in zip(rows, rows[1:]):
        if second['Patient ID'] != first['Patient ID']:
            continue
        # "not (... <= ...)" also rejects pairs with a missing visit number.
        if not (second['VISIT'] - first['VISIT'] <= max_interval):
            continue
        pairs.append({
            'ID_a': first['ID'],
            'ID_b': second['ID'],
            'Image_a': first['img_path'],
            'Image_b': second['img_path'],
            'VISIT_a': first['VISIT'],
            'VISIT_b': second['VISIT'],
            'KL_a': first['KL'],
            'KL_b': second['KL'],
            'Patient ID': first['Patient ID'],
            'Study': first['Study'],
        })
    # Build the frame once; row-by-row DataFrame.append is quadratic and was
    # removed in pandas 2.
    return pd.DataFrame(pairs, columns=PAIR_COLUMNS)


# load master file
df_master = pd.read_csv('/srv/Master_dataset_JSN.csv')
df_tmp = df_master.copy()

# Change visit codes 'V00'..'V10' to the numbers 0..10 so they can be subtracted.
visit_codes = ['V00', 'V01', 'V02', 'V03', 'V04', 'V05', 'V06', 'V07', 'V08', 'V09', 'V10']
df_tmp['VISIT'] = df_tmp['VISIT'].replace(visit_codes, list(range(len(visit_codes))))
df_tmp = df_tmp.astype({'Patient ID': 'str'})

# split into left and right dataframe dfL & dfR
dfL = df_tmp.loc[df_tmp['Laterality'] == 'L'].reset_index()
dfR = df_tmp.loc[df_tmp['Laterality'] == 'R'].reset_index()

# All rows of each side are scanned (no hard-coded row count).
df_img_L = _pair_consecutive_visits(dfL)
df_img_R = _pair_consecutive_visits(dfR)

df_img_L['Laterality'] = 'L'
df_img_R['Laterality'] = 'R'

In [None]:
# Stack the left- and right-knee pair tables into one frame with a fresh 0..n-1 index.
df_m = pd.concat([df_img_L,df_img_R], ignore_index = True)

In [None]:
# Load the table with KL grade, osteophytes and sclerosis per image
# (source of the columns added in the next cell) and show its columns.
df_bla = pd.read_csv('/srv/XMLExtrKLOsteoScleroAll.csv')
df_bla.columns

In [None]:
# Attach KL grade, osteophytes and sclerosis for image a and for image b.
# The same lookup table is joined twice, each time with its columns renamed
# to the matching _a / _b suffix.
rename_for_a = {'ID': 'ID_a', 'KL-grade': 'KL-grade_a',
                'osteophytes': 'osteophytes_a', 'sclerosis': 'sclerosis_a'}
rename_for_b = {'ID': 'ID_b', 'KL-grade': 'KL-grade_b',
                'osteophytes': 'osteophytes_b', 'sclerosis': 'sclerosis_b'}

# values of image a
tmp = df_bla.rename(columns=rename_for_a)
df1 = pd.merge(df_m, tmp, how='left', on=['ID_a', 'Laterality'])

# values of image b
tmp = df_bla.rename(columns=rename_for_b)
df2 = pd.merge(df1, tmp, how='left', on=['ID_b', 'Laterality'])

In [None]:
# Remove the two 'Unnamed: 0' columns (suffixed _x / _y) -- presumably the
# saved index column of df_bla brought in by each of the two merges.
df2 = df2.drop(columns={'Unnamed: 0_x','Unnamed: 0_y'})

In [None]:
# Checkpoint the merged pair table; the row index is written as well.
df2.to_csv('/srv/temp.csv')

## calculate KL difference

In [None]:
# Reload the checkpointed pair table.
df2 = pd.read_csv('/srv/temp.csv')

In [None]:
df3 = df2.copy()

# Class 1: KL grade below 2 on image a AND above 2 on image b; class 0 otherwise.
# Comparisons with a missing grade are False, so such pairs get class 0.
# Assigned as one column instead of per-cell chained assignment
# (df3['class'][i] = ...), which is not guaranteed to write into df3.
df3['class'] = ((df3['KL-grade_a'] < 2) & (df3['KL-grade_b'] > 2)).astype(int)


In [None]:
# Remove, in place, every pair where the KL grade of image a or image b is -1.
has_invalid_grade = (df3['KL-grade_a'] == -1) | (df3['KL-grade_b'] == -1)
df3.drop(index=df3.index[has_invalid_grade], inplace=True)

In [None]:
# Attach the KL-based classes to the rows of the master CSV.
df_master = pd.read_csv('/srv/Master_dataset_JSN.csv')

# The pair table stores the ID of image a as 'ID_a'; the master table keys on 'ID'.
df_tmp = df3.rename(columns={'ID_a': 'ID'})

# Right join: every classified pair is kept, whether or not the master has it.
df_c = pd.merge(df_master, df_tmp, how='right', on=['ID', 'Laterality'])


In [None]:
# Final dataset for the KL-grade class definition; the row index is written too.
df_c.to_csv('/srv/Class_def_files/KL_def_final.csv')