# Creating DB

!! USE GPU



*	images: the images
*	global: .mat files (one per image) describing global pathologies. The first value is "diferenciación corticomedular"; the second is "cortical hiperecogénica".
*	labels: .mat files. 1. Healthy kidney; 2. pathological kidney
*	lesion_labels: .mat files (one per lesion and image): 1 - litiasis, 2 - quiste simple, 3 - quiste complicado, 4 - pirámides, 5 - angiomiolipoma, 6 - masa renal sólida, 7 - hidronefrosis, 8 - cortical adelgazada, 9 - escara cortical.
*	lesion-masks: masks for each lesion
*	masks: elliptical masks for kidneys
*	masks_poly: polygonal masks


In [None]:
#LIBRARIES

import scipy.io
import os
import pandas as pd
from multiprocessing import Pool
import multiprocessing as mp
import time

In [None]:
# Collect the annotation file names from the "global" folder.
# os.listdir returns entries in arbitrary order and also returns anything else
# that lives in the folder (e.g. notebook checkpoint directories), so keep only
# the .mat files and sort them to make the resulting table reproducible.
list_global = sorted(
    entry for entry in os.listdir("global") if entry.lower().endswith(".mat")
)

In [None]:
def function1(idx, max_lesions=12):
    """Collect the global, kidney-level and lesion-level labels of one image.

    Args:
        idx: File name of the image's annotation as listed in the ``global``
            folder (e.g. ``'I20181002081653000.mat'``). The same file name is
            read from ``labels``; the part before the first ``'.'`` is used to
            locate the per-lesion files ``lesion_labels/<name>__<k>.mat``.
        max_lesions: Highest lesion number ``k`` that is probed. Defaults to
            12, the value previously hard-coded.

    Returns:
        A list ``[name, global_0, global_1, kidney_label, lesion_1, ...]``:
        the image name, the first two ``globalPat`` values as ints, the kidney
        label as text, and one lesion name per lesion file found. The list
        length therefore varies between images.

    Raises:
        FileNotFoundError: If the ``global`` or ``labels`` file is missing.
        Any error raised by ``scipy.io.loadmat`` for an unreadable lesion file
        is propagated instead of being silently treated as "no more lesions".
    """
    # Text for the 1-based codes stored in the .mat files.
    # NOTE(review): the dataset description lists 1 = healthy / 2 = pathological,
    # whereas this mapping turns code 1 into 'pathological' -- confirm against
    # the data before changing either side.
    kidney_labels = ('pathological', 'healthy')
    lesion_names = (
        'litiasis', 'quiste simple', 'quiste complicado', 'pirámides',
        'angiomiolipoma', 'masa renal sólida', 'hidronefrosis',
        'cortical adelgazada', 'escara cortical',
    )

    # Keep only the name (without .mat); it is reused for the lesion files.
    name = idx.split('.')[0]
    res = [name]

    # globalPat --> GLOBAL
    # .flat reads the two values regardless of row/column orientation and
    # avoids calling int() on a 1-element array (deprecated in NumPy).
    global_pat = scipy.io.loadmat(os.path.join('global', idx))['globalPat']
    res.append(int(global_pat.flat[0]))
    res.append(int(global_pat.flat[1]))

    # label --> LABELS
    kidney_code = scipy.io.loadmat(os.path.join('labels', idx))['label']
    res.append(kidney_labels[int(kidney_code.flat[0]) - 1])

    # label --> LESION LABELS
    # Lesion files are numbered consecutively from 1; the first missing number
    # marks the end of this image's lesions.
    for les in range(1, max_lesions + 1):
        lesion_path = os.path.join('lesion_labels', name + '__' + str(les) + '.mat')
        try:
            lesion_code = scipy.io.loadmat(lesion_path)['label']
        except FileNotFoundError:
            break
        res.append(lesion_names[int(lesion_code.flat[0]) - 1])
    return res


In [None]:
# Number of worker processes; pinned to 8 instead of mp.cpu_count().
NUMCORES = 8

# Time the whole parallel pass (wall clock, in seconds).
start_time = time.time()
with Pool(processes=NUMCORES) as workers:
    # One function1 call per file name in list_global; takes ~3mins.
    results_parallel = workers.map(function1, list_global)

print(time.time() - start_time)

183.42391324043274


In [None]:
# Column layout: image id, the two global-pathology flags, the kidney label,
# then one column per lesion slot.
cols_global = ['idx', 'g_dif_corticomedular', 'g_cortical_hiperecogénica', 'label']
lesions_cols = [f'les{i}' for i in range(1, 13)]  # the image with the most lesions has 12
cols = cols_global + lesions_cols

# The rows in results_parallel have different lengths (one entry per lesion
# found). Pad each row with None up to the full column count so the frame can
# be built even when no image in the run reaches 12 lesions; without padding
# pandas raises on the column-count mismatch.
padded_rows = [
    list(row) + [None] * (len(cols) - len(row)) for row in results_parallel
]

data = pd.DataFrame(padded_rows, columns=cols)
data = data.set_index('idx')

data

Unnamed: 0_level_0,g_dif_corticomedular,g_cortical_hiperecogénica,label,les1,les2,les3,les4,les5,les6,les7,les8,les9,les10,les11,les12
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
I20181002081653000,0,0,healthy,,,,,,,,,,,,
I20181002081653953,0,0,healthy,,,,,,,,,,,,
I20181002081654546,1,1,pathological,,,,,,,,,,,,
I20181002081717812,0,0,pathological,quiste simple,quiste simple,quiste simple,,,,,,,,,
I20181002081717968,0,0,pathological,quiste simple,quiste simple,quiste simple,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
I20181002081338984,0,0,pathological,hidronefrosis,,,,,,,,,,,
I20181002081356218,0,0,pathological,quiste simple,,,,,,,,,,,
I20181002081501906,0,0,pathological,quiste simple,quiste simple,quiste simple,,,,,,,,,
I20181002081652609,0,0,healthy,,,,,,,,,,,,


In [None]:
# Build a copy of `data` with one extra column: the number of filled lesion
# slots per image. Lesion columns start at position 3, after the two global
# flags and the kidney label.
data2 = data.assign(**{'sum lesions': data.iloc[:, 3:].count(axis=1)})
data2

Unnamed: 0_level_0,g_dif_corticomedular,g_cortical_hiperecogénica,label,les1,les2,les3,les4,les5,les6,les7,les8,les9,les10,les11,les12,sum lesions
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
I20181002081653000,0,0,healthy,,,,,,,,,,,,,0
I20181002081653953,0,0,healthy,,,,,,,,,,,,,0
I20181002081654546,1,1,pathological,,,,,,,,,,,,,0
I20181002081717812,0,0,pathological,quiste simple,quiste simple,quiste simple,,,,,,,,,,3
I20181002081717968,0,0,pathological,quiste simple,quiste simple,quiste simple,,,,,,,,,,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
I20181002081338984,0,0,pathological,hidronefrosis,,,,,,,,,,,,1
I20181002081356218,0,0,pathological,quiste simple,,,,,,,,,,,,1
I20181002081501906,0,0,pathological,quiste simple,quiste simple,quiste simple,,,,,,,,,,3
I20181002081652609,0,0,healthy,,,,,,,,,,,,,0


# Saving to csv

In [None]:
# Switch the notebook's working directory to /content (IPython line magic).
%cd '/content/'

# Write the table to data_kidney.csv in the current working directory.
data2.to_csv('data_kidney.csv')

/content
