# Statistical measurements

The following script:

- Divides the patches into two dataframes (regardless of class):
  - Surfaces
  - Sections
- Calculates mean and standard deviation of rgb channels for each image (patch). A vector is constructed for each image as:
  - vector: [mean_r, mean_g, mean_b, std_r, std_g, std_b]
- Returns an xlsx file with three sheets:
  - Surfaces: Dataframe with concatenated 'surfaces' data (normalized)
  - Sections: Dataframe with concatenated 'sections' data (normalized)
  - Summary: mean of 'Sections' and 'Surfaces' dataframes (normalized)



## Connect with Drive

Connects to Google Drive.

In [11]:
# Mount Google Drive at /content/drive so the notebook can read files stored there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Dataset
- About the folders: each folder has 750 patches
- About the patches: they have dimensions of 200x200 pixels
- About the classes: there are 4 classes 'au', 'wd', 'ww', 'br'.

If you want only 3 classes, comment out the lines corresponding to 'br'.


In [14]:
# Folders (inside path_data) to process. The entries follow the naming pattern
# '<class>_<sections|surfaces>_patches'; only uncommented entries are processed.
list_folder = ( # Comment out the list entries that should NOT be processed
    #'au_sections_patches', 
    #'au_surfaces_patches', 
    #'wd_sections_patches', 
    #'wd_surfaces_patches', 
    #'ww_sections_patches',
    #'ww_surfaces_patches',
    'br_sections_patches',
    'br_surfaces_patches'
    )

# Root directory that contains the folders listed in list_folder.
#path_data = '/content/drive/My Drive/Test' # Small dataset for running quick tests :)
path_data = '/content/drive/My Drive/2 - Experiments/6 - Kindey stones/Proyecto_CalculosRenales/Imagenes/Patches/200x200' # Complete data

# Libraries!

import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Auxiliary functions

It uses three auxiliary functions: 
- statistical(): separates the image into channels and calculates the mean and std of each channel.
- data(): concatenates the vectors produced by statistical().
- makedf(): converts data arrays into a dataframe ready to send to excel.

In [13]:
def statistical(path_images):
  """Return the normalized per-channel mean and standard deviation of an image.

  The image is read with OpenCV, split into its b, g, r channels, and the mean
  and standard deviation of each channel are divided by 255.

  Args:
    path_images: Path of the image file to read.

  Returns:
    A list ``[mean_r, mean_g, mean_b, std_r, std_g, std_b]``.

  Raises:
    ValueError: If OpenCV cannot read the file as an image.
  """
  bgr_img = cv2.imread(path_images)
  if bgr_img is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with the offending path so the problem
    # is not hidden behind an obscure error inside cv2.split.
    raise ValueError('Could not read image: ' + repr(path_images))

  b, g, r = cv2.split(bgr_img)       # OpenCV stores channels in b, g, r order
  channels = (r, g, b)               # report the statistics in r, g, b order
  means = [np.mean(channel) / 255 for channel in channels]
  stds = [np.std(channel) / 255 for channel in channels]
  return means + stds
    
def data(path_folder):
  """Stack the statistical() vectors of every file in a folder.

  Args:
    path_folder: Directory whose entries are all passed to statistical().

  Returns:
    A 2-D float array with one row per file and six columns
    (mean_r, mean_g, mean_b, std_r, std_g, std_b). An empty folder yields an
    array of shape (0, 6).
  """
  # os.listdir returns entries in arbitrary order; sorting keeps the row
  # order reproducible between runs.
  rows = [
      statistical(os.path.join(path_folder, image))
      for image in sorted(os.listdir(path_folder))
  ]
  if not rows:
    # Keep the six-column layout so callers can still stack/slice the result.
    return np.empty((0, 6))
  # Build the array once instead of re-allocating it with vstack per image;
  # this also returns a 2-D array when the folder holds a single image.
  return np.array(rows)

# Column names of the per-image statistics vector, in the order it is built.
_STAT_COLUMNS = ['mean_r', 'mean_g', 'mean_b', 'std_r', 'std_g', 'std_b']


def makedf(data2frame,label):
  """Convert a 2-D block of statistics into a labelled DataFrame.

  Args:
    data2frame: 2-D array-like. Six columns (means then stds of r, g, b) for
      regular data; seven columns for the summary, the last one holding the
      name of each row.
    label: 'summary' to build the seven-column summary frame; any other value
      builds the six-column frame.

  Returns:
    A pandas DataFrame with columns mean_r, mean_g, mean_b, std_r, std_g,
    std_b and, for the summary, an extra 'names' column.
  """
  is_summary = label == 'summary'
  columns = _STAT_COLUMNS + ['names'] if is_summary else list(_STAT_COLUMNS)
  df = pd.DataFrame(data2frame, columns=columns)
  if is_summary:
    # A summary row carries its name next to the statistics, so an array that
    # mixes both may hold the numbers as strings; convert them back so they
    # are exported as numbers rather than text.
    for column in _STAT_COLUMNS:
      df[column] = pd.to_numeric(df[column])
  return df

## Main function

In [15]:
# Main script: gather the per-image statistics of every selected folder,
# grouped by set type, then export them together with their column means.
stats_by_type = {'sections': [], 'surfaces': []}

for folder in list_folder:
  print(folder)
  path_folder = path_data + '/' + folder
  # Folder names look like '<class>_<type>_patches'; take the type from the
  # second field so the class prefix does not have to be two characters long.
  name_parts = folder.split('_')
  type_set = name_parts[1] if len(name_parts) > 1 else ''
  if type_set in stats_by_type:
    stats_by_type[type_set].append(data(path_folder))
  else:
    print('Error: Unidentified class')

# Both set types are needed for the summary; stop with a clear message rather
# than failing later on an undefined variable.
missing = [kind for kind, chunks in stats_by_type.items() if not chunks]
if missing:
  raise ValueError('No folders selected in list_folder for: ' + ', '.join(missing))

# Stack once per set type instead of growing the array folder by folder.
dataf_sections = np.vstack(stats_by_type['sections'])
dataf_surfaces = np.vstack(stats_by_type['surfaces'])

mean_sections = np.mean(dataf_sections, axis=0)
mean_surfaces = np.mean(dataf_surfaces, axis=0)

# Use an object array so the means stay numeric next to the row names;
# appending the name to a float array would turn every value into a string.
stack_summary = np.array(
    [list(mean_sections) + ['Sections'], list(mean_surfaces) + ['Surfaces']],
    dtype=object)

df_summary = makedf(stack_summary, label='summary')
df_sections = makedf(dataf_sections, label='none')
df_surfaces = makedf(dataf_surfaces, label='none')

with pd.ExcelWriter('statistical-measurements-patches-w200-4class.xlsx') as writer:
  df_summary.to_excel(writer, sheet_name='Summary')
  df_sections.to_excel(writer, sheet_name='Sections')
  df_surfaces.to_excel(writer, sheet_name='Surfaces')

br_sections_patches
br_surfaces_patches
