<a href="https://colab.research.google.com/github/Alex0Alca0/Image_processing-/blob/main/RGB_LBL_STATS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports


In [None]:
import os 
import numpy as np 
import pandas as pd 
import skimage
import imageio
import scipy
import matplotlib.pyplot as plt
from PIL import Image
import statistics
from scipy.stats import kurtosis
import cv2
import time
import concurrent.futures
import multiprocessing
from multiprocessing import Pool
import itertools
import pickle

### FUNCIONES

In [None]:
def boxplot_stats(X, whis=1.5, bootstrap=None, labels=None,
                  autorange=False):
    r"""
    Compute, for every dataset in *X*, the numbers needed to draw a
    box-and-whisker plot.

    Parameters
    ----------
    X : array-like
        Input data with at most two dimensions.  It is split into datasets
        by `_reshape_2D` (columns of a 2D array, or the items of a list of
        sequences).
    whis : float or (float, float), default: 1.5
        Whisker placement.  A single float puts the whisker limits at
        ``Q1 - whis*(Q3-Q1)`` and ``Q3 + whis*(Q3-Q1)``; each whisker then
        ends at the most extreme datum inside its limit.  A pair of floats
        is interpreted as the (low, high) percentiles of the data, e.g.
        ``(5, 95)``; ``(0, 100)`` spans the whole range.
    bootstrap : int, optional
        When given, the number of bootstrap resamples used to estimate the
        95% confidence interval of the median (percentile method).
    labels : array-like, optional
        One label per dataset; its length must equal the number of
        datasets.
    autorange : bool, default: False
        When True and a dataset has ``Q1 == Q3``, *whis* is replaced by
        ``(0, 100)`` for that dataset.

    Returns
    -------
    list of dict
        One dictionary per dataset with these keys:

        ========   ==============================================
        Key        Value
        ========   ==============================================
        label      dataset label (only present if one was given)
        mean       arithmetic mean
        med        50th percentile
        q1         25th percentile
        q3         75th percentile
        iqr        ``q3 - q1``
        cilo       lower notch limit around the median
        cihi       upper notch limit around the median
        whislo     end of the lower whisker
        whishi     end of the upper whisker
        fliers     data lying beyond the whiskers
        ========   ==============================================

        For an empty dataset every numeric entry is NaN and ``fliers`` is
        an empty array.

    Raises
    ------
    ValueError
        If *labels* does not have one entry per dataset, or if *whis* is
        neither iterable nor real.

    Notes
    -----
    Without bootstrapping, the notch limits use the Gaussian-based
    asymptotic approximation

    .. math::
        \mathrm{med} \pm 1.57 \times \frac{\mathrm{iqr}}{\sqrt{N}}

    following McGill, R., Tukey, J.W., and Larsen, W.A. (1978) "Variations
    of Boxplots", The American Statistician, 32:12-16.
    """

    def _median_ci_bootstrap(sample, n_resamples=5000):
        # 95% percentile-method confidence interval of the median, from
        # n_resamples resamples (with replacement) of the same size.
        size = len(sample)
        draws = np.random.randint(size, size=(n_resamples, size))
        resampled = sample[draws]
        medians = np.median(resampled, axis=1, overwrite_input=True)
        return np.percentile(medians, [2.5, 97.5])

    def _notch_limits(sample, median, spread, n_boot):
        # Analytic approximation unless a bootstrap count was requested.
        if n_boot is None:
            half_width = 1.57 * spread / np.sqrt(len(sample))
            return median - half_width, median + half_width
        interval = _median_ci_bootstrap(sample, N_or_default(n_boot))
        return interval[0], interval[1]

    def N_or_default(n_boot):
        # The requested resample count is forwarded unchanged.
        return n_boot

    # Split the input into a list of 1D datasets.
    X = _reshape_2D(X, "X")

    n_sets = len(X)
    if labels is None:
        labels = itertools.repeat(None)
    elif len(labels) != n_sets:
        raise ValueError("Dimensions of labels and X must be compatible")

    requested_whis = whis
    results = []
    for sample, tag in zip(X, labels):
        entry = {}
        if tag is not None:
            entry['label'] = tag

        # The dict is registered first and filled in afterwards, so even
        # the early `continue` below leaves a complete entry in the output.
        results.append(entry)

        # autorange may have overridden whis for the previous dataset.
        whis = requested_whis

        if len(sample) == 0:
            entry['fliers'] = np.array([])
            for key in ('mean', 'med', 'q1', 'q3', 'iqr',
                        'cilo', 'cihi', 'whislo', 'whishi'):
                entry[key] = np.nan
            continue

        sample = np.asarray(sample)

        entry['mean'] = np.mean(sample)

        q1, med, q3 = np.percentile(sample, [25, 50, 75])
        iqr = q3 - q1
        entry['iqr'] = iqr
        if iqr == 0 and autorange:
            whis = (0, 100)

        entry['cilo'], entry['cihi'] = _notch_limits(
            sample, med, iqr, bootstrap
        )

        # Limits beyond which data count as outliers.
        if np.iterable(whis) and not isinstance(whis, str):
            lo_limit, hi_limit = np.percentile(sample, whis)
        elif np.isreal(whis):
            lo_limit = q1 - whis * iqr
            hi_limit = q3 + whis * iqr
        else:
            raise ValueError('whis must be a float or list of percentiles')

        # Upper whisker: largest datum within the limit, never below q3.
        within_hi = sample[sample <= hi_limit]
        top = np.max(within_hi) if len(within_hi) != 0 else None
        entry['whishi'] = q3 if top is None or top < q3 else top

        # Lower whisker: smallest datum within the limit, never above q1.
        within_lo = sample[sample >= lo_limit]
        bottom = np.min(within_lo) if len(within_lo) != 0 else None
        entry['whislo'] = q1 if bottom is None or bottom > q1 else bottom

        # Everything outside the whiskers, low side first.
        entry['fliers'] = np.concatenate([
            sample[sample < entry['whislo']],
            sample[sample > entry['whishi']],
        ])

        entry.update(q1=q1, med=med, q3=q3)

    return results
def _unpack_to_numpy(x):
    """
    Return the numpy array behind *x* when one can be obtained, else *x*.

    ndarrays are returned as they are.  Objects exposing ``to_numpy()`` are
    converted by calling it.  Otherwise a ``values`` attribute is used, but
    only if it already is an ndarray (a dict, for instance, has a ``values``
    method, which must not be returned).
    """
    if isinstance(x, np.ndarray):
        return x
    if hasattr(x, 'to_numpy'):
        # The method is trusted to return a numpy array.
        return x.to_numpy()
    payload = getattr(x, 'values', None)
    return payload if isinstance(payload, np.ndarray) else x
def _reshape_2D(X, name):
    """
    Turn *X* into a list of 1D arrays, one per dataset.

    An ndarray is transposed first, so a 2D array yields its *columns* and
    a 1D array of scalars yields a one-element list holding that array.
    Any other sequence is walked item by item, each item being converted
    with `numpy.asanyarray` and flattened; if no item is itself iterable
    (strings count as scalars) the whole input is treated as one dataset.

    *name* only appears in the error message raised when the input has
    more than two dimensions.
    """

    def _too_many_dims():
        return ValueError(f'{name} must have 2 or fewer dimensions')

    # Pandas / xarray style containers are reduced to their ndarray.
    X = _unpack_to_numpy(X)

    if isinstance(X, np.ndarray):
        columns = X.T
        if len(columns) == 0:
            return [[]]
        if columns.ndim == 1 and np.ndim(columns[0]) == 0:
            # Plain 1D array of scalars: a single dataset.
            return [columns]
        if columns.ndim not in [1, 2]:
            raise _too_many_dims()
        # 2D array, or 1D array of iterables: flatten each entry.
        return [np.reshape(col, -1) for col in columns]

    if len(X) == 0:
        return [[]]

    flattened = []
    saw_iterable = False
    for item in X:
        # Strings are iterable but are treated as singletons here.
        if not isinstance(item, str) and np.iterable(item):
            saw_iterable = True
        item = np.asanyarray(item)
        if np.ndim(item) > 1:
            raise _too_many_dims()
        flattened.append(item.reshape(-1))

    if saw_iterable:
        # List of sequences: one dataset per item.
        return flattened
    # List of scalars: everything belongs to one dataset.
    return [np.reshape(flattened, -1)]

def get_filenames(Img_Sur, extension):
  """
  Return the file names in *Img_Sur* that end with the given extension,
  sorted alphabetically.

  Parameters:
  Img_Sur (list): File names to filter.
  extension (str): Extension to keep, with or without the leading dot
    (e.g. 'jpg', '.png', 'jpeg').  The comparison is case-sensitive.

  Returns:
  list: New sorted list with the matching file names.
  """
  # Compare against the full ".ext" suffix instead of the last three
  # characters, so extensions of any length work and a name that merely
  # ends in the same letters (e.g. "notes_jpg") is not picked up.
  suffix = '.' + extension.lstrip('.')
  return sorted(file_name for file_name in Img_Sur if file_name.endswith(suffix))

def vectRGB_S (args):
  """
  Single-argument wrapper around `vectRGB_S_`, for use with APIs such as
  `Pool.map` that pass exactly one object to the worker function.

  Parameters:
  args: Tuple ``(path, Img, Img_Lbl)``:
    path: Directory prefix, concatenated directly with the file names
      (so it must end with a separator, e.g. 'omarlopez/Escritorio/').
    Img: File name of the image whose pixel values are extracted.
    Img_Lbl: File name of the mask image.

  Returns: Three 1D vectors (R, G, B) with the channel values of the pixels
  where the mask equals 1.
  """
  path, Img, Img_Lbl = args
  # The extraction logic lives in vectRGB_S_ only, so the two entry points
  # cannot drift apart.
  return vectRGB_S_(path, Img, Img_Lbl)

def vectRGB_S_(path, Img, Img_Lbl):
  """
  Read an image and its mask and collect, per colour channel, the values
  of the pixels where the mask equals 1.

  Parameters:
  path: Directory prefix, concatenated directly with the file names
    (e.g. 'omarlopez/Escritorio/').
  Img: File name of the image.
  Img_Lbl: File name of the mask.

  Returns: Three 1D vectors R, G, B (channels 0, 1 and 2 of the image).
  """
  # The mask is read first, then the image.
  # NOTE(review): assumes the mask is a 2D array with the image's height
  # and width — confirm against the dataset.
  selected = imageio.imread(path + Img_Lbl) == 1
  pixels = imageio.imread(path + Img)

  Vr, Vg, Vb = (pixels[:, :, channel][selected].flatten() for channel in range(3))
  return Vr, Vg, Vb

### PATHS

In [None]:
# input_images_path_1 = '/content/drive/MyDrive/Jonathan El-Beze mod/SEC/Subtype_Va (CYS)'
# input_images_path_2 = '/content/drive/MyDrive/Jonathan El-Beze mod/SUR/Subtype_IIa (WD)'
# Img_ =os.listdir(input_images_path_1)
# paso = '/content/drive/MyDrive/Jonathan El-Beze mod/SUR/Subtype_Va (CYS)/'
# paso = input_images_path_1 + '/'

In [None]:
# Dataset root on the mounted drive; the SEC/ and SUR/ folders hang from it.
main_path = '/content/drive/MyDrive/Jonathan El-Beze mod/'
path_sec, path_sur = main_path + 'SEC/', main_path + 'SUR/'
# Entries found directly inside each of the two folders.
path_sec_list, path_sur_list = os.listdir(path_sec), os.listdir(path_sur)


#only_subtype = 'SEC-Subtype_Va'
#path_sub_sec_type = path_sec + only_subtype
#path_sub_sec_type_list =os.listdir(path_sub_sec_type)
#path_sub_sec_type_1 =  path_sub_sec_type + '_1'
#path_sub_sec_type_list_1 = os.listdir(path_sub_sec_type_1)
#name = only_subtype

# prueba

In [None]:
# For every sub-folder of path_sur, pair its jpg images with its png masks
# (by sorted position), extract the masked R/G/B pixel values in parallel
# and store the three concatenated vectors under the sub-folder's name.
resultados = {}
# One worker pool serves all sub-folders instead of being rebuilt for each.
with Pool(processes=5) as pool:
    for subtype in path_sur_list:
        paso = path_sur + subtype + '/'
        if not os.path.isdir(paso):
            # Stray files next to the subtype folders cannot be listed.
            continue

        _Img_ = os.listdir(paso)
        jpg_names = get_filenames(_Img_, 'jpg')
        png_names = get_filenames(_Img_, 'png')
        if len(jpg_names) != len(png_names):
            # zip() below stops at the shorter list; make that visible.
            print(f'Warning: {subtype}: {len(jpg_names)} jpg vs '
                  f'{len(png_names)} png files; unpaired files are ignored')

        args = [(paso, jpg, png) for jpg, png in zip(jpg_names, png_names)]
        if not args:
            # zip(*[]) cannot be unpacked into three names, so skip folders
            # without any image/mask pair.
            print(f'Warning: {subtype}: no image/mask pairs found, skipped')
            continue

        result = pool.map(vectRGB_S, args)

        VR, VG, VB = zip(*result)
        VR_1_T = np.concatenate(VR)
        VG_1_T = np.concatenate(VG)
        VB_1_T = np.concatenate(VB)

        resultados[subtype] = [VR_1_T, VG_1_T, VB_1_T]

In [None]:
# Pull the [R, G, B] vectors of each subtype out of the results dict.
IIIa, IIa, Va, Ia, IVc, IVd = (
    resultados[folder]
    for folder in (
        "Subtype_IIIa (AU)",
        "Subtype_IIa (WD)",
        "Subtype_Va (CYS)",
        "Subtype_Ia (WW)",
        "Subtype_IVc (STR)",
        "Subtype_IVd (BRU)",
    )
)

In [None]:
# Box-plot statistics per subtype; each result is a list with one dict per
# channel vector, in the stored order (R, G, B).
stats_IIIa, stats_IIa, stats_Va, stats_Ia, stats_IVc, stats_IVd = (
    boxplot_stats(channels) for channels in (IIIa, IIa, Va, Ia, IVc, IVd)
)

In [None]:
# Display one statistics dict per subtype.
# NOTE(review): stats_Ia is indexed with [1] (second channel) while every
# other subtype uses [2] (third channel) — confirm this mix is intended.
stats_Ia[1],stats_IIa[2], stats_IIIa[2], stats_IVd[2], stats_Va[2], stats_IVc[2]

({'mean': 135.5333874769089,
  'iqr': 78.0,
  'cilo': 131.97279153755318,
  'cihi': 132.02720846244682,
  'whishi': 255,
  'whislo': 0,
  'fliers': array([], dtype=uint8),
  'q1': 95.0,
  'med': 132.0,
  'q3': 173.0},
 {'mean': 117.45895283793988,
  'iqr': 69.0,
  'cilo': 111.89861247182034,
  'cihi': 112.10138752817966,
  'whishi': 251,
  'whislo': 0,
  'fliers': array([255, 255, 255, ..., 255, 255, 255], dtype=uint8),
  'q1': 79.0,
  'med': 112.0,
  'q3': 148.0},
 {'mean': 113.02053811811794,
  'iqr': 73.0,
  'cilo': 109.96614515540982,
  'cihi': 110.03385484459018,
  'whishi': 255,
  'whislo': 0,
  'fliers': array([], dtype=uint8),
  'q1': 75.0,
  'med': 110.0,
  'q3': 148.0},
 {'mean': 143.49032046517186,
  'iqr': 58.0,
  'cilo': 146.96943839445683,
  'cihi': 147.03056160554317,
  'whishi': 255,
  'whislo': 29,
  'fliers': array([28, 27, 28, ..., 25, 26, 28], dtype=uint8),
  'q1': 116.0,
  'med': 147.0,
  'q3': 174.0},
 {'mean': 128.5024687255179,
  'iqr': 89.0,
  'cilo': 124.93536

In [None]:
# Short tick labels, one per subtype.
labels_ = [
    "(AU)",
    "(WD)",
    "(CYS)",
    "(WW)",
    "(STR)",
    "(BRU)",
]

In [None]:
# Constant "DataSet" column value, repeated once per subtype row.
a = "Label"
dataset_ = [a] * 6

In [None]:
# Box plot of the red channel (index 0) of every subtype.
plt.figure(figsize=(10, 6))
plt.title("R_SUR")
plt.boxplot(
    [channels[0] for channels in (IIIa, IIa, Va, Ia, IVc, IVd)],
    labels=labels_,
)

In [None]:
# Box plot of the green channel of every subtype.  The channel vectors are
# stored as [R, G, B], so green is index 1 (index 0 would redraw the red
# plot under the "G_SUR" title).
plt.figure(figsize=(10,6))
plt.title("G_SUR")
plt.boxplot([IIIa[1],IIa[1],Va[1],Ia[1],IVc[1],IVd[1]],labels=labels_)

In [None]:
# Box plot of the blue channel of every subtype.  The channel vectors are
# stored as [R, G, B], so blue is index 2 (index 0 would redraw the red
# plot under the "B_SUR" title).
plt.figure(figsize=(10,6))
plt.title("B_SUR")
plt.boxplot([IIIa[2],IIa[2],Va[2],Ia[2],IVc[2],IVd[2]],labels=labels_)

In [None]:
# Red-channel (index 0) statistics, one row per subtype in the order
# IIIa, IIa, Va, Ia, IVc, IVd.  The last row must come from stats_IVd;
# using stats_IVc twice made the final two rows identical.
df_R = pd.DataFrame(data=[stats_IIIa[0],stats_IIa[0],stats_Va[0],stats_Ia[0],stats_IVc[0],stats_IVd[0]])

In [None]:
# Put the subtype labels in the first column.
df_R.insert(loc=0, column='Subtype', value=labels_)

In [None]:
# Put the dataset tag in the second column.
df_R.insert(loc=1, column='DataSet', value=dataset_)

In [None]:
# Display the assembled red-channel statistics table.
df_R

Unnamed: 0,Subtype,DataSet,mean,iqr,cilo,cihi,whishi,whislo,fliers,q1,med,q3
0,(AU),Label,162.635846,70.0,165.969983,166.030017,-1,24,"[12, 12, 17, 21, 23, 19, 19, 23, 18, 17, 20, 2...",129.0,166.0,199.0
1,(WD),Label,153.071819,77.0,154.883055,155.116945,-1,2,[1],117.0,155.0,194.0
2,(CYS),Label,164.251532,75.0,166.949698,167.050302,-1,16,"[15, 15, 15, 12, 15, 13, 15, 13, 15, 12, 13, 1...",128.0,167.0,203.0
3,(WW),Label,156.268409,84.0,153.945697,154.054303,-1,0,[],114.0,154.0,198.0
4,(STR),Label,163.850389,67.0,165.964874,166.035126,-1,31,"[30, 29, 29, 23, 23, 26, 30, 28, 27, 30, 29, 2...",131.0,166.0,198.0
5,(BRU),Label,163.850389,67.0,165.964874,166.035126,-1,31,"[30, 29, 29, 23, 23, 26, 30, 28, 27, 30, 29, 2...",131.0,166.0,198.0


In [None]:
# Display the full statistics dict of the first (red) channel of IIIa.
# An empty subscript "[]" is a syntax error, so it is dropped.
stats_IIIa[0]

{'mean': 162.63584599076748,
 'iqr': 70.0,
 'cilo': 165.96998272939916,
 'cihi': 166.03001727060084,
 'whishi': 255,
 'whislo': 24,
 'fliers': array([12, 12, 17, ..., 22, 16, 23], dtype=uint8),
 'q1': 129.0,
 'med': 166.0,
 'q3': 199.0}

In [None]:
# NOTE(review): mean_Ivd is not assigned anywhere in the visible notebook,
# and the value shown below equals stats_IIIa[0]['mean'] rather than an IVd
# statistic — confirm where this name is defined and what it should hold.
mean_Ivd

162.63584599076748

# Exportaciones

In [None]:
# Persist the per-subtype [R, G, B] vectors to disk.
with open("resultados_RGB_LBL.pickle", "wb") as pickle_out:
    pickle.dump(resultados, pickle_out)