In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# For the regression part
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from scipy.optimize import curve_fit

# For the classification part
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, classification_report

from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import PCA, KernelPCA

from sklearn.ensemble import RandomForestClassifier

from sklearn.manifold import SpectralEmbedding


#sns.set_context('talk')
sns.set_theme()

import warnings
warnings.filterwarnings("ignore")


En este práctico, utilizaremos el archivo original *fiji_datos_0al7mo_labels.csv* que se encuentra en la carpeta *data/raw/*.

Además sumaremos los siguientes datos sintéticos que se encuentran en la carpeta *data/datos_sinteticos/*:
  * datos_sinteticos_dias_3_y_5.csv
  * synthetic_3y5_sint2.csv
  * synthetic_data_dia_3_si.csv
  * synthetic_data_dia_4_si.csv
  * synthetic_data_dia_5_si.csv

Como también dos archivos extras que están en la capeta *data/03_AS/*:
  * fiji_datos_mean_diam.csv
  * fiji_datos_noise.csv
  

## Análisis y exploración de los datos sinteticos dia 3 y 5

In [2]:
df_sinteticos_3_5=pd.read_csv("data/datos_sinteticos/datos_sinteticos_dias_3_y_5.csv")

In [3]:
df_sinteticos_3_5.head()

Unnamed: 0,labels,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,Esferoide,dia,Diameter,n_diam
0,0001u,2108.856029,158.5521,0.853148,52.0548,39.89,1.252439,0.801927,0.9574,si,3,44.779969,11.118208
1,0001v,5494.716556,267.7438,0.765975,108.6886,78.1359,1.367897,0.729508,0.9424,si,5,94.04524,55.354949
2,0001w,4604.334693,208.6327,0.835749,79.215,78.5711,1.007505,0.992155,0.9544,si,3,74.80203,24.886113
3,0001x,3293.43306,184.9685,0.888199,59.5354,55.6766,1.039236,0.955215,0.9792,si,3,57.879742,19.426588
4,000a0,12597.351184,393.5486,0.74141,115.4147,98.4842,1.126705,0.888984,0.9568,si,3,108.587775,412.66966


In [4]:
df_sinteticos_3_5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 13 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   labels     55 non-null     object 
 1   Area       55 non-null     float64
 2   Perim.     55 non-null     float64
 3   Circ.      55 non-null     float64
 4   Feret      55 non-null     float64
 5   MinFeret   55 non-null     float64
 6   AR         55 non-null     float64
 7   Round      55 non-null     float64
 8   Solidity   55 non-null     float64
 9   Esferoide  55 non-null     object 
 10  dia        55 non-null     int64  
 11  Diameter   55 non-null     float64
 12  n_diam     55 non-null     float64
dtypes: float64(10), int64(1), object(2)
memory usage: 5.7+ KB


In [5]:
df_sinteticos_3_5.shape

(55, 13)

In [6]:
df_sinteticos_3_5.columns

Index(['labels', 'Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round',
       'Solidity', 'Esferoide', 'dia', 'Diameter', 'n_diam'],
      dtype='object')

In [7]:
df_sinteticos_3_5.dtypes

labels        object
Area         float64
Perim.       float64
Circ.        float64
Feret        float64
MinFeret     float64
AR           float64
Round        float64
Solidity     float64
Esferoide     object
dia            int64
Diameter     float64
n_diam       float64
dtype: object

In [8]:
# creamos arrays para cada tipo de variable
variable_categorica = []
variable_numerica = []

# Iteramos a través de las columnas
for columna in df_sinteticos_3_5.columns:
    if df_sinteticos_3_5[columna].dtypes == 'object':
        variable_categorica.append(columna)
    else:
        variable_numerica.append(columna)

print('Variable Categórica:', variable_categorica,'\n')
print('Variable Numérica:', variable_numerica)


Variable Categórica: ['labels', 'Esferoide'] 

Variable Numérica: ['Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round', 'Solidity', 'dia', 'Diameter', 'n_diam']


In [9]:
df_sinteticos_3_5["labels"].unique()

array(['0001u', '0001v', '0001w', '0001x', '000a0', '000aa', '000ab',
       '000ac', '000ad', '000ae', '000ag', '000ah', '000aj', '000ak',
       '000am', '000an', '000aq', '000ar', '000au', '000aw', '000ax',
       '000ay', '000b0', '000bb', '000bc', '000bd', '000bg', '000bj',
       '000bk', '000bm', '000bo', '000br', '000bs', '000bt', '000bx',
       '000by', '000c0', '000ca', '000cc', '000cd', '000ce', '000cg',
       '000ch', '000cl', '000co', '000cs', '000ct', '000cu', '000cv',
       '000cx', '000d0', '000da', '000dd', '000de', '000dg'], dtype=object)

In [10]:
# A diferencia del dataset con el que venimos trabajando, la variable label toma otras etiquetas

In [11]:
df_sinteticos_3_5["Esferoide"].unique()

array(['si'], dtype=object)

In [12]:
df_sinteticos_3_5["Esferoide"].value_counts()

si    55
Name: Esferoide, dtype: int64

In [13]:
#Todos los datos tienen la etiqueta esferoide "si"

In [14]:
#Cantidad de datos etiquetados como "si" segun el dia 

In [15]:
df_sinteticos_3_5.groupby(["dia","Esferoide"]).size()

dia  Esferoide
3    si           29
5    si           26
dtype: int64

## Análisis y exploración de los datos sinteticos dia 3 y 5 bis

In [16]:
df_sinteticos_3_5_bis=pd.read_csv("data/datos_sinteticos/synthetic_3y5_sint2.csv")

In [17]:
df_sinteticos_3_5_bis.head()

Unnamed: 0,labels,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,Esferoide,dia,Diameter,n_diam
0,000iu,122.7626,41.1188,0.846634,14.1852,12.1525,1.366386,0.753013,0.935293,no,5,13.2978,0.38104
1,000ix,122.7626,101.52717,0.900619,32.269328,30.219156,1.27768,0.843124,0.974896,no,5,31.236372,0.38104
2,000j1,9070.046335,424.616078,0.563269,139.12526,95.758206,1.62625,0.618293,0.90347,no,3,117.423404,277.627847
3,000ji,6586.914719,270.750961,0.764905,89.727394,73.635156,1.465515,0.746293,0.947981,no,3,81.66837,215.923591
4,000ju,6796.984868,284.745161,0.788278,85.622933,72.65707,1.0012,0.955847,0.931386,si,3,79.115739,205.487181


In [18]:
df_sinteticos_3_5_bis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 161 entries, 0 to 160
Data columns (total 13 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   labels     161 non-null    object 
 1   Area       161 non-null    float64
 2   Perim.     161 non-null    float64
 3   Circ.      161 non-null    float64
 4   Feret      161 non-null    float64
 5   MinFeret   161 non-null    float64
 6   AR         161 non-null    float64
 7   Round      161 non-null    float64
 8   Solidity   161 non-null    float64
 9   Esferoide  161 non-null    object 
 10  dia        161 non-null    int64  
 11  Diameter   161 non-null    float64
 12  n_diam     161 non-null    float64
dtypes: float64(10), int64(1), object(2)
memory usage: 16.5+ KB


In [19]:
df_sinteticos_3_5_bis.shape

(161, 13)

In [20]:
df_sinteticos_3_5_bis.columns

Index(['labels', 'Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round',
       'Solidity', 'Esferoide', 'dia', 'Diameter', 'n_diam'],
      dtype='object')

In [21]:
df_sinteticos_3_5_bis.dtypes

labels        object
Area         float64
Perim.       float64
Circ.        float64
Feret        float64
MinFeret     float64
AR           float64
Round        float64
Solidity     float64
Esferoide     object
dia            int64
Diameter     float64
n_diam       float64
dtype: object

In [22]:
# creamos arrays para cada tipo de variable
variable_categorica = []
variable_numerica = []

# Iteramos a través de las columnas
for columna in df_sinteticos_3_5_bis.columns:
    if df_sinteticos_3_5_bis[columna].dtypes == 'object':
        variable_categorica.append(columna)
    else:
        variable_numerica.append(columna)

print('Variable Categórica:', variable_categorica,'\n')
print('Variable Numérica:', variable_numerica)


Variable Categórica: ['labels', 'Esferoide'] 

Variable Numérica: ['Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round', 'Solidity', 'dia', 'Diameter', 'n_diam']


In [23]:
df_sinteticos_3_5_bis["labels"].unique()

array(['000iu', '000ix', '000j1', '000ji', '000ju', '000jv', '000k0',
       '000kc', '000kf', '000kh', '000kn', '000kr', '000ku', '000kv',
       '000ky', '000l0', '000ld', '000lh', '000lj', '000lm', '000lo',
       '000lu', '000ly', '000mg', '000mi', '000mm', '000mo', '000ms',
       '000mu', '000my', '000n0', '000nh', '000ni', '000nk', '000nn',
       '000no', '000nr', '000ns', '000nx', '000o0', '000od', '000og',
       '000oj', '000om', '000op', '000ou', '000oy', '000oz', '000p0',
       '000pa', '000pf', '000pr', '000pu', '000px', '000pz', '000q1',
       '000qf', '000qg', '000qh', '000qi', '000qk', '000qp', '000qt',
       '000qu', '000qv', '000qy', '000qz', '000ra', '000rd', '000rf',
       '000rh', '000ri', '000rk', '000rn', '000rs', '000rt', '000rw',
       '000s0', '000s1', '000sb', '000sh', '000sl', '000sm', '000sq',
       '000sr', '000su', '000sy', '000t0', '000ta', '000tf', '000th',
       '000tj', '000tm', '000tn', '000to', '000tr', '000tv', '000ty',
       '000u1', '000

In [24]:
df_sinteticos_3_5_bis["Esferoide"].unique()

array(['no', 'si'], dtype=object)

In [25]:
df_sinteticos_3_5_bis["Esferoide"].value_counts()

no    124
si     37
Name: Esferoide, dtype: int64

In [26]:
df_sinteticos_3_5_bis.groupby(["dia","Esferoide"]).size()

dia  Esferoide
3    no           60
     si           17
5    no           64
     si           20
dtype: int64

## Análisis y exploración de los datos sinteticos dia 3 

In [27]:
df_sinteticos_3=pd.read_csv("data/datos_sinteticos/synthetic_data_dia_3_si.csv")

In [28]:
df_sinteticos_3.head()

Unnamed: 0,labels,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,Esferoide,dia,Diameter,n_diam
0,000px,2668.2568,228.4795,0.893241,66.215303,68.0477,1.069481,0.934437,0.9684,si,3,66.365583,17.311897
1,000q0,6009.2465,312.8576,0.784415,101.099708,76.88,1.37458,0.727304,0.9515,si,3,90.270453,109.653369
2,000qh,16358.7381,490.8434,0.85164,147.480859,144.2174,1.078255,0.927538,0.983,si,3,144.196627,634.141009
3,000qj,15335.9539,524.2858,0.756606,164.937428,134.5306,1.092246,0.915844,0.9693,si,3,141.551969,597.638251
4,000qq,14441.1167,454.2536,0.798609,99.781799,131.3265,1.028377,0.972036,0.9753,si,3,124.884592,501.757463


In [29]:
df_sinteticos_3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 13 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   labels     56 non-null     object 
 1   Area       56 non-null     float64
 2   Perim.     56 non-null     float64
 3   Circ.      56 non-null     float64
 4   Feret      56 non-null     float64
 5   MinFeret   56 non-null     float64
 6   AR         56 non-null     float64
 7   Round      56 non-null     float64
 8   Solidity   56 non-null     float64
 9   Esferoide  56 non-null     object 
 10  dia        56 non-null     int64  
 11  Diameter   56 non-null     float64
 12  n_diam     56 non-null     float64
dtypes: float64(10), int64(1), object(2)
memory usage: 5.8+ KB


In [30]:
df_sinteticos_3.shape

(56, 13)

In [31]:
df_sinteticos_3.columns

Index(['labels', 'Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round',
       'Solidity', 'Esferoide', 'dia', 'Diameter', 'n_diam'],
      dtype='object')

In [32]:
df_sinteticos_3.dtypes

labels        object
Area         float64
Perim.       float64
Circ.        float64
Feret        float64
MinFeret     float64
AR           float64
Round        float64
Solidity     float64
Esferoide     object
dia            int64
Diameter     float64
n_diam       float64
dtype: object

In [33]:
# creamos arrays para cada tipo de variable
variable_categorica = []
variable_numerica = []

# Iteramos a través de las columnas
for columna in df_sinteticos_3.columns:
    if df_sinteticos_3[columna].dtypes == 'object':
        variable_categorica.append(columna)
    else:
        variable_numerica.append(columna)

print('Variable Categórica:', variable_categorica,'\n')
print('Variable Numérica:', variable_numerica)


Variable Categórica: ['labels', 'Esferoide'] 

Variable Numérica: ['Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round', 'Solidity', 'dia', 'Diameter', 'n_diam']


In [34]:
df_sinteticos_3["labels"].unique()

array(['000px', '000q0', '000qh', '000qj', '000qq', '000qu', '000qz',
       '000r1', '000re', '000rf', '000rh', '000ri', '000rq', '000rx',
       '000rz', '000s1', '000sh', '000si', '000sp', '000sq', '000sr',
       '000st', '000sy', '000ta', '000tg', '000tl', '000tn', '000tq',
       '000tv', '000tz', '000ub', '000ud', '000ug', '000ul', '000um',
       '000up', '000ur', '000us', '000uu', '000vb', '000vc', '000vg',
       '000vj', '000vt', '000vu', '000vw', '000vy', '000wa', '000wd',
       '000we', '000wf', '000wk', '000wn', '000wo', '000wv', '000wz'],
      dtype=object)

In [35]:
df_sinteticos_3["Esferoide"].unique()

array(['si'], dtype=object)

In [36]:
df_sinteticos_3["Esferoide"].value_counts()

si    56
Name: Esferoide, dtype: int64

## Análisis y exploración de los datos sinteticos dia 4

In [37]:
df_sinteticos_4=pd.read_csv("data/datos_sinteticos/synthetic_data_dia_4_si.csv")

In [38]:
df_sinteticos_4.head()

Unnamed: 0,labels,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,Esferoide,dia,Diameter,n_diam
0,00000,15848.2676,474.1609,0.77885,145.709319,127.7317,1.153,0.8674,0.974,si,4,127.770345,521.426321
1,00001,10181.0707,433.6711,0.784262,131.55914,128.6091,1.0748,0.931,0.9739,si,4,121.307057,216.107458
2,0000a,20784.2025,611.4172,0.739942,181.800275,174.8398,1.0715,0.9323,0.9726,si,4,175.153921,779.371247
3,0000b,1083.9499,190.94,0.909804,55.199988,53.5693,1.0905,0.9164,0.9768,si,4,52.983315,8.749749
4,0000c,1254.7693,191.1564,0.869752,56.385742,50.7836,1.1148,0.8978,0.9725,si,4,51.059912,9.517034


In [39]:
df_sinteticos_4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 13 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   labels     50 non-null     object 
 1   Area       50 non-null     float64
 2   Perim.     50 non-null     float64
 3   Circ.      50 non-null     float64
 4   Feret      50 non-null     float64
 5   MinFeret   50 non-null     float64
 6   AR         50 non-null     float64
 7   Round      50 non-null     float64
 8   Solidity   50 non-null     float64
 9   Esferoide  50 non-null     object 
 10  dia        50 non-null     int64  
 11  Diameter   50 non-null     float64
 12  n_diam     50 non-null     float64
dtypes: float64(10), int64(1), object(2)
memory usage: 5.2+ KB


In [40]:
df_sinteticos_4.shape

(50, 13)

In [41]:
df_sinteticos_4.columns

Index(['labels', 'Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round',
       'Solidity', 'Esferoide', 'dia', 'Diameter', 'n_diam'],
      dtype='object')

In [42]:
df_sinteticos_4.dtypes

labels        object
Area         float64
Perim.       float64
Circ.        float64
Feret        float64
MinFeret     float64
AR           float64
Round        float64
Solidity     float64
Esferoide     object
dia            int64
Diameter     float64
n_diam       float64
dtype: object

In [43]:
# creamos arrays para cada tipo de variable
variable_categorica = []
variable_numerica = []

# Iteramos a través de las columnas
for columna in df_sinteticos_4.columns:
    if df_sinteticos_4[columna].dtypes == 'object':
        variable_categorica.append(columna)
    else:
        variable_numerica.append(columna)

print('Variable Categórica:', variable_categorica,'\n')
print('Variable Numérica:', variable_numerica)


Variable Categórica: ['labels', 'Esferoide'] 

Variable Numérica: ['Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round', 'Solidity', 'dia', 'Diameter', 'n_diam']


In [44]:
df_sinteticos_4["labels"].unique()

array(['00000', '00001', '0000a', '0000b', '0000c', '0000d', '0000e',
       '0000f', '0000g', '0000h', '0000i', '0000j', '0000k', '0000l',
       '0000m', '0000n', '0000o', '0000p', '0000q', '0000r', '0000s',
       '0000t', '0000u', '0000v', '0000w', '0000x', '0000y', '0000z',
       '00010', '00011', '0001a', '0001b', '0001c', '0001d', '0001e',
       '0001f', '0001g', '0001h', '0001i', '0001j', '0001k', '0001l',
       '0001m', '0001n', '0001o', '0001p', '0001q', '0001r', '0001s',
       '0001t'], dtype=object)

In [45]:
df_sinteticos_4["Esferoide"].unique()

array(['si'], dtype=object)

In [46]:
df_sinteticos_4["Esferoide"].value_counts()

si    50
Name: Esferoide, dtype: int64

## Análisis y exploración de los datos sinteticos dia 5

In [47]:
df_sinteticos_5=pd.read_csv("data/datos_sinteticos/synthetic_data_dia_5_si.csv")

In [48]:
df_sinteticos_5.head()

Unnamed: 0,labels,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,Esferoide,dia,Diameter,n_diam
0,000fh,1940.893421,161.8442,-0.627087,53.714871,62.5798,1.026814,0.973772,0.9833,si,5,46.077939,12.629334
1,000gl,17291.426981,524.0741,-0.627087,163.107805,155.1193,1.05376,0.949237,0.9835,si,5,149.262276,635.094169
2,000gq,1191.282905,155.5651,-0.627087,47.768183,41.5086,1.071748,0.933102,0.9508,si,5,40.808361,8.924818
3,000gx,3586.49506,269.9627,-0.627087,119.774515,73.2305,1.13487,0.881704,0.9813,si,5,60.997945,34.82699
4,000hl,11298.641901,402.5262,-0.627087,135.064444,120.0183,1.185762,0.84327,0.9812,si,5,106.108531,294.398135


In [49]:
df_sinteticos_5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 13 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   labels     41 non-null     object 
 1   Area       41 non-null     float64
 2   Perim.     41 non-null     float64
 3   Circ.      41 non-null     float64
 4   Feret      41 non-null     float64
 5   MinFeret   41 non-null     float64
 6   AR         41 non-null     float64
 7   Round      41 non-null     float64
 8   Solidity   41 non-null     float64
 9   Esferoide  41 non-null     object 
 10  dia        41 non-null     int64  
 11  Diameter   41 non-null     float64
 12  n_diam     41 non-null     float64
dtypes: float64(10), int64(1), object(2)
memory usage: 4.3+ KB


In [50]:
df_sinteticos_5.shape

(41, 13)

In [51]:
df_sinteticos_5.columns

Index(['labels', 'Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round',
       'Solidity', 'Esferoide', 'dia', 'Diameter', 'n_diam'],
      dtype='object')

In [52]:
df_sinteticos_5.dtypes

labels        object
Area         float64
Perim.       float64
Circ.        float64
Feret        float64
MinFeret     float64
AR           float64
Round        float64
Solidity     float64
Esferoide     object
dia            int64
Diameter     float64
n_diam       float64
dtype: object

In [53]:
# creamos arrays para cada tipo de variable
variable_categorica = []
variable_numerica = []

# Iteramos a través de las columnas
for columna in df_sinteticos_5.columns:
    if df_sinteticos_5[columna].dtypes == 'object':
        variable_categorica.append(columna)
    else:
        variable_numerica.append(columna)

print('Variable Categórica:', variable_categorica,'\n')
print('Variable Numérica:', variable_numerica)


Variable Categórica: ['labels', 'Esferoide'] 

Variable Numérica: ['Area', 'Perim.', 'Circ.', 'Feret', 'MinFeret', 'AR', 'Round', 'Solidity', 'dia', 'Diameter', 'n_diam']


In [54]:
df_sinteticos_5["labels"].unique()

array(['000fh', '000gl', '000gq', '000gx', '000hl', '000hw', '000hy',
       '000ib', '000j1', '000jv', '000kf', '000kx', '000lf', '000lj',
       '000lo', '000mb', '000mf', '000mw', '000ni', '000no', '000nr',
       '000ny', '000nz', '000o0', '000oi', '000op', '000pr', '000qd',
       '000qg', '000qp', '000rk', '000rn', '000sm', '000t0', '000tj',
       '000tr', '000uv', '000vr', '000vv', '000wt', '000ww'], dtype=object)

In [55]:
df_sinteticos_5["Esferoide"].unique()

array(['si'], dtype=object)

In [56]:
df_sinteticos_5["Esferoide"].value_counts()

si    41
Name: Esferoide, dtype: int64

## Comparación con los datos de los dias 3, 4 y 5 

In [57]:
raw_df = pd.read_csv("data/raw/fiji_datos_0al7mo_labels.csv")

In [58]:
raw_df.head()

Unnamed: 0,labels,Area,X,Y,XM,YM,Perim.,BX,BY,Width,...,FeretY,FeretAngle,MinFeret,AR,Round,Solidity,Esferoide,dia,Diameter,n_diam
0,Esferas_BT474_dia_0_well_1_100X_1_blob_1,324.444,1129.2718,102.2051,1129.2718,102.2051,67.3603,1118.8859,92.3913,21.0598,...,157,23.4287,19.7011,1.0653,0.9387,0.95,si,0,20.9564,1.491357
1,Esferas_BT474_dia_0_well_1_100X_1_blob_2,497.5115,1517.5528,126.1022,1517.5528,126.1022,82.8004,1504.7554,113.4511,25.8152,...,192,16.1443,25.1289,1.0475,0.9546,0.9569,si,0,26.002,2.848733
2,Esferas_BT474_dia_0_well_1_100X_1_blob_3,282.9078,1314.2815,126.0988,1314.2815,126.0988,62.1586,1304.3478,116.8478,19.7011,...,189,15.4222,18.3424,1.0395,0.962,0.9519,si,0,19.3897,1.181258
3,Esferas_BT474_dia_0_well_1_100X_1_blob_5,500.7421,1189.2841,212.6155,1189.2841,212.6155,84.5571,1175.9511,200.4076,26.4946,...,309,174.144,25.1359,1.0278,0.973,0.9493,si,0,25.88475,2.81037
4,Esferas_BT474_dia_0_well_1_100X_1_blob_6,492.8964,1043.0782,247.7667,1043.0782,247.7667,82.0728,1030.5707,235.7337,25.1359,...,353,143.427,24.4565,1.0186,0.9817,0.9643,si,0,25.33985,2.636596


In [59]:
#Cantidad de datos clasificados como Esferoide "si" y "no" en los dias 3, 4 y 5

In [60]:
#En el dia 3 

raw_df[raw_df['dia'] == 3]['Esferoide'].value_counts()

no    25
si    18
Name: Esferoide, dtype: int64

In [61]:
#En el dia 4 

raw_df[raw_df['dia'] == 4]['Esferoide'].value_counts()

no    206
si     48
Name: Esferoide, dtype: int64

In [62]:
#En el dia 5

raw_df[raw_df['dia'] == 5]['Esferoide'].value_counts()

si    28
no    24
Name: Esferoide, dtype: int64

Lo que se puede notar en un principio es que en los dias 3 y 5 hay menor cantidad de datos que en el dia 4. En el dia 4 hay mayor cantidad de datos etiquetados como esferoides "no" que esferoides "si". Al contar con datos del dia 3 y 5 sinteticos se podrian agragar al dataset para contar con mas cantidad

In [63]:
df_sinteticos_3_5.groupby(["dia","Esferoide"]).size()

dia  Esferoide
3    si           29
5    si           26
dtype: int64

In [64]:
df_sinteticos_3_5_bis.groupby(["dia","Esferoide"]).size()

dia  Esferoide
3    no           60
     si           17
5    no           64
     si           20
dtype: int64

In [65]:
df_sinteticos_3["Esferoide"].unique()

array(['si'], dtype=object)

In [66]:
df_sinteticos_3["Esferoide"].value_counts()

si    56
Name: Esferoide, dtype: int64

In [67]:
df_sinteticos_5["Esferoide"].unique()

array(['si'], dtype=object)

In [68]:
df_sinteticos_5["Esferoide"].value_counts()

si    41
Name: Esferoide, dtype: int64

In [73]:
#Cantidad de datos sinteticos esferoide "si", "no" dia 3

si_sintetico_3_5 = len(df_sinteticos_3_5[(df_sinteticos_3_5['dia'] == 3) & (df_sinteticos_3_5['Esferoide'] == 'si')])
si_sintetico_3_5_bis= len(df_sinteticos_3_5_bis[(df_sinteticos_3_5_bis['dia'] == 3) & (df_sinteticos_3_5_bis['Esferoide'] == 'si')])
si_sintetico_3=len(df_sinteticos_3[df_sinteticos_3['Esferoide'] == 'si'])


count_dia_3_si= si_sintetico_3 + si_sintetico_3_5_bis + si_sintetico_3
print(f"La cantidad de datos sinteticos clasificados el dia 3 como esferoide  es: {count_dia_3_si}")

no_sintetico_3 = len(df_sinteticos_3_5[(df_sinteticos_3_5['dia'] == 3) & (df_sinteticos_3_5['Esferoide'] == 'no')])
no_sintetico_3_bis= len(df_sinteticos_3_5_bis[(df_sinteticos_3_5_bis['dia'] == 3) & (df_sinteticos_3_5_bis['Esferoide'] == 'no')])

count_dia_3_no= no_sintetico_3 + no_sintetico_3_bis
print(f"La cantidad de datos sinteticos clasificados el dia 3 como  no esferoide  es: {count_dia_3_no}")


La cantidad de datos sinteticos clasificados el dia 3 como esferoide  es: 129
La cantidad de datos sinteticos clasificados el dia 3 como  no esferoide  es: 60


In [74]:
#Cantidad de datos sinteticos esferoide "si", "no" dia 5

si_sintetico_5 = len(df_sinteticos_3_5[(df_sinteticos_3_5['dia'] == 5) & (df_sinteticos_3_5['Esferoide'] == 'si')])
si_sintetico_5_bis= len(df_sinteticos_3_5_bis[(df_sinteticos_3_5_bis['dia'] == 5) & (df_sinteticos_3_5_bis['Esferoide'] == 'si')])
si_sintetico_5=len(df_sinteticos_5[df_sinteticos_5['Esferoide'] == 'si'])

count_dia_5_si= si_sintetico_5 + si_sintetico_5_bis + si_sintetico_5
print(f"La cantidad de datos sinteticos clasificados el dia 5 como esferoide  es: {count_dia_5_si}")

no_sintetico_5 = len(df_sinteticos_3_5[(df_sinteticos_3_5['dia'] == 5) & (df_sinteticos_3_5['Esferoide'] == 'no')])
no_sintetico_5_bis= len(df_sinteticos_3_5_bis[(df_sinteticos_3_5_bis['dia'] == 5) & (df_sinteticos_3_5_bis['Esferoide'] == 'no')])

count_dia_5_no= no_sintetico_5+ no_sintetico_5_bis
print(f"La cantidad de datos sinteticos clasificados el dia 5 como  no esferoide  es: {count_dia_5_no}")


La cantidad de datos sinteticos clasificados el dia 5 como esferoide  es: 102
La cantidad de datos sinteticos clasificados el dia 5 como  no esferoide  es: 64


In [75]:
#Para el dia 4 serviria agregar datos sinteticos que esten clasificados como esferoide "si"

df_sinteticos_4["Esferoide"].unique()

array(['si'], dtype=object)

In [76]:
df_sinteticos_4["Esferoide"].value_counts()

si    50
Name: Esferoide, dtype: int64

In [None]:
#se podria decidir agregar estos 50 datos para sumar esferoides si al dia 4 

Para decidir si se pueden agregar estos datos sinteticos al dataset original tambien debemos explorar el resto de atributos 

In [None]:
#Esferoide "si" dia 3

In [85]:
raw_df[(raw_df['dia'] == 3)&(raw_df['Esferoide'] == 'si')].describe()

Unnamed: 0,Area,X,Y,XM,YM,Perim.,BX,BY,Width,Height,...,FeretX,FeretY,FeretAngle,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,...,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,8567.561678,913.911394,646.337228,913.911394,646.337228,340.235783,863.790772,596.014494,100.317017,100.883139,...,1297.277778,952.0,88.302911,92.372694,1.155533,0.873817,0.967117,3.0,101.092311,236.631639
std,6210.195028,438.955388,345.188671,438.955388,345.188671,129.16032,434.577595,348.292282,40.145475,37.621728,...,640.972772,533.970367,46.822427,34.110991,0.119787,0.086267,0.015588,0.0,37.918587,245.572601
min,1025.0214,117.1034,124.2614,117.1034,124.2614,119.6783,79.4837,78.125,35.3261,37.3641,...,159.0,145.0,5.417,34.5867,1.0074,0.7136,0.9267,3.0,37.27645,8.393347
25%,4727.513225,741.860925,339.347475,741.860925,339.347475,276.470625,695.48235,287.364175,77.2758,74.0489,...,1023.75,442.75,64.168675,71.5421,1.050175,0.831475,0.961425,3.0,80.144275,83.621162
50%,7216.68615,939.9111,641.47745,939.9111,641.47745,324.19515,889.2663,581.8614,97.82605,100.54345,...,1322.0,951.5,86.01005,94.30055,1.14315,0.8748,0.97325,3.0,98.014225,152.690047
75%,10814.52975,1178.975825,908.234075,1178.975825,908.234075,400.49355,1143.342425,847.316575,119.735075,121.433425,...,1704.25,1375.75,128.38225,114.565375,1.202725,0.952225,0.976875,3.0,120.282212,283.247273
max,22344.1746,1655.9275,1163.0063,1655.9275,1163.0063,586.9311,1597.8261,1144.0217,189.538,187.5,...,2390.0,1722.0,156.3706,150.11,1.4014,0.9926,0.9851,3.0,173.2438,842.567345


In [88]:
df_sinteticos_3_5[df_sinteticos_3_5['dia']==3].describe()

Unnamed: 0,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0
mean,7879.95562,279.269941,0.836494,99.271231,85.065545,1.107626,0.908935,0.97021,3.0,91.297873,188.085002
std,5375.405249,111.321166,0.038889,42.398319,33.658992,0.095838,0.071295,0.012984,0.0,38.239767,218.524872
min,1623.654632,150.1424,0.74141,51.0715,39.89,1.007505,0.797918,0.9302,3.0,44.779969,10.039858
25%,3293.43306,184.9685,0.81218,60.7351,55.6766,1.026585,0.835997,0.9659,3.0,57.879742,19.426588
50%,6490.366385,247.0383,0.841342,81.8915,76.2902,1.060879,0.937192,0.9735,3.0,76.801946,76.586117
75%,10081.666629,351.5335,0.872361,129.0413,106.0875,1.202976,0.970527,0.9803,3.0,116.380862,294.809745
max,18755.989942,506.476,0.888199,186.1937,159.2266,1.266493,0.992155,0.9847,3.0,171.576284,737.867759


In [89]:
df_sinteticos_3_5_bis[(df_sinteticos_3_5_bis['dia']==3)&(df_sinteticos_3_5_bis['Esferoide']=='si')].describe()

Unnamed: 0,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0
mean,7751.297741,323.497892,0.78967,102.929708,85.955487,1.201799,0.850835,0.955849,3.0,94.439613,216.148075
std,4377.403564,128.00201,0.09987,40.051648,33.411198,0.217154,0.127145,0.024811,0.0,36.485974,145.248082
min,122.7626,123.722134,0.543155,42.069491,22.044864,1.0012,0.552828,0.902452,3.0,32.06571,0.38104
25%,6111.341401,248.343041,0.730066,83.557131,71.594904,1.0012,0.772472,0.94179,3.0,77.56742,126.837669
50%,7550.069078,284.745161,0.783409,92.982805,83.08288,1.125284,0.904874,0.952983,3.0,87.538433,205.487181
75%,8834.139565,372.980584,0.824756,117.583256,100.445817,1.336678,0.93984,0.974678,3.0,109.041957,259.383177
max,16388.328296,591.501763,0.944,189.221425,143.370473,1.62705,0.993736,0.9851,3.0,166.266217,527.806829


In [101]:
df_sinteticos_3.describe()

Unnamed: 0,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,56.0,56.0,56.0,56.0,56.0,56.0,56.0,56.0,56.0,56.0,56.0
mean,9573.658561,364.895477,0.832902,113.237449,100.198134,1.137288,0.884828,0.970548,3.0,104.012271,316.246544
std,7174.599018,148.801192,0.057439,44.689832,39.064034,0.094529,0.069108,0.010594,0.0,40.609964,315.164028
min,1003.117,121.8531,0.708259,41.554029,35.1638,1.020536,0.727304,0.9479,3.0,37.779222,8.251857
25%,2696.885875,229.55605,0.787966,74.204293,64.451025,1.068397,0.841759,0.964625,3.0,66.434287,18.670722
50%,6467.78365,333.92195,0.848243,117.041191,94.79095,1.110452,0.901377,0.9712,3.0,97.23753,126.286524
75%,15364.451825,496.364675,0.880131,151.31161,134.937375,1.188151,0.935533,0.979625,3.0,137.763139,604.344168
max,22299.0963,626.7273,0.914031,199.679102,166.6959,1.37458,0.979885,0.9855,3.0,176.427468,861.343082


In [None]:
#Esferoide "no" dia 3

In [95]:
#describe dataset original

raw_df[(raw_df['dia'] == 3)&(raw_df['Esferoide'] == 'no')].describe()

Unnamed: 0,Area,X,Y,XM,YM,Perim.,BX,BY,Width,Height,...,FeretX,FeretY,FeretAngle,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,...,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,2286.873084,905.792388,613.987564,905.792388,613.987564,161.169872,882.934776,589.72826,46.141304,48.043472,...,1307.88,892.88,109.856284,39.923684,1.287356,0.805676,0.946448,3.0,47.156546,58.590889
std,3509.445986,401.773161,367.383208,401.773161,367.383208,132.104102,398.688423,364.736142,33.266611,40.828992,...,589.221189,540.554832,49.909347,26.870016,0.284178,0.139886,0.034046,0.0,35.929966,152.040357
min,205.3735,43.3657,65.774,43.3657,65.774,52.8809,32.6087,54.3478,16.3043,16.3043,...,49.0,87.0,17.4592,15.9673,1.0299,0.4512,0.8462,3.0,16.95775,0.790199
25%,478.128,603.3122,298.8057,603.3122,298.8057,81.908,580.163,288.7228,25.1359,23.0978,...,871.0,431.0,69.7174,23.0978,1.0906,0.7115,0.9501,3.0,25.42575,2.663501
50%,693.6547,1014.4278,598.6135,1014.4278,598.6135,98.8717,956.5217,588.9946,29.212,32.6087,...,1464.0,871.0,124.0459,28.5326,1.2183,0.8208,0.9582,3.0,30.37485,4.541235
75%,1597.7596,1135.0413,913.9615,1135.0413,913.9615,176.4022,1119.5652,902.1739,48.2337,59.7826,...,1656.0,1338.0,149.9667,42.7989,1.4055,0.917,0.962,3.0,48.4888,18.473783
max,14646.5914,1643.1499,1231.5422,1643.1499,1231.5422,606.5156,1609.375,1218.0707,129.7554,203.8043,...,2374.0,1823.0,171.3844,118.4902,2.2165,0.971,0.9725,3.0,164.88585,726.409779


In [105]:
df_sinteticos_3_5_bis[(df_sinteticos_3_5_bis['dia']==3)&(df_sinteticos_3_5_bis['Esferoide']=='no')].describe()

Unnamed: 0,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0
mean,3985.793642,188.202794,0.812967,61.313178,48.644389,1.275689,0.818803,0.944431,3.0,54.955422,120.864648
std,3974.306181,128.056439,0.096583,40.18287,32.643162,0.180896,0.108127,0.023845,0.0,36.218428,132.562251
min,122.7626,41.1188,0.563269,14.1852,12.1525,1.0012,0.618293,0.899281,3.0,13.2978,0.38104
25%,122.7626,52.716434,0.741809,20.249397,13.106992,1.147513,0.73815,0.927876,3.0,18.362769,0.38104
50%,2257.550235,180.322769,0.823324,59.790722,44.881153,1.284597,0.794658,0.945461,3.0,52.171974,50.907128
75%,7456.055261,286.141598,0.89995,89.557514,76.288805,1.412615,0.900634,0.959236,3.0,82.065063,238.634761
max,12942.317392,491.582898,0.944,153.95687,118.063057,1.627838,0.9988,0.9851,3.0,136.023938,432.117985


In [None]:
#Esferoide "si" dia 4

In [82]:
#describe dataset original

raw_df[(raw_df['dia'] == 4)&(raw_df['Esferoide'] == 'si')].describe()

Unnamed: 0,Area,X,Y,XM,YM,Perim.,BX,BY,Width,Height,...,FeretX,FeretY,FeretAngle,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,...,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0
mean,7232.569871,774.460531,607.908804,774.460531,607.908804,303.701298,729.194973,563.773777,90.126806,88.71151,...,1090.958333,886.770833,94.846058,83.747994,1.111127,0.902008,0.969987,4.0,89.688273,196.388663
std,6433.035739,351.879742,313.770259,351.879742,313.770259,145.499555,350.665991,316.881022,42.051047,41.268381,...,519.59837,459.983036,54.195499,38.8742,0.053133,0.043033,0.0095,0.0,41.278661,242.728689
min,1002.8688,181.9494,39.2042,181.9494,39.2042,118.7859,114.8098,10.1902,35.3261,35.3261,...,179.0,19.0,2.1747,34.0271,1.0133,0.8073,0.9372,4.0,37.0658,8.251856
25%,2508.55645,485.1593,347.09445,485.1593,347.09445,189.77605,458.0503,309.782575,55.536675,54.85735,...,684.25,537.25,40.592275,53.2054,1.0761,0.871375,0.9671,4.0,57.732788,31.183899
50%,4657.1324,797.878,569.70805,797.878,569.70805,269.21385,746.2636,519.70105,78.46465,79.82335,...,1101.0,815.0,103.61705,73.98275,1.1112,0.9,0.9712,4.0,79.730225,82.160943
75%,10057.416925,993.866175,875.542075,993.866175,875.542075,402.047075,950.747275,833.38995,117.697025,116.678,...,1404.25,1275.5,140.570525,110.406275,1.14765,0.929275,0.97635,4.0,116.13875,254.166291
max,21477.4523,1589.2715,1190.1327,1589.2715,1190.1327,605.6114,1550.2717,1153.5326,176.6304,175.9511,...,2298.0,1716.0,174.9204,159.5244,1.2387,0.9869,0.9846,4.0,169.18725,784.755583


In [78]:
df_sinteticos_4.describe()

Unnamed: 0,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,6100.346504,283.770946,0.837953,88.538226,80.8656,1.105764,0.905964,0.969484,4.0,79.818648,175.815826
std,6680.093975,154.9029,0.059143,45.408724,45.44172,0.047921,0.039005,0.010569,0.0,42.934978,254.270109
min,1056.8734,119.6463,0.725865,40.417558,34.1113,1.0114,0.8273,0.9318,4.0,37.16004,8.699276
25%,1084.406025,151.258325,0.782018,49.004956,40.715375,1.073125,0.8879,0.965375,4.0,43.915449,8.706765
50%,2840.1425,236.7885,0.857879,68.596548,63.7234,1.10545,0.9039,0.9721,4.0,62.584338,23.307039
75%,9508.1708,382.324525,0.886175,121.816162,114.09675,1.1259,0.931675,0.976675,4.0,103.446328,314.496248
max,21462.2869,623.2707,0.9141,185.704791,175.9364,1.2087,0.99,0.9819,4.0,179.060234,784.65026


In [None]:
#Esferoide "si" dia 5

In [84]:
#describe dataset original

raw_df[(raw_df['dia'] == 5)&(raw_df['Esferoide'] == 'si')].describe()

Unnamed: 0,Area,X,Y,XM,YM,Perim.,BX,BY,Width,Height,...,FeretX,FeretY,FeretAngle,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,...,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0
mean,9822.176879,885.913914,646.061918,885.913914,646.061918,376.428207,830.987971,590.838514,109.302211,108.865493,...,1245.285714,962.678571,81.892839,102.044643,1.117343,0.89875,0.967221,5.0,110.212304,282.276005
std,5868.928094,255.816232,293.034385,255.816232,293.034385,130.637115,258.027165,295.060363,35.767679,34.932752,...,376.659573,438.728639,49.235383,32.003392,0.075924,0.05747,0.017508,0.0,35.374617,239.819724
min,1633.2962,189.3324,53.5188,189.3324,53.5188,153.8389,116.8478,11.5489,45.5163,46.875,...,235.0,17.0,5.553,44.7632,1.0167,0.7661,0.9079,5.0,46.6154,16.414202
25%,5178.642625,767.077125,488.094875,767.077125,488.094875,273.43065,717.2215,425.271725,78.125,82.88045,...,1084.25,725.25,46.0192,75.46805,1.061025,0.8743,0.961325,5.0,82.360275,90.562254
50%,9934.3082,898.9196,627.41135,898.9196,627.41135,383.40915,847.4864,564.8777,110.394,113.79075,...,1276.0,952.5,66.04755,108.7971,1.0962,0.91225,0.9721,5.0,113.87965,239.315022
75%,13892.132325,1034.20615,824.512225,1034.20615,824.512225,487.202275,970.6182,761.71875,145.720075,132.133175,...,1447.0,1251.25,126.77555,129.8653,1.143875,0.942525,0.980625,5.0,138.836575,433.672753
max,22735.9995,1539.903,1256.021,1539.903,1256.021,620.2391,1495.2446,1222.8261,169.1576,179.3478,...,2201.0,1871.0,167.0054,165.233,1.3053,0.9836,0.9843,5.0,178.1591,916.337691


In [108]:
df_sinteticos_3_5[df_sinteticos_3_5['dia']==5].describe()

Unnamed: 0,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0
mean,9774.876514,328.201181,0.819963,115.257262,100.412988,1.127901,0.891924,0.963565,5.0,108.310836,268.644765
std,6158.098388,127.310159,0.051424,40.841935,32.641148,0.100237,0.068877,0.016271,0.0,36.151049,268.662927
min,2490.428941,171.9946,0.703842,47.7609,47.0721,1.025601,0.729508,0.9183,5.0,46.744465,10.848581
25%,5155.616651,233.491825,0.795339,92.016175,78.3358,1.060487,0.877544,0.9525,5.0,80.445457,85.781765
50%,7805.287393,283.4116,0.84271,108.85155,91.5072,1.098184,0.906737,0.9683,5.0,100.628264,187.132286
75%,14198.633196,415.0484,0.852234,136.9415,125.666025,1.142745,0.940221,0.975275,5.0,134.231275,281.925083
max,21069.35404,570.4674,0.880328,193.0088,158.3622,1.369009,0.969852,0.985,5.0,173.054181,870.355548


In [109]:
df_sinteticos_5.describe()

Unnamed: 0,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0
mean,8732.513052,355.597246,-0.6270868,106.445379,103.945744,1.104751,0.909779,0.968902,5.0,94.130628,268.897476
std,6557.624639,144.43615,5.620076e-16,44.140413,36.939395,0.082661,0.061935,0.013839,0.0,42.385664,285.349429
min,1031.418499,126.7667,-0.6270868,42.01602,39.5319,1.008965,0.729547,0.934,5.0,37.725926,8.29497
25%,3516.332418,243.4457,-0.6270868,65.106056,77.6758,1.046759,0.887528,0.9627,5.0,60.527532,37.816922
50%,6496.237316,339.8404,-0.6270868,105.585855,103.0,1.09113,0.916562,0.9711,5.0,81.506574,128.996882
75%,12730.605239,482.1591,-0.6270868,134.270679,134.0078,1.127491,0.9552,0.9795,5.0,128.599956,432.075072
max,22355.996705,616.2498,-0.6270868,194.122612,164.4045,1.371462,0.99119,0.9843,5.0,176.088265,913.365249


In [110]:
#Esferoide "no" dia 5

In [111]:
#describe dataset original

raw_df[(raw_df['dia'] == 5)&(raw_df['Esferoide'] == 'no')].describe()

Unnamed: 0,Area,X,Y,XM,YM,Perim.,BX,BY,Width,Height,...,FeretX,FeretY,FeretAngle,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,...,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,952.429188,994.009696,688.500621,994.009696,688.500621,113.031546,976.788946,672.10145,34.137237,33.089896,...,1443.291667,1014.375,92.696342,29.778663,1.239742,0.848975,0.945088,5.0,33.628644,9.627833
std,868.078828,444.58351,426.975862,444.58351,426.975862,49.990639,445.831682,427.42323,16.130202,13.020561,...,655.959336,624.546431,54.38675,10.792589,0.344809,0.162553,0.033255,0.0,13.748913,14.535723
min,150.4534,57.8613,50.7181,57.8613,50.7181,46.7184,40.7609,35.3261,12.9076,14.9457,...,67.0,90.0,6.6544,12.9076,1.0055,0.4392,0.8289,5.0,14.2958,0.473431
25%,397.016975,664.46765,347.0052,664.46765,347.0052,75.1628,652.51355,332.201075,23.7772,23.0978,...,962.0,491.75,42.8566,22.466275,1.0508,0.81545,0.944875,5.0,23.250625,2.037182
50%,784.80365,1092.8102,551.1272,1092.8102,551.1272,104.5196,1075.74725,525.13585,31.92935,29.8913,...,1592.0,828.5,94.08545,26.47985,1.0836,0.9229,0.95575,5.0,32.246525,5.434573
75%,1114.093475,1384.554325,1138.900675,1384.554325,1138.900675,140.76935,1369.225525,1125.849225,40.25135,41.61005,...,2025.75,1669.5,143.9605,35.195575,1.22805,0.951675,0.965175,5.0,40.772288,11.002851
max,4049.3192,1705.1817,1268.7435,1705.1817,1268.7435,264.2512,1688.8587,1252.7174,90.3533,65.2174,...,2496.0,1864.0,168.6901,56.8695,2.277,0.9945,0.9735,5.0,74.7352,67.640549


In [112]:
df_sinteticos_3_5_bis[(df_sinteticos_3_5_bis['dia']==5)&(df_sinteticos_3_5_bis['Esferoide']=='no')].describe()

Unnamed: 0,Area,Perim.,Circ.,Feret,MinFeret,AR,Round,Solidity,dia,Diameter,n_diam
count,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0
mean,4168.533265,189.99409,0.844191,61.692886,51.500304,1.224281,0.850823,0.954832,5.0,56.563631,122.220813
std,4015.380345,123.71048,0.079394,38.921361,31.263814,0.185184,0.099984,0.024071,0.0,34.943164,134.590601
min,122.7626,41.1188,0.676847,14.1852,12.1525,1.0012,0.634458,0.89997,5.0,13.2978,0.38104
25%,122.7626,83.215148,0.785375,30.419,24.864993,1.039259,0.776599,0.938045,5.0,27.865383,0.38104
50%,3392.316366,169.044717,0.849234,53.173011,49.237447,1.218977,0.848342,0.958912,5.0,52.582702,65.02739
75%,7342.427818,271.522592,0.917462,88.043645,72.649207,1.356994,0.923649,0.975466,5.0,80.571976,242.687317
max,15044.210754,514.29897,0.944,159.529863,129.811532,1.675953,0.9988,0.9851,5.0,144.679797,494.76659
