In [None]:
import sklearn
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
from scipy.stats import norm

In [None]:
# Base folders, resolved relative to the directory the notebook is run from.
path = os.getcwd()
path_datasets = f"{path}/datasets/"  # input CSV files
path_img = f"{path}/img/"            # default output folder for saved figures

# Funciones

In [None]:
def _is_unbounded(limite):
    """Return True when ``limite`` is the 'inf' sentinel string (or None), i.e. no bound on that side."""
    return limite is None or (isinstance(limite, str) and limite == 'inf')


def get_figures(data, nombre , rango = ['inf', 'inf'], path_save = path_img):
    """Draw, save and show a three-panel probability figure for one variable.

    Panels:
      (a) histogram of ``data`` with seaborn's KDE overlay; the area under the
          KDE line inside ``rango`` is shaded.
      (b) box plot of ``data``.
      (c) standard normal density; the area between the z-scores of the
          limits in ``rango`` is shaded and the matching probability
          (from ``scipy.stats.norm.cdf``) is written in the legend title.

    Parameters
    ----------
    data : object exposing ``.mean()`` and ``.std()`` that seaborn can plot
        (the notebook passes pandas DataFrame columns).
    nombre : str
        File name without extension; the figure is written to
        ``<path_save>/<nombre>.png`` at 300 dpi.
    rango : sequence of two items
        ``[lower, upper]`` limits in the units of ``data``. The string
        ``'inf'`` (or ``None``) on either side means "no limit on that side".
        The argument is only read, never modified.
    path_save : str
        Output directory. It is created if it does not exist.

    Returns
    -------
    None
    """
    media = data.mean()
    std = data.std()

    fig = plt.figure(figsize=(12, 5))
    gs = fig.add_gridspec(2, 3)

    # (a) Histogram across the whole top row.
    ax1 = fig.add_subplot(gs[0, :])
    hist = sns.histplot(data, kde=True, ax=ax1, stat='count', color='skyblue')
    ax1.set_ylabel('Cuenta')
    ax1.set_title('(a)')

    # (b) Box plot, bottom-left cell.
    ax2 = fig.add_subplot(gs[1, 0])
    sns.boxplot(data, ax=ax2, color='lightgray')
    ax2.set_title('(b)')

    # (c) Standard normal density, bottom-right cells. The curve is evaluated
    # in closed form so the panel is deterministic and consistent with the
    # norm.cdf probability reported in its legend.
    ax3 = fig.add_subplot(gs[1, 1:])
    x = np.linspace(-5, 5, 1001)
    y = norm.pdf(x)
    ax3.plot(x, y, color='orange')
    ax3.set_ylim(bottom=0)  # let the shaded area rest on the x axis
    ax3.set_title('(c)')
    ax3.set_ylabel('')

    # With kde=True the first line on ax1 is the KDE curve drawn by histplot.
    x_hist = hist.lines[0].get_xdata()
    y_hist = hist.lines[0].get_ydata()

    # Resolve each limit once: numeric bound in data units, its z-score, and
    # the text used in the legends. Unbounded sides become +/- infinity so a
    # single code path covers the four open/closed combinations.
    lim_inf, lim_sup = rango[0], rango[1]
    if _is_unbounded(lim_inf):
        x_inf, z_inf = -np.inf, -np.inf
        txt_x_inf = txt_z_inf = '-inf'
    else:
        x_inf, z_inf = lim_inf, (lim_inf - media) / std
        txt_x_inf, txt_z_inf = f'{lim_inf:0.2f}', f'{z_inf:0.2f}'
    if _is_unbounded(lim_sup):
        x_sup, z_sup = np.inf, np.inf
        txt_x_sup = txt_z_sup = 'inf'
    else:
        x_sup, z_sup = lim_sup, (lim_sup - media) / std
        txt_x_sup, txt_z_sup = f'{lim_sup:0.2f}', f'{z_sup:0.2f}'

    # P(z_inf < Z < z_sup); norm.cdf(-inf) == 0 and norm.cdf(inf) == 1.
    prob = norm.cdf(z_sup) - norm.cdf(z_inf)

    color_relleno = 'orange'
    color_relleno_hist = 'grey'
    ax3.fill_between(x, y, where=(x >= z_inf) & (x <= z_sup),
                     color=color_relleno, alpha=0.3)
    ax1.fill_between(x_hist, y_hist, where=(x_hist >= x_inf) & (x_hist <= x_sup),
                     color=color_relleno_hist, alpha=0.3)

    # The legends carry no handles; only their titles are used as text boxes.
    ax1.legend(title=f'P({txt_x_inf}<X<{txt_x_sup})\nMedia: {media:.2f} \nDesviación Estándar: {std:.2f}', loc='upper right')
    ax3.legend(title=f'P({txt_z_inf}<Z<{txt_z_sup})\nProbabilidad: {prob * 100: 0.2f}%')

    fig.tight_layout()

    # Make sure the target folder exists before writing, and build the path
    # so it works whether or not path_save ends with a separator.
    if path_save:
        os.makedirs(path_save, exist_ok=True)
    fig.savefig(os.path.join(path_save, nombre + '.png'), dpi=300)
    plt.show()
    # Release the figure; the notebook creates one per experiment.
    plt.close(fig)

# Height Weight

In [None]:
# Base name (without extension) of the CSV file loaded in the next cell.
nombre = "HeightWeight"

In [None]:
# Read <path_datasets>/<nombre>.csv, using its first column as the index.
HeightWeight = pd.read_csv(f"{path_datasets}{nombre}.csv", index_col=0)

In [None]:
# Convert the measurements to SI units (inches -> cm, pounds -> kg),
# then remove the original imperial columns and display the result.
HeightWeight['Altura(cm)'] = 2.54 * HeightWeight['Height(Inches)']
HeightWeight['Peso(kg)'] = 0.453592 * HeightWeight['Weight(Pounds)']
HeightWeight.drop(columns=['Height(Inches)', 'Weight(Pounds)'], inplace=True)
HeightWeight

## Peso Experimento 1

In [None]:
# Experiment 1: weight in kg, shading 50 < X < 60.
get_figures(
    HeightWeight['Peso(kg)'],
    nombre='1_Peso',
    rango=[50, 60],
)

## Altura - experimento 2

In [None]:
# Experiment 2: height in cm, shading X < 172 (no lower limit).
get_figures(
    HeightWeight['Altura(cm)'],
    nombre='2_Altura',
    rango=['inf', 172],
)

# Banana

In [None]:

# Load the banana table (first CSV column as index), give the measured
# columns Spanish display names, and drop five columns that the cells
# shown in this notebook do not plot.
nombre = 'banana_quality'
banana = (
    pd.read_csv(path_datasets + nombre + '.csv', index_col=0)
    .rename(columns={
        'quality_score': 'Calificación',
        'ripeness_index': 'Índice de madurez',
        'length_cm': 'Largo(cm)',
        'weight_g': 'Peso(g)',
        'tree_age_years': 'Edad del árbol(años)',
        'altitude_m': 'Altitud(m)',
        'rainfall_mm': 'Lluvia(mm)',
        'soil_nitrogen_ppm': 'Nitrógeno en el suelo(ppm)',
        'sugar_content_brix': 'Índice de Brix del azúcar',
        'firmness_kgf': 'Firmeza(kgf)',
    })
    .drop(columns=[
        'variety',
        'region',
        'quality_category',
        'harvest_date',
        'ripeness_category',
    ])
)

# Number of rows, shown as the cell output.
len(banana['Altitud(m)'])

## Calificacion de la banana - experimento 3

In [None]:
# Experiment 3: quality score, shading X > 1.9 (no upper limit).
get_figures(
    banana['Calificación'],
    nombre='3_Calificacion_banana',
    rango=[1.9, 'inf'],
)

## Indice de madurez - experimento 4

In [None]:
# Experiment 4: ripeness index, shading 2 < X < 5.
get_figures(
    banana['Índice de madurez'],
    nombre='4_Indice_madurez_banana',
    rango=[2, 5],
)

## Índice Brix de azúcar - experimento 5

In [None]:
# Experiment 5: sugar Brix index, shading X > 20 (no upper limit).
get_figures(
    banana['Índice de Brix del azúcar'],
    nombre='5_Indice_azucar_banana',
    rango=[20, 'inf'],
)

## Peso banana - experimento 6

In [None]:
# Experiment 6: banana weight in g, shading X > 105.39 (no upper limit).
get_figures(
    banana['Peso(g)'],
    nombre='6_Peso_banana',
    rango=[105.39, 'inf'],
)

## Edad arbol banana - experimento 7

In [None]:
# Experiment 7: tree age in years, shading 12.34 < X < 14.21.
get_figures(
    banana['Edad del árbol(años)'],
    nombre='7_Edad_arbol_banana',
    rango=[12.34, 14.21],
)

## Altitud banana - experimento 8

In [None]:
# Experiment 8: altitude in m, shading 200 < X < 1000.
get_figures(
    banana['Altitud(m)'],
    nombre='8_Altitud_banana',
    rango=[200, 1000],
)

## Lluvia banana - experimento 9

In [None]:
# Experiment 9: rainfall in mm, shading X > 1789.12 (no upper limit).
get_figures(
    banana['Lluvia(mm)'],
    nombre='9_Lluvia_banana',
    rango=[1789.12, 'inf'],
)

## Nitrogeno en suelo - experimento 10

In [None]:
# Experiment 10: soil nitrogen in ppm, shading X < 102 (no lower limit).
get_figures(
    banana['Nitrógeno en el suelo(ppm)'],
    nombre='10_Nitrogeno_banana',
    rango=['inf', 102],
)

## Firmeza banana - experimento 11

In [None]:
# Experiment 11: firmness in kgf, shading X > 3 (no upper limit).
get_figures(
    banana['Firmeza(kgf)'],
    nombre='11_firmeza_banana',
    rango=[3, 'inf'],
)

# Tortilla prices

In [115]:
# Load the tortilla prices, drop rows with missing values, rename the price
# column to its Spanish display name, and keep only the rows in which no
# column is equal to 0.
nombre = 'tortilla_prices'
tortilla = (
    pd.read_csv(path_datasets + nombre + '.csv')
    .dropna()
    .rename(columns={'Price per kilogram': 'Precio por kg de tortillas'})
    .loc[lambda tabla: (tabla != 0).all(axis=1)]
)

# Minimum remaining price (printed) and number of remaining rows (cell output).
precio = tortilla['Precio por kg de tortillas']
print(precio.min())
len(precio)

3.87


282755

## Precio tortilla - experimento 12

In [None]:
# Experiment 12: tortilla price per kg, shading 10 < X < 13.
get_figures(
    tortilla['Precio por kg de tortillas'],
    nombre='12_precio_tortilla',
    rango=[10, 13],
)

# Food and nutrition

In [None]:
# Load the food/nutrition table, discard the columns listed in `drop`, and
# give the remaining nutrient columns Spanish display names.
nombre = 'Food_and_Nutrition'
food_nutrition = (
    pd.read_csv(path_datasets + nombre + '.csv')
    .drop(columns=[
        'Ages',
        'Gender',
        'Dietary Preference',
        'Activity Level',
        'Breakfast Suggestion',
        'Lunch Suggestion',
        'Dinner Suggestion',
        'Snack Suggestion',
        'Disease',
        'Weight',
        'Height',
        'Daily Calorie Target',
    ])
    .rename(columns={
        'Protein': 'Proteínas(g)',
        'Sugar': 'Azúcar(g)',
        'Sodium': 'Sodio(mg)',
        'Calories': 'Calorías',
        'Carbohydrates': 'Carbohidratos(g)',
        'Fiber': 'Fibra(g)',
        'Fat': 'Grasa(g)',
    })
)

food_nutrition

In [116]:
# Number of rows in the nutrition table (length of the 'Azúcar(g)' column).
len(food_nutrition['Azúcar(g)'])

1698

## Azucar food - experimento 13

In [None]:
# Experiment 13: sugar in g, shading 50 < X < 60.
get_figures(
    food_nutrition['Azúcar(g)'],
    nombre='13_azucar_nutricion',
    rango=[50, 60],
)

## Calorias food - experimento 14

In [None]:
# Experiment 14: calories, shading 1000 < X < 3000.
get_figures(
    food_nutrition['Calorías'],
    nombre='14_calorias_nutricion',
    rango=[1000, 3000],
)

## Carbohidratos food - experimento 15

In [None]:
# Experiment 15: carbohydrates in g, shading X < 200 (no lower limit).
get_figures(
    food_nutrition['Carbohidratos(g)'],
    nombre='15_carbohidratos_nutricion',
    rango=['inf', 200],
)

## Fibra food - experimento 16

In [None]:
# Experiment 16: fiber in g, shading X < 10 (no lower limit).
get_figures(
    food_nutrition['Fibra(g)'],
    nombre='16_fibra_nutricion',
    rango=['inf', 10],
)

## Grasa food - experimento 17

In [None]:
# Experiment 17: fat in g, shading X > 100 (no upper limit).
get_figures(
    food_nutrition['Grasa(g)'],
    nombre='17_grasa_nutricion',
    rango=[100, 'inf'],
)

## Proteinas food - experimento 18

In [None]:
# Experiment 18: protein in g, shading X < 100 (no lower limit).
get_figures(
    food_nutrition['Proteínas(g)'],
    nombre='18_proteinas_nutricion',
    rango=['inf', 100],
)

## Sodio food - experimento 19

In [None]:
# Experiment 19: sodium in mg, shading 10 < X < 70.
get_figures(
    food_nutrition['Sodio(mg)'],
    nombre='19_sodio_nutricion',
    rango=[10, 70],
)

# Plant health

In [None]:
# Load the plant-health table and rename the electrochemical signal column
# to its Spanish display name; the frame is shown as the cell output.
nombre = 'plant_health'
plant = (
    pd.read_csv(path_datasets + nombre + '.csv')
    .rename(columns={'Electrochemical_Signal': 'Señal electroquímica'})
)
plant

In [117]:
# Number of rows in the plant table (length of the 'Ambient_Temperature' column).
len(plant['Ambient_Temperature'])

1200

## Senal electroquimica - experimento 20

In [None]:
# Experiment 20: electrochemical signal, shading X < 2 (no lower limit).
get_figures(
    plant['Señal electroquímica'],
    nombre='20_senal_planta',
    rango=['inf', 2],
)

# User behavior

In [None]:
# Base name (without extension) of the CSV file loaded in the next cell.
nombre = "user_behavior"

In [None]:
# Read <path_datasets>/<nombre>.csv and show it as the cell output.
user_behavior = pd.read_csv(f"{path_datasets}{nombre}.csv")
user_behavior

# Water quality

In [None]:
# Base name (without extension) of the CSV file loaded in the next cell.
nombre = "water_quality"

In [None]:
# Read <path_datasets>/<nombre>.csv and show it as the cell output.
water = pd.read_csv(f"{path_datasets}{nombre}.csv")
water
