# 1.1.8. Generar dataset de métricas

La idea consiste en generar conjuntos de datos intermedios que contengan métricas (media, desviación estándar, mínimo y máximo por departamento) para las existencias ovinas, los kilogramos de lana por animal, la finura y el rinde. Estas métricas nos permitirán completar el conjunto de datos final de manera más estructurada.

In [1]:
import numpy as np
import pandas as pd

Vamos a empezar con los datos para las existencias.

In [11]:
# Load the table holding the per-department percentage columns and show it
# as the cell output.
datos = pd.read_csv(
    filepath_or_buffer="actualizando_dataset - dataset_nuevos_porcentajes_existencias.csv",
)
datos

Unnamed: 0,fecha,provincia,departamento,%_carneros,%_ovejas,%_borregos,%_capones,%_corderos_corderas,kilos_animal,finura,rinde
0,2013-12-31,CHUBUT,BIEDMA,2.28,41.05,15.81,19.39,21.47,3.16,19.8,55.0
1,2014-12-31,CHUBUT,BIEDMA,2.03,38.6,18.6,18.73,22.03,3.24,19.22,58.86
2,2013-12-31,CHUBUT,CUSHAMEN,2.7,48.83,14.92,10.01,23.55,2.73,20.04,57.47
3,2014-12-31,CHUBUT,CUSHAMEN,2.42,44.6,15.3,10.95,26.72,3.34,19.74,63.35
4,2013-12-31,CHUBUT,FLORENTINO AMEGHINO,2.38,39.3,18.77,14.61,24.94,2.85,19.3,57.75
5,2014-12-31,CHUBUT,FLORENTINO AMEGHINO,2.56,40.6,16.21,14.82,25.8,2.73,19.53,60.37
6,2013-12-31,CHUBUT,FUTALEUFU,3.2,49.78,12.7,12.78,21.53,3.14,21.6,63.9
7,2014-12-31,CHUBUT,FUTALEUFU,3.14,46.98,12.6,10.83,26.44,3.3,21.55,62.93
8,2013-12-31,CHUBUT,GAIMAN,2.43,51.81,11.9,14.89,18.97,3.41,20.8,56.2
9,2014-12-31,CHUBUT,GAIMAN,2.47,45.06,13.9,13.2,25.37,3.19,20.39,55.06


In [13]:
# Summarise every percentage column per department with the same four
# statistics; the result has two-level columns (column name, statistic).
datos_agrupado = datos.groupby("departamento").agg(
    dict.fromkeys(
        [
            "%_carneros",
            "%_ovejas",
            "%_borregos",
            "%_capones",
            "%_corderos_corderas",
        ],
        ["mean", "std", "min", "max"],
    )
)

In [14]:
# Preview: pivot the innermost column level (the statistic name) into the
# row index, giving one row per (departamento, statistic).
datos_agrupado.stack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,%_carneros,%_ovejas,%_borregos,%_capones,%_corderos_corderas
departamento,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BIEDMA,mean,2.155,39.825,17.205,19.06,21.75
BIEDMA,std,0.176777,1.732412,1.972828,0.46669,0.39598
BIEDMA,min,2.03,38.6,15.81,18.73,21.47
BIEDMA,max,2.28,41.05,18.6,19.39,22.03
CUSHAMEN,mean,2.56,46.715,15.11,10.48,25.135
CUSHAMEN,std,0.19799,2.991062,0.268701,0.66468,2.241528
CUSHAMEN,min,2.42,44.6,14.92,10.01,23.55
CUSHAMEN,max,2.7,48.83,15.3,10.95,26.72
FLORENTINO AMEGHINO,mean,2.47,39.95,17.49,14.715,25.37
FLORENTINO AMEGHINO,std,0.127279,0.919239,1.810193,0.148492,0.608112


In [16]:
# Flatten the stacked statistics into a plain table. reset_index() names the
# unnamed statistic level "level_1", so it is relabelled to "metrica", and the
# "%_" prefix is dropped from the value columns.
df_resultante = (
    datos_agrupado.stack()
    .reset_index()
    .rename(
        columns={
            "level_1": "metrica",
            "%_carneros": "carneros",
            "%_ovejas": "ovejas",
            "%_borregos": "borregos",
            "%_capones": "capones",
            "%_corderos_corderas": "corderos_corderas",
        }
    )
)
df_resultante

Unnamed: 0,departamento,metrica,carneros,ovejas,borregos,capones,corderos_corderas
0,BIEDMA,mean,2.155,39.825,17.205,19.06,21.75
1,BIEDMA,std,0.176777,1.732412,1.972828,0.46669,0.39598
2,BIEDMA,min,2.03,38.6,15.81,18.73,21.47
3,BIEDMA,max,2.28,41.05,18.6,19.39,22.03
4,CUSHAMEN,mean,2.56,46.715,15.11,10.48,25.135
5,CUSHAMEN,std,0.19799,2.991062,0.268701,0.66468,2.241528
6,CUSHAMEN,min,2.42,44.6,14.92,10.01,23.55
7,CUSHAMEN,max,2.7,48.83,15.3,10.95,26.72
8,FLORENTINO AMEGHINO,mean,2.47,39.95,17.49,14.715,25.37
9,FLORENTINO AMEGHINO,std,0.127279,0.919239,1.810193,0.148492,0.608112


Por último, guardamos el DataFrame como archivo CSV.

In [None]:
# Persist the metrics table; index=False keeps the positional row index out
# of the file.
df_resultante.to_csv(
    path_or_buf="nuevo_porcentajes_existencias.csv",
    index=False,
)

Continuamos con los datos para la lana.

In [2]:
# Load the table with the kilos_animal column per department and date, then
# show it as the cell output.
datos = pd.read_csv(
    filepath_or_buffer="actualizando_dataset - dataset_nuevos_%_lana.csv",
)
datos

Unnamed: 0,fecha,provincia,departamento,kilos_animal
0,2012-12-31,CHUBUT,BIEDMA,3.61
1,2013-12-31,CHUBUT,BIEDMA,3.16
2,2014-12-31,CHUBUT,BIEDMA,3.24
3,2006-12-31,CHUBUT,CUSHAMEN,3.55
4,2007-12-31,CHUBUT,CUSHAMEN,3.42
...,...,...,...,...
73,2010-12-31,CHUBUT,TELSEN,3.67
74,2011-12-31,CHUBUT,TELSEN,3.92
75,2012-12-31,CHUBUT,TELSEN,3.81
76,2013-12-31,CHUBUT,TELSEN,3.50


In [3]:
# Per-department mean, standard deviation, minimum and maximum of
# kilos_animal; the result has two-level columns (column name, statistic).
datos_agrupado = datos.groupby("departamento").agg(
    dict.fromkeys(["kilos_animal"], ["mean", "std", "min", "max"])
)

In [4]:
# Preview: pivot the innermost column level (the statistic name) into the
# row index, giving one row per (departamento, statistic).
datos_agrupado.stack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,kilos_animal
departamento,Unnamed: 1_level_1,Unnamed: 2_level_1
BIEDMA,mean,3.336667
BIEDMA,std,0.240069
BIEDMA,min,3.16
BIEDMA,max,3.61
CUSHAMEN,mean,3.4075
CUSHAMEN,std,0.295284
CUSHAMEN,min,2.73
CUSHAMEN,max,3.69
FLORENTINO AMEGHINO,mean,3.286667
FLORENTINO AMEGHINO,std,0.430473


In [6]:
# Flatten the stacked statistics into a plain table. reset_index() names the
# unnamed statistic level "level_1", which is relabelled to "metrica".
df_resultante = (
    datos_agrupado.stack()
    .reset_index()
    .rename(columns={"level_1": "metrica"})
)
df_resultante

Unnamed: 0,departamento,metrica,kilos_animal
0,BIEDMA,mean,3.336667
1,BIEDMA,std,0.240069
2,BIEDMA,min,3.16
3,BIEDMA,max,3.61
4,CUSHAMEN,mean,3.4075
5,CUSHAMEN,std,0.295284
6,CUSHAMEN,min,2.73
7,CUSHAMEN,max,3.69
8,FLORENTINO AMEGHINO,mean,3.286667
9,FLORENTINO AMEGHINO,std,0.430473


Por último, guardamos el DataFrame como archivo CSV. 

In [None]:
# Persist the metrics table; index=False keeps the positional row index out
# of the file.
df_resultante.to_csv(
    path_or_buf="nuevo_porcentajes_lana.csv",
    index=False,
)

Ahora toca hacer lo mismo para la finura y el rinde.

In [7]:
# Load the table with the finura and rinde columns per department and date,
# then show it as the cell output.
datos = pd.read_csv(
    filepath_or_buffer="actualizando_dataset - dataset_nuevos_%_f_r.csv",
)
datos

Unnamed: 0,fecha,provincia,departamento,finura,rinde
0,2013-12-31,CHUBUT,BIEDMA,19.8,55.0
1,2014-12-31,CHUBUT,BIEDMA,19.22,58.86
2,2013-12-31,CHUBUT,CUSHAMEN,20.04,57.47
3,2014-12-31,CHUBUT,CUSHAMEN,19.74,63.35
4,2013-12-31,CHUBUT,FLORENTINO AMEGHINO,19.3,57.75
5,2014-12-31,CHUBUT,FLORENTINO AMEGHINO,19.53,60.37
6,2013-12-31,CHUBUT,FUTALEUFU,21.6,63.9
7,2014-12-31,CHUBUT,FUTALEUFU,20.27,63.13
8,2013-12-31,CHUBUT,GAIMAN,20.8,56.2
9,2014-12-31,CHUBUT,GAIMAN,20.39,55.06


In [8]:
# Per-department mean, standard deviation, minimum and maximum of finura and
# rinde; the result has two-level columns (column name, statistic).
datos_agrupado = datos.groupby("departamento").agg(
    dict.fromkeys(["finura", "rinde"], ["mean", "std", "min", "max"])
)

In [9]:
# Preview: pivot the innermost column level (the statistic name) into the
# row index, giving one row per (departamento, statistic).
datos_agrupado.stack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,finura,rinde
departamento,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BIEDMA,mean,19.51,56.93
BIEDMA,std,0.410122,2.729432
BIEDMA,min,19.22,55.0
BIEDMA,max,19.8,58.86
CUSHAMEN,mean,19.89,60.41
CUSHAMEN,std,0.212132,4.157788
CUSHAMEN,min,19.74,57.47
CUSHAMEN,max,20.04,63.35
FLORENTINO AMEGHINO,mean,19.415,59.06
FLORENTINO AMEGHINO,std,0.162635,1.85262


In [10]:
# Flatten the stacked statistics into a plain table. reset_index() names the
# unnamed statistic level "level_1", which is relabelled to "metrica".
df_resultante = (
    datos_agrupado.stack()
    .reset_index()
    .rename(columns={"level_1": "metrica"})
)
df_resultante

Unnamed: 0,departamento,metrica,finura,rinde
0,BIEDMA,mean,19.51,56.93
1,BIEDMA,std,0.410122,2.729432
2,BIEDMA,min,19.22,55.0
3,BIEDMA,max,19.8,58.86
4,CUSHAMEN,mean,19.89,60.41
5,CUSHAMEN,std,0.212132,4.157788
6,CUSHAMEN,min,19.74,57.47
7,CUSHAMEN,max,20.04,63.35
8,FLORENTINO AMEGHINO,mean,19.415,59.06
9,FLORENTINO AMEGHINO,std,0.162635,1.85262


Por último, guardamos el DataFrame como archivo CSV.

In [None]:
# Persist the metrics table; index=False keeps the positional row index out
# of the file.
df_resultante.to_csv(
    path_or_buf="nuevo_porcentajes_f_r.csv",
    index=False,
)