# Import libraries

In [1]:
from fim import *
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
from paretoset import paretoset
import plotly.express as px
import plotly.graph_objects as go
pd.set_option('display.max_colwidth',None)

# Load dataset

In [2]:
df_siembra = pd.read_excel("https://www.datosabiertos.gob.pe/node/6920/download")
df_siembra.head()

Unnamed: 0,DEPARTAMENTO,PROVINICA,DISTRITO,CULTIVO,CAMPANA,AGO,SEP,OCT,NOV,DIC,ENE,FEB,MAR,ABR,MAY,JUN,JUL
0,ANCASH,AIJA,SUCCHA,Papa nativa,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0
1,ANCASH,ANTONIO RAYMONDI,LLAMELLIN,Olluco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0
2,ANCASH,ANTONIO RAYMONDI,LLAMELLIN,Quinua,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0
3,ANCASH,ANTONIO RAYMONDI,ACZO,Quinua,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0
4,ANCASH,ANTONIO RAYMONDI,CHINGAS,Olluco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0


In [3]:
df_siembra.shape

(12181, 17)

# Pre-processing the data

In [4]:
df_siembra.rename(columns={'PROVINICA':'PROVINCIA'}, inplace=True)
df_siembra

Unnamed: 0,DEPARTAMENTO,PROVINCIA,DISTRITO,CULTIVO,CAMPANA,AGO,SEP,OCT,NOV,DIC,ENE,FEB,MAR,ABR,MAY,JUN,JUL
0,ANCASH,AIJA,SUCCHA,Papa nativa,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0
1,ANCASH,ANTONIO RAYMONDI,LLAMELLIN,Olluco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0
2,ANCASH,ANTONIO RAYMONDI,LLAMELLIN,Quinua,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0
3,ANCASH,ANTONIO RAYMONDI,ACZO,Quinua,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0
4,ANCASH,ANTONIO RAYMONDI,CHINGAS,Olluco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12176,ANCASH,RECUAY,TAPACOCHA,Cebada grano,2020-2021,0,0,0,0,0,4,5,0,0,0,0,0
12177,ANCASH,RECUAY,TAPACOCHA,Haba grano seco,2020-2021,0,0,0,0,0,3,4,0,0,0,0,0
12178,ANCASH,RECUAY,TAPACOCHA,Trigo,2020-2021,0,0,0,0,0,3,5,0,0,0,0,0
12179,ANCASH,SANTA,MACATE,Haba grano seco,2020-2021,0,0,0,0,0,3,2,0,0,0,0,0


In [5]:
print('Existen {} cultivos'.format(len(df_siembra['CULTIVO'].unique())))
df_siembra['CULTIVO'].unique()

Existen 26 cultivos


array(['Papa nativa', 'Olluco', 'Quinua', 'Papa color',
       'Frijol grano seco', 'Arveja grano seco', 'Arveja grano verde',
       'Zapallo', 'Cebada grano', 'Tomate', 'Papa blanca',
       'Maiz amarillo duro', 'Cebolla cabeza roja', 'Aji', 'Trigo',
       'Haba grano seco', 'Yuca', 'Zanahoria', 'Maiz choclo',
       'Maiz amilaceo', 'Ajo', 'Camote', 'Arroz cascara', 'Algodon',
       'Paprika', 'Cebolla cabeza blanca o amarilla'], dtype=object)

In [6]:
df_siembra['UBICACION'] = df_siembra['DEPARTAMENTO'] + '-' + df_siembra['PROVINCIA'] + '-' + df_siembra['DISTRITO']
df_siembra

Unnamed: 0,DEPARTAMENTO,PROVINCIA,DISTRITO,CULTIVO,CAMPANA,AGO,SEP,OCT,NOV,DIC,ENE,FEB,MAR,ABR,MAY,JUN,JUL,UBICACION
0,ANCASH,AIJA,SUCCHA,Papa nativa,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-AIJA-SUCCHA
1,ANCASH,ANTONIO RAYMONDI,LLAMELLIN,Olluco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-ANTONIO RAYMONDI-LLAMELLIN
2,ANCASH,ANTONIO RAYMONDI,LLAMELLIN,Quinua,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-ANTONIO RAYMONDI-LLAMELLIN
3,ANCASH,ANTONIO RAYMONDI,ACZO,Quinua,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-ANTONIO RAYMONDI-ACZO
4,ANCASH,ANTONIO RAYMONDI,CHINGAS,Olluco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-ANTONIO RAYMONDI-CHINGAS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12176,ANCASH,RECUAY,TAPACOCHA,Cebada grano,2020-2021,0,0,0,0,0,4,5,0,0,0,0,0,ANCASH-RECUAY-TAPACOCHA
12177,ANCASH,RECUAY,TAPACOCHA,Haba grano seco,2020-2021,0,0,0,0,0,3,4,0,0,0,0,0,ANCASH-RECUAY-TAPACOCHA
12178,ANCASH,RECUAY,TAPACOCHA,Trigo,2020-2021,0,0,0,0,0,3,5,0,0,0,0,0,ANCASH-RECUAY-TAPACOCHA
12179,ANCASH,SANTA,MACATE,Haba grano seco,2020-2021,0,0,0,0,0,3,2,0,0,0,0,0,ANCASH-SANTA-MACATE


# Transactional data

In [7]:
def to_transactional(df, column_trans, column_items):
  transactions = []
  for v in df[column_trans].unique():
    transactions.append(list(df[df[column_trans] == v][column_items].values))
  return transactions

trans = to_transactional(df_siembra, 'UBICACION', 'CULTIVO')
print(len(trans))

1503


## Attributes

In [8]:
price = {cultivo:random.randint(1,20) for cultivo in df_siembra['CULTIVO'].tolist()}
water = {cultivo:random.randint(5,50) for cultivo in df_siembra['CULTIVO'].tolist()}

print(price)
print(water)

{'Papa nativa': 2, 'Olluco': 20, 'Quinua': 7, 'Papa color': 15, 'Frijol grano seco': 3, 'Arveja grano seco': 9, 'Arveja grano verde': 14, 'Zapallo': 20, 'Cebada grano': 15, 'Tomate': 17, 'Papa blanca': 7, 'Maiz amarillo duro': 13, 'Cebolla cabeza roja': 7, 'Aji': 17, 'Trigo': 6, 'Haba grano seco': 16, 'Yuca': 6, 'Zanahoria': 4, 'Maiz choclo': 11, 'Maiz amilaceo': 18, 'Ajo': 17, 'Camote': 20, 'Arroz cascara': 15, 'Algodon': 17, 'Paprika': 6, 'Cebolla cabeza blanca o amarilla': 14}
{'Papa nativa': 27, 'Olluco': 16, 'Quinua': 29, 'Papa color': 26, 'Frijol grano seco': 49, 'Arveja grano seco': 19, 'Arveja grano verde': 40, 'Zapallo': 15, 'Cebada grano': 26, 'Tomate': 45, 'Papa blanca': 40, 'Maiz amarillo duro': 17, 'Cebolla cabeza roja': 37, 'Aji': 49, 'Trigo': 26, 'Haba grano seco': 17, 'Yuca': 49, 'Zanahoria': 18, 'Maiz choclo': 28, 'Maiz amilaceo': 28, 'Ajo': 34, 'Camote': 31, 'Arroz cascara': 27, 'Algodon': 33, 'Paprika': 8, 'Cebolla cabeza blanca o amarilla': 8}


## Extract itemsets

In [9]:
#función para extraer todos los itemsets frecuentes y retornarlos como dataframe
def all_itemsets(trans_, supp_=1):
  #calcular todos los itemsets frecuentes en trans_ con soporte mínimo supp_
  #reportando su frecuencia absoluta (a) y relativa (S)
  r = fpgrowth(trans_, supp=supp_, report='aS')
  #convertir el resultado en dataframe
  df_items = pd.DataFrame(r)
  #nombrar columnas
  df_items.columns = ['Itemset', 'Freq', 'Freq(%)']
  df_items['Size'] = [len(x) for x in df_items['Itemset'].values]
  df_items['Itemset'] = [str(sorted(x)) for x in df_items['Itemset'].values]
  return df_items

all_itemsets(trans, 1)

Unnamed: 0,Itemset,Freq,Freq(%),Size
0,['Maiz amilaceo'],926,61.610113,1
1,"['Cebada grano', 'Maiz amilaceo']",701,46.640053,2
2,['Cebada grano'],802,53.359947,1
3,"['Maiz amilaceo', 'Papa blanca']",702,46.706587,2
4,"['Cebada grano', 'Maiz amilaceo', 'Papa blanca']",597,39.720559,3
...,...,...,...,...
143933,"['Aji', 'Paprika', 'Tomate', 'Zapallo']",17,1.131071,4
143934,"['Aji', 'Cebolla cabeza roja', 'Paprika', 'Tomate']",17,1.131071,4
143935,"['Aji', 'Paprika', 'Tomate']",19,1.264138,3
143936,"['Aji', 'Paprika']",22,1.463739,2


## Emerging itemsets

In [10]:
df_siembra_junin = df_siembra[df_siembra['DEPARTAMENTO'] == 'JUNIN']
trans_junin = to_transactional(df_siembra_junin, 'UBICACION', 'CULTIVO')
print(len(trans_junin))
df_siembra_junin

52


Unnamed: 0,DEPARTAMENTO,PROVINCIA,DISTRITO,CULTIVO,CAMPANA,AGO,SEP,OCT,NOV,DIC,ENE,FEB,MAR,ABR,MAY,JUN,JUL,UBICACION
30,JUNIN,HUANCAYO,PILCOMAYO,Cebolla cabeza roja,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,JUNIN-HUANCAYO-PILCOMAYO
31,JUNIN,CHANCHAMAYO,CHANCHAMAYO,Aji,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,JUNIN-CHANCHAMAYO-CHANCHAMAYO
32,JUNIN,TARMA,ACOBAMBA,Quinua,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,JUNIN-TARMA-ACOBAMBA
33,JUNIN,YAULI,LA OROYA,Olluco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,JUNIN-YAULI-LA OROYA
34,JUNIN,YAULI,CHACAPALPA,Arveja grano seco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,JUNIN-YAULI-CHACAPALPA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11113,JUNIN,CONCEPCION,MARISCAL CASTILLA,Maiz choclo,2020-2021,0,0,0,0,0,0,5,15,10,0,0,0,JUNIN-CONCEPCION-MARISCAL CASTILLA
11335,JUNIN,HUANCAYO,EL TAMBO,Cebolla cabeza roja,2020-2021,0,0,0,0,2,2,1,0,0,0,0,0,JUNIN-HUANCAYO-EL TAMBO
11336,JUNIN,CHUPACA,AHUAC,Cebolla cabeza roja,2020-2021,0,0,0,0,2,3,1,0,0,0,0,0,JUNIN-CHUPACA-AHUAC
11916,JUNIN,HUANCAYO,CHUPURO,Cebolla cabeza roja,2020-2021,0,0,0,0,0,0,1,0,0,0,0,0,JUNIN-HUANCAYO-CHUPURO


In [11]:
df_siembra_not_junin = df_siembra[df_siembra['DEPARTAMENTO'] != 'JUNIN']
trans_not_junin = to_transactional(df_siembra_not_junin, 'UBICACION', 'CULTIVO')
print(len(trans_not_junin))
df_siembra_not_junin

1451


Unnamed: 0,DEPARTAMENTO,PROVINCIA,DISTRITO,CULTIVO,CAMPANA,AGO,SEP,OCT,NOV,DIC,ENE,FEB,MAR,ABR,MAY,JUN,JUL,UBICACION
0,ANCASH,AIJA,SUCCHA,Papa nativa,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-AIJA-SUCCHA
1,ANCASH,ANTONIO RAYMONDI,LLAMELLIN,Olluco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-ANTONIO RAYMONDI-LLAMELLIN
2,ANCASH,ANTONIO RAYMONDI,LLAMELLIN,Quinua,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-ANTONIO RAYMONDI-LLAMELLIN
3,ANCASH,ANTONIO RAYMONDI,ACZO,Quinua,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-ANTONIO RAYMONDI-ACZO
4,ANCASH,ANTONIO RAYMONDI,CHINGAS,Olluco,2020-2021,0,0,1,0,0,0,0,0,0,0,0,0,ANCASH-ANTONIO RAYMONDI-CHINGAS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12176,ANCASH,RECUAY,TAPACOCHA,Cebada grano,2020-2021,0,0,0,0,0,4,5,0,0,0,0,0,ANCASH-RECUAY-TAPACOCHA
12177,ANCASH,RECUAY,TAPACOCHA,Haba grano seco,2020-2021,0,0,0,0,0,3,4,0,0,0,0,0,ANCASH-RECUAY-TAPACOCHA
12178,ANCASH,RECUAY,TAPACOCHA,Trigo,2020-2021,0,0,0,0,0,3,5,0,0,0,0,0,ANCASH-RECUAY-TAPACOCHA
12179,ANCASH,SANTA,MACATE,Haba grano seco,2020-2021,0,0,0,0,0,3,2,0,0,0,0,0,ANCASH-SANTA-MACATE


In [12]:
df_all_itemsets_junin = all_itemsets(trans_junin, -1)
df_all_itemsets_not_junin = all_itemsets(trans_not_junin, -1)

In [13]:
emerging = df_all_itemsets_junin.join(df_all_itemsets_not_junin.set_index('Itemset'),
                                      on='Itemset',
                                      lsuffix='_j',rsuffix='_nj',
                                      how='outer').fillna(0)
emerging['GrowthRate_j'] = (emerging['Freq(%)_j'] / emerging['Freq(%)_nj'])
emerging

Unnamed: 0,Itemset,Freq_j,Freq(%)_j,Size_j,Freq_nj,Freq(%)_nj,Size_nj,GrowthRate_j
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca', 'Zanahoria', 'Zapallo']",0.0,0.000000,0.0,1,0.068918,16,0.000000
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca', 'Zanahoria']",0.0,0.000000,0.0,1,0.068918,15,0.000000
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca', 'Zapallo']",0.0,0.000000,0.0,1,0.068918,15,0.000000
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca']",0.0,0.000000,0.0,1,0.068918,14,0.000000
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Zanahoria', 'Zapallo']",0.0,0.000000,0.0,1,0.068918,15,0.000000
...,...,...,...,...,...,...,...,...
34818.0,"['Yuca', 'Zapallo']",3.0,5.769231,2.0,151,10.406616,2,0.554381
34822.0,['Yuca'],6.0,11.538462,1.0,509,35.079256,1,0.328925
34045.0,"['Zanahoria', 'Zapallo']",1.0,1.923077,2.0,88,6.064783,2,0.317089
4094.0,['Zanahoria'],24.0,46.153846,1.0,167,11.509304,1,4.010134


In [14]:
emerging['Itemset_list'] = [x.replace("'","").replace("[","").replace("]","").split(', ') for x in emerging['Itemset'].tolist()]
emerging

Unnamed: 0,Itemset,Freq_j,Freq(%)_j,Size_j,Freq_nj,Freq(%)_nj,Size_nj,GrowthRate_j,Itemset_list
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca', 'Zanahoria', 'Zapallo']",0.0,0.000000,0.0,1,0.068918,16,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca, Zanahoria, Zapallo]"
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca', 'Zanahoria']",0.0,0.000000,0.0,1,0.068918,15,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca, Zanahoria]"
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca', 'Zapallo']",0.0,0.000000,0.0,1,0.068918,15,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca, Zapallo]"
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca']",0.0,0.000000,0.0,1,0.068918,14,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca]"
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Zanahoria', 'Zapallo']",0.0,0.000000,0.0,1,0.068918,15,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Zanahoria, Zapallo]"
...,...,...,...,...,...,...,...,...,...
34818.0,"['Yuca', 'Zapallo']",3.0,5.769231,2.0,151,10.406616,2,0.554381,"[Yuca, Zapallo]"
34822.0,['Yuca'],6.0,11.538462,1.0,509,35.079256,1,0.328925,[Yuca]
34045.0,"['Zanahoria', 'Zapallo']",1.0,1.923077,2.0,88,6.064783,2,0.317089,"[Zanahoria, Zapallo]"
4094.0,['Zanahoria'],24.0,46.153846,1.0,167,11.509304,1,4.010134,[Zanahoria]


In [15]:
def average_list(l, f):
  return sum([f[v] for v in l]) / len(l)
emerging['AvgPrice'] = [average_list(x, price) for x in emerging['Itemset_list'].tolist()]
emerging['AvgWater'] = [average_list(x, water) for x in emerging['Itemset_list'].tolist()]
emerging

Unnamed: 0,Itemset,Freq_j,Freq(%)_j,Size_j,Freq_nj,Freq(%)_nj,Size_nj,GrowthRate_j,Itemset_list,AvgPrice,AvgWater
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca', 'Zanahoria', 'Zapallo']",0.0,0.000000,0.0,1,0.068918,16,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca, Zanahoria, Zapallo]",11.625000,31.625000
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca', 'Zanahoria']",0.0,0.000000,0.0,1,0.068918,15,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca, Zanahoria]",11.066667,32.733333
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca', 'Zapallo']",0.0,0.000000,0.0,1,0.068918,15,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca, Zapallo]",12.133333,32.533333
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Yuca']",0.0,0.000000,0.0,1,0.068918,14,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca]",11.571429,33.785714
,"['Aji', 'Ajo', 'Algodon', 'Arroz cascara', 'Camote', 'Cebolla cabeza roja', 'Frijol grano seco', 'Maiz amarillo duro', 'Maiz choclo', 'Papa blanca', 'Paprika', 'Tomate', 'Trigo', 'Zanahoria', 'Zapallo']",0.0,0.000000,0.0,1,0.068918,15,0.000000,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Zanahoria, Zapallo]",12.000000,30.466667
...,...,...,...,...,...,...,...,...,...,...,...
34818.0,"['Yuca', 'Zapallo']",3.0,5.769231,2.0,151,10.406616,2,0.554381,"[Yuca, Zapallo]",13.000000,32.000000
34822.0,['Yuca'],6.0,11.538462,1.0,509,35.079256,1,0.328925,[Yuca],6.000000,49.000000
34045.0,"['Zanahoria', 'Zapallo']",1.0,1.923077,2.0,88,6.064783,2,0.317089,"[Zanahoria, Zapallo]",12.000000,16.500000
4094.0,['Zanahoria'],24.0,46.153846,1.0,167,11.509304,1,4.010134,[Zanahoria],4.000000,18.000000


In [16]:
emerging = emerging[['Itemset_list', 'Freq_j', 'GrowthRate_j', 'Size_j', 'AvgPrice', 'AvgWater']]
emerging

Unnamed: 0,Itemset_list,Freq_j,GrowthRate_j,Size_j,AvgPrice,AvgWater
,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca, Zanahoria, Zapallo]",0.0,0.000000,0.0,11.625000,31.625000
,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca, Zanahoria]",0.0,0.000000,0.0,11.066667,32.733333
,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca, Zapallo]",0.0,0.000000,0.0,12.133333,32.533333
,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Yuca]",0.0,0.000000,0.0,11.571429,33.785714
,"[Aji, Ajo, Algodon, Arroz cascara, Camote, Cebolla cabeza roja, Frijol grano seco, Maiz amarillo duro, Maiz choclo, Papa blanca, Paprika, Tomate, Trigo, Zanahoria, Zapallo]",0.0,0.000000,0.0,12.000000,30.466667
...,...,...,...,...,...,...
34818.0,"[Yuca, Zapallo]",3.0,0.554381,2.0,13.000000,32.000000
34822.0,[Yuca],6.0,0.328925,1.0,6.000000,49.000000
34045.0,"[Zanahoria, Zapallo]",1.0,0.317089,2.0,12.000000,16.500000
4094.0,[Zanahoria],24.0,4.010134,1.0,4.000000,18.000000


## Skypatterns

In [17]:
#optimizar según freq y size
mask = paretoset(emerging[['Freq_j', 'Size_j']], sense=['max', 'max'])
sky_itemsets = emerging[mask]
print(len(sky_itemsets))
sky_itemsets

15


Unnamed: 0,Itemset_list,Freq_j,GrowthRate_j,Size_j,AvgPrice,AvgWater
28143.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Cebolla cabeza roja, Haba grano seco, Maiz amilaceo, Maiz choclo, Olluco, Papa blanca, Papa color, Papa nativa, Quinua, Trigo, Zanahoria]",1.0,13.951923,15.0,11.2,27.4
26095.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Cebolla cabeza roja, Haba grano seco, Maiz amilaceo, Maiz choclo, Olluco, Papa blanca, Papa color, Quinua, Trigo, Zanahoria]",5.0,69.759615,14.0,11.857143,27.428571
25071.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Cebolla cabeza roja, Haba grano seco, Maiz amilaceo, Maiz choclo, Papa blanca, Papa color, Quinua, Trigo, Zanahoria]",9.0,125.567308,13.0,11.230769,28.307692
24559.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Cebolla cabeza roja, Haba grano seco, Maiz amilaceo, Maiz choclo, Papa blanca, Quinua, Trigo, Zanahoria]",11.0,153.471154,12.0,10.916667,28.5
13271.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Haba grano seco, Maiz amilaceo, Maiz choclo, Papa blanca, Quinua, Trigo, Zanahoria]",12.0,33.484615,11.0,11.272727,27.727273
14167.0,"[Ajo, Arveja grano seco, Arveja grano verde, Haba grano seco, Maiz amilaceo, Maiz choclo, Papa blanca, Quinua, Trigo, Zanahoria]",13.0,36.275,10.0,10.9,27.9
4847.0,"[Arveja grano seco, Arveja grano verde, Cebada grano, Haba grano seco, Maiz amilaceo, Maiz choclo, Papa blanca, Quinua, Trigo]",15.0,3.80507,9.0,11.444444,28.111111
395.0,"[Arveja grano verde, Cebada grano, Maiz amilaceo, Maiz choclo, Papa blanca, Papa color, Quinua, Trigo]",17.0,4.605489,8.0,11.625,30.375
135.0,"[Arveja grano verde, Cebada grano, Maiz amilaceo, Maiz choclo, Papa blanca, Papa color]",22.0,3.720513,6.0,13.333333,31.333333
379.0,"[Arveja grano verde, Cebada grano, Maiz amilaceo, Maiz choclo, Papa blanca, Quinua, Trigo]",20.0,4.359976,7.0,11.142857,31.0


In [22]:
#optimizar según las 5 métricas
mask = paretoset(emerging[['Freq_j', 'Size_j', 'GrowthRate_j', 'AvgPrice', 'AvgWater']], sense=['max', 'max', 'max', 'max', 'min'])
sky_itemsets = emerging[mask]
print(len(sky_itemsets))
sky_itemsets

1215


Unnamed: 0,Itemset_list,Freq_j,GrowthRate_j,Size_j,AvgPrice,AvgWater
28143.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Cebolla cabeza roja, Haba grano seco, Maiz amilaceo, Maiz choclo, Olluco, Papa blanca, Papa color, Papa nativa, Quinua, Trigo, Zanahoria]",1.0,13.951923,15.0,11.200000,27.400000
26095.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Cebolla cabeza roja, Haba grano seco, Maiz amilaceo, Maiz choclo, Olluco, Papa blanca, Papa color, Quinua, Trigo, Zanahoria]",5.0,69.759615,14.0,11.857143,27.428571
26111.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Cebolla cabeza roja, Haba grano seco, Maiz amilaceo, Maiz choclo, Olluco, Papa blanca, Papa color, Quinua, Trigo]",5.0,17.439904,13.0,12.461538,28.153846
26097.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Cebolla cabeza roja, Haba grano seco, Maiz amilaceo, Maiz choclo, Olluco, Papa blanca, Papa color, Quinua, Zanahoria]",5.0,69.759615,13.0,12.307692,27.538462
26113.0,"[Ajo, Arveja grano seco, Arveja grano verde, Cebada grano, Cebolla cabeza roja, Haba grano seco, Maiz amilaceo, Maiz choclo, Olluco, Papa blanca, Papa color, Quinua]",5.0,17.439904,12.0,13.000000,28.333333
...,...,...,...,...,...,...
0.0,[Papa blanca],46.0,1.765580,1.0,7.000000,40.000000
14.0,[Papa color],37.0,1.534090,1.0,15.000000,26.000000
2555.0,"[Quinua, Trigo, Zanahoria]",20.0,9.001241,3.0,5.666667,24.333333
4094.0,[Zanahoria],24.0,4.010134,1.0,4.000000,18.000000


In [19]:
def radar_chart_all(df_, dimensions_):
  fig = go.Figure()
  for row_ in range(len(df_)):
    fig.add_trace(go.Scatterpolar(r=df_.iloc[row_,1:].values,
                                  theta=dimensions_,
                                  fill='toself',
                                  name=str(df_.iloc[row_,0])))
  fig.show()
radar_chart_all(sky_itemsets.head(20), sky_itemsets.columns[1:])

In [20]:
radar_chart_all(sky_itemsets[sky_itemsets['GrowthRate_j'] > 150], sky_itemsets.columns[1:])

In [21]:
radar_chart_all(sky_itemsets[(sky_itemsets['GrowthRate_j'] > 5) & (sky_itemsets['GrowthRate_j'] < 10)], sky_itemsets.columns[1:])