# Import libraries

In [1]:
!pip install pyfim

Collecting pyfim
  Downloading pyfim-6.28.tar.gz (357 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/357.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [32m297.0/357.3 kB[0m [31m8.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m357.3/357.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyfim
  Building wheel for pyfim (setup.py) ... [?25l[?25hdone
  Created wheel for pyfim: filename=pyfim-6.28-cp310-cp310-linux_x86_64.whl size=644211 sha256=be869cae77c2be00dce5c585a238b67465f3ddc53ce160f1443a965904edf2b1
  Stored in directory: /root/.cache/pip/wheels/96/0a/b3/c877bfa85c4cfe1baf3de4a89e1949382be09de5eabe49314f
Successfully built pyfim
Installing collected packages: pyfim
Successfully installed pyfim-6.28


In [2]:
from fim import *  # kept first so the explicit imports below win any name clash
import ast
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Do not truncate long cell contents when DataFrames are displayed.
pd.set_option('display.max_colwidth',None)

# Load dataset

In [4]:
# Download the Excel workbook and preview its first rows.
SIEMBRA_URL = "https://www.datosabiertos.gob.pe/node/6920/download"
df_siembra = pd.read_excel(SIEMBRA_URL)
df_siembra.head()

HTTPError: HTTP Error 500: Service unavailable (with message)

In [None]:
# (number of rows, number of columns) of the loaded frame.
df_siembra.shape

# Pre-processing the data

In [None]:
# Fix the misspelled source column name ('PROVINICA' -> 'PROVINCIA').
# rename() returns a new frame; rebinding the name instead of mutating in place
# keeps the cell safe to re-run, and head() avoids dumping the whole frame.
df_siembra = df_siembra.rename(columns={'PROVINICA':'PROVINCIA'})
df_siembra.head()

In [None]:
# Distinct values of the CULTIVO column, computed once and reused for both
# the printed count and the displayed array.
cultivos_unicos = df_siembra['CULTIVO'].unique()
print('Existen {} cultivos'.format(len(cultivos_unicos)))
cultivos_unicos

In [None]:
# Build one location key per row in the form DEPARTAMENTO-PROVINCIA-DISTRITO.
departamento = df_siembra['DEPARTAMENTO']
provincia = df_siembra['PROVINCIA']
distrito = df_siembra['DISTRITO']
df_siembra['UBICACION'] = departamento + '-' + provincia + '-' + distrito
df_siembra

# Transactional data

In [None]:
def to_transactional(df, column_trans, column_items):
  """Group the rows of ``df`` into one item list per transaction key.

  Args:
    df: DataFrame holding one (transaction key, item) pair per row.
    column_trans: Name of the column that identifies the transaction.
    column_items: Name of the column that holds the item.

  Returns:
    A list of lists: for every distinct ``column_trans`` value (in order of
    first appearance) the ``column_items`` values of its rows, in row order.
    Rows whose key is missing (NaN/None) are left out.
  """
  # A single groupby pass replaces one full-frame boolean filter per distinct
  # key. sort=False keeps keys in first-appearance order. groupby drops rows
  # with a missing key, so no empty transaction is emitted for them (an
  # `== NaN` filter matches no rows and would yield an empty list, inflating
  # the transaction count).
  grouped = df.groupby(column_trans, sort=False)[column_items]
  return [items.tolist() for _, items in grouped]

# Transactions keyed by UBICACION, each holding the CULTIVO values of that location.
trans = to_transactional(df_siembra, 'UBICACION', 'CULTIVO')
print(len(trans))

## Attributes

In [None]:
# Synthetic per-crop attributes: a random integer price in [1, 20] and a random
# integer water value in [5, 50] for every distinct CULTIVO.
# A dedicated seeded generator makes the values reproducible across kernel
# restarts, and drawing once per distinct crop (instead of once per row, with
# later rows overwriting earlier draws) avoids throw-away random numbers.
attr_rng = random.Random(42)
cultivos = df_siembra['CULTIVO'].unique()
price = {cultivo: attr_rng.randint(1, 20) for cultivo in cultivos}
water = {cultivo: attr_rng.randint(5, 50) for cultivo in cultivos}

print(price)
print(water)

## Extract itemsets

In [None]:
# Extract all frequent itemsets and return them as a DataFrame.
def all_itemsets(trans_, supp_=1):
  """Mine the frequent itemsets of ``trans_`` with ``fpgrowth``.

  Args:
    trans_: Transactions (a collection of item collections) passed to
      ``fpgrowth``.
    supp_: Minimum support, forwarded to ``fpgrowth`` as ``supp``. Per the
      pyfim documentation a positive value is a percentage and a negative
      value an absolute count.

  Returns:
    DataFrame with one row per itemset and the columns 'Itemset' (string form
    of the sorted item list), 'Freq' and 'Freq(%)' (the 'a' and 'S' values
    reported by ``fpgrowth``) and 'Size' (number of items in the itemset).
    When nothing reaches the support threshold the frame is empty but still
    has these columns.
  """
  # report='aS': absolute (a) and relative (S) frequency of each itemset.
  r = fpgrowth(trans_, supp=supp_, report='aS')
  # Naming the columns in the constructor (rather than assigning .columns
  # afterwards) keeps this working for an empty result, which would otherwise
  # produce a zero-column frame and a length-mismatch ValueError.
  df_items = pd.DataFrame(r, columns=['Itemset', 'Freq', 'Freq(%)'])
  df_items['Size'] = [len(x) for x in df_items['Itemset'].values]
  # Sort and stringify so the same set of items always yields the same key.
  df_items['Itemset'] = [str(sorted(x)) for x in df_items['Itemset'].values]
  return df_items

# Frequent itemsets over all transactions with a minimum support of 1 (supp_=1).
all_itemsets(trans, 1)

## Emerging itemsets

In [None]:
# Rows whose DEPARTAMENTO is 'JUNIN', and the transactions built from them.
junin_rows = df_siembra['DEPARTAMENTO'].eq('JUNIN')
df_siembra_junin = df_siembra[junin_rows]
trans_junin = to_transactional(df_siembra_junin, 'UBICACION', 'CULTIVO')
print(len(trans_junin))
df_siembra_junin

In [None]:
# Rows whose DEPARTAMENTO is anything other than 'JUNIN', and their transactions.
not_junin_rows = df_siembra['DEPARTAMENTO'].ne('JUNIN')
df_siembra_not_junin = df_siembra[not_junin_rows]
trans_not_junin = to_transactional(df_siembra_not_junin, 'UBICACION', 'CULTIVO')
print(len(trans_not_junin))
df_siembra_not_junin

In [None]:
# supp_=-1: per the pyfim documentation a negative support is an absolute count,
# so this keeps every itemset that occurs in at least one JUNIN transaction.
df_all_itemsets_junin = all_itemsets(trans_junin, -1)

In [None]:
# supp_=-1: per the pyfim documentation a negative support is an absolute count,
# so this keeps every itemset that occurs in at least one non-JUNIN transaction.
df_all_itemsets_not_junin = all_itemsets(trans_not_junin, -1)

In [None]:
# Align the JUNIN and non-JUNIN itemset tables on the itemset key.
# The outer join keeps itemsets found on either side; columns of the side
# where an itemset is absent are filled with 0.
not_junin_by_itemset = df_all_itemsets_not_junin.set_index('Itemset')
joined_itemsets = df_all_itemsets_junin.join(not_junin_by_itemset,
                                             on='Itemset',
                                             lsuffix='_j', rsuffix='_nj',
                                             how='outer')
emerging = joined_itemsets.fillna(0)
# Growth rate = relative frequency in JUNIN / relative frequency elsewhere.
# NOTE: itemsets absent outside JUNIN have Freq(%)_nj == 0 after the fill,
# so their growth rate is a division by zero (inf for float columns).
emerging['GrowthRate_j'] = emerging['Freq(%)_j'].div(emerging['Freq(%)_nj'])
emerging

In [None]:
def average_list(l, f):
  """Return the arithmetic mean of ``f[v]`` over every element ``v`` of ``l``.

  Args:
    l: Sequence of keys.
    f: Mapping (anything indexable by the keys) giving a number per key.

  Raises:
    ZeroDivisionError: if ``l`` is empty.
    KeyError: if ``f`` is a dict and an element of ``l`` is not in it.
  """
  total = 0
  for key in l:
    total += f[key]
  return total / len(l)

# 'Itemset' holds the string form of a sorted item list (e.g. "['A', 'B']").
# ast.literal_eval parses it back into a real list; unlike stripping quotes and
# brackets and splitting on ', ', this stays correct for item names that
# themselves contain apostrophes, commas or brackets.
emerging['Itemset_list'] = [ast.literal_eval(x) for x in emerging['Itemset'].tolist()]
# Mean synthetic price / water value over the items of each itemset.
emerging['AvgPrice'] = [average_list(x, price) for x in emerging['Itemset_list'].tolist()]
emerging['AvgWater'] = [average_list(x, water) for x in emerging['Itemset_list'].tolist()]
emerging

In [None]:
# Keep only the parsed itemset and the JUNIN-side metrics used from here on.
emerging_columns = ['Itemset_list', 'Freq_j', 'GrowthRate_j', 'Size_j', 'AvgPrice', 'AvgWater']
emerging = emerging[emerging_columns]
emerging

## Skypatterns

In [None]:
!pip install paretoset

In [None]:
from paretoset import paretoset
import plotly.express as px

In [None]:
# Skyline over two objectives: maximise frequency and itemset size.
objectives_2d = {'Freq_j': 'max', 'Size_j': 'max'}
mask = paretoset(emerging[list(objectives_2d)], sense=list(objectives_2d.values()))
sky_itemsets = emerging[mask]
print(len(sky_itemsets))
sky_itemsets

In [None]:
# Skyline over all five metrics: everything is maximised except AvgWater,
# which is minimised.
objectives_5d = {
    'Freq_j': 'max',
    'Size_j': 'max',
    'GrowthRate_j': 'max',
    'AvgPrice': 'max',
    'AvgWater': 'min',
}
mask = paretoset(emerging[list(objectives_5d)], sense=list(objectives_5d.values()))
sky_itemsets = emerging[mask]
print(len(sky_itemsets))
sky_itemsets

In [None]:
import plotly.express as px

# Parallel-coordinates view of the skyline itemsets: one axis per metric,
# lines coloured by AvgPrice.
metric_columns = ['Freq_j', 'Size_j', 'GrowthRate_j', 'AvgPrice', 'AvgWater']
df_ = sky_itemsets[metric_columns]
# `labels` has to be a {column name: display label} mapping; a plain list of
# names is not a mapping, so the intended 'Size' axis label was never applied.
# Only Size_j gets a display name that differs from its column name.
fig = px.parallel_coordinates(df_,
                              color='AvgPrice',
                              labels={'Size_j': 'Size'})
fig.show()

In [None]:
import plotly.graph_objects as go

def radar_chart_all(df_, dimensions_):
  """Draw one filled radar (polar) trace per row of ``df_`` and show the figure.

  Args:
    df_: DataFrame whose first column provides each trace's name (converted
      with ``str``) and whose remaining columns provide the radial values.
    dimensions_: Axis labels passed as ``theta``; callers pass one label per
      value column of ``df_``.
  """
  chart = go.Figure()
  for _, record in df_.iterrows():
    trace = go.Scatterpolar(
        r=record.iloc[1:].values,
        theta=dimensions_,
        fill='toself',
        name=str(record.iloc[0]),
    )
    chart.add_trace(trace)
  chart.show()
# Radar chart of the first 20 skyline itemsets: column 0 names each trace,
# the remaining columns (passed as the axis labels) are the plotted values.
radar_chart_all(sky_itemsets.head(20), sky_itemsets.columns[1:])

In [None]:
# Radar chart restricted to skyline itemsets with a growth rate above 150.
high_growth = sky_itemsets['GrowthRate_j'] > 150
radar_chart_all(sky_itemsets[high_growth], sky_itemsets.columns[1:])

In [None]:
# Radar chart restricted to skyline itemsets with a growth rate strictly between 5 and 10.
growth_rate = sky_itemsets['GrowthRate_j']
mid_growth = (growth_rate > 5) & (growth_rate < 10)
radar_chart_all(sky_itemsets[mid_growth], sky_itemsets.columns[1:])