In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
import math
import multiprocessing
from item.item_list import (
    ItemList,
    Item
)
from nlp.utils import (
    plot_histogram,
    get_completetext,
    plot_wordcloud,
    print_statistics,
    groups_frequency_sort)
from nlp.grouping import (
    get_groups,
    get_groups_size,
    get_unigram_groups,
    get_two_tokens_groups,
    get_first_token_groups,
    get_bigram_groups,
    get_first_two_groups,
    groups_frequency_sort
)
from nlp.pos_tagging import (
    get_tokens_tags
)
from nlp.word_embeddings import (
    load_word_embeddings,
    get_item_embedding,
    get_items_embeddings,
    get_items_similarities
)
from item.clustering.evaluate import (
    get_score_pickle,
    evaluate_results_pickle,
    evaluate_results
)
from item.clustering.utils import (
    load_clustering_results_pickle,
    load_clustering_results,
    load_models_pickle
)
from item.item_representation import (
    load_items_embeddings,
    get_group_embeddings_matrix,
    get_words_plus_categories_embeddings,
    normalize
)

from hdbscan.prediction import approximate_predict

In [2]:
clustering_model, reducer_model = load_models_pickle('./precificacao/word2vec_cbow50/v3/SUB+MED+unit+num_concat_umap_hdbscan_euclidean/')

In [3]:
# It gets the descriptions processed:
itemlist = ItemList()
itemlist.load_items_from_file('../dados/items_preprocessed_v3_train.zip')

In [4]:
# Get the tags of tokens descriptions
word_class = get_tokens_tags(itemlist.unique_words)

In [5]:
# word embeddings file, each line contains a word embedding
# word_embeddings_file = '../../../embeddings/fasttext/skip_s300.txt'
word_embeddings_file = '../../../embeddings/word2vec/cbow_s50.txt'

In [6]:
# read word embeddings from file and store them in a map
word_embeddings = load_word_embeddings(word_embeddings_file, itemlist.unique_words)

In [7]:
#It gets the ranges of the clusters generated by the First Token approach
#This is done in order to the processes work on.
def get_ranges(group_len, n_threads):

    if(n_threads == 1):
        return 0, (group_len - 1)    

    total_len = group_len
    num_threads = n_threads
    lower = []
    upper = []
    step = int(total_len/num_threads)

    for k in range(num_threads):
        lower.append(0)
        upper.append(0)

    lower[0] = 0
    upper[0] = step

    i = 1
    j = 0
    while (i < num_threads):    
        upper[i]  = upper[j] + step
        lower[i]  = upper[j] +  1
        if(i%2 != 0):
            upper[i] = upper[i] + 1
        
        i = i + 1
        j = j + 1

    #Please, check if the final cluster range ends with 18,034 clusters
    #(i.e., the number of clusters generated by First Token).
    #Depending of the number of processes, you may have to change this "-1"
    #for something else.
    upper[n_threads - 1] = group_len - 1 
    return lower, upper

In [17]:
def get_items_clusters(itemlist, first_token_groups, word_embeddings, word_class,
                       reducer_model, clustering_model, categories, embedding_type,
                       embedding_size, operation, it_thread, lower, upper, results_threads):

    print(it_thread)
    
    # It creates a list of the the keys of these groups:
    groups = list(first_token_groups.keys())

    # It gets the values of each group (i.e., the ids of the descriptions into that group):
    group_descriptions = list(first_token_groups.values())
    
    # Iterator of the first token groups:
    ft_it = lower

    results = []
    
    while ft_it <= upper:
        group = groups[ft_it]
        items = group_descriptions[ft_it]
        if group not in reducer_model:
            for item_id in items:
                cluster_id = '-1'
                cluster_prob = 0
                item_result = {'item_id': item_id, 'cluster': cluster_id,
                               'cluster_prob': cluster_prob, 'price':0}
                results.append(item_result)
        else:
            embeddings_matrix = get_group_embeddings_matrix(items, itemlist.items_list, word_embeddings, word_class,
                                                    categories=categories, embedding_type=embedding_type,
                                                    embedding_size=embedding_size, norm=True, operation=operation)
            # It gets the reduced vector for the item
            embeddings_matrix = reducer_model[group].transform(embeddings_matrix)
            # It gets the item cluster
            clusters = approximate_predict(clustering_model[group], embeddings_matrix)
            # ([4, 5, 6, 7, 8], [0.94, ...])
            for _id, item_id in enumerate(items):
                cluster_id = group + '_' + str(clusters[0][_id])
                cluster_prob = clusters[1][_id]
                item_result = {'item_id': item_id, 'cluster': cluster_id,
                               'cluster_prob': cluster_prob, 'price':itemlist.get_item(item_id).get_item_dict()['preço']}
                results.append(item_result)
        ft_it = ft_it + 1
        
    results_threads[it_thread] = results

In [18]:
def predict_items_clusters(itemlist, word_embeddings, word_class, reducer_model,
                           clustering_model, categories=None, embedding_type=None,
                           embedding_size=50, operation='mean', n_threads=10):

    # It gets the first tokens of each description and groups
    # based on this approach:
    first_token_groups = itemlist.get_first_token_groups()
    group_len = len(first_token_groups)

    first_token_groups_new = {}
    keys_ft = list(first_token_groups.keys())
    random.shuffle(keys_ft)
    random.shuffle(keys_ft)

    for k in keys_ft:
        first_token_groups_new[k] = first_token_groups[k]
    
    manager = multiprocessing.Manager()
    results_threads = manager.dict()
    jobs = []

    # It defines the ranges (of the groups) the threads will work on:
    thread_ranges = get_ranges(group_len, n_threads)
    print('Read ranges')
    print(thread_ranges)

    for i in range(n_threads):
        p = multiprocessing.Process(target=get_items_clusters, \
                args = (itemlist, first_token_groups_new, word_embeddings, word_class, \
                reducer_model, clustering_model, categories, embedding_type, embedding_size, \
                operation, i, thread_ranges[0][i], thread_ranges[1][i], results_threads))
        jobs.append(p)
        p.start()
        
    for proc in jobs:
        proc.join()

    results = []
    for i in range(n_threads):
        for inst in results_threads[i]:
            results.append(inst)

    return results

In [19]:
results = predict_items_clusters(itemlist, word_embeddings, word_class, reducer_model, \
                                clustering_model, categories=['unidades_medida', 'números'], \
                                embedding_type=['N', 'MED'], operation='concatenate', n_threads=30)

Read ranges
([0, 450, 900, 1349, 1799, 2248, 2698, 3147, 3597, 4046, 4496, 4945, 5395, 5844, 6294, 6743, 7193, 7642, 8092, 8541, 8991, 9440, 9890, 10339, 10789, 11238, 11688, 12137, 12587, 13036], [449, 899, 1348, 1798, 2247, 2697, 3146, 3596, 4045, 4495, 4944, 5394, 5843, 6293, 6742, 7192, 7641, 8091, 8540, 8990, 9439, 9889, 10338, 10788, 11237, 11687, 12136, 12586, 13035, 13497])
0


  warn('Clusterer does not have any defined clusters, new data'


1
2

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



3

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



4

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



5

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



6

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



7

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



8

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



9

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



10

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



11

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



12

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



13

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'





  warn('Clusterer does not have any defined clusters, new data'


14

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



15

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



16

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'





  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'


17

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



18

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



19

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



20

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



21

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



22

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



23

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'



24

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi


25

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'





  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'


26

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'





  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'


27

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi


28

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi


29

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'





  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defined clusters, new data'
  warn('Clusterer does not have any defi

In [20]:
results

[{'item_id': 1422,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 1.13},
 {'item_id': 3163,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 0.648},
 {'item_id': 16443,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 0.69},
 {'item_id': 18711,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 2.15},
 {'item_id': 25487,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 0.384},
 {'item_id': 26155,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 0.29},
 {'item_id': 29378,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 1.28},
 {'item_id': 31677,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 0.57},
 {'item_id': 31744,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 0.4},
 {'item_id': 36374,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price': 0.6},
 {'item_id': 41089,
  'cluster': 'tioridazina_-1',
  'cluster_prob': 0.0,
  'price

In [68]:
results_df=pd.DataFrame.from_dict(results)
results_df.head(50)

Unnamed: 0,item_id,cluster,cluster_prob,price
0,1422,tioridazina_-1,0.0,1.13
1,3163,tioridazina_-1,0.0,0.648
2,16443,tioridazina_-1,0.0,0.69
3,18711,tioridazina_-1,0.0,2.15
4,25487,tioridazina_-1,0.0,0.384
5,26155,tioridazina_-1,0.0,0.29
6,29378,tioridazina_-1,0.0,1.28
7,31677,tioridazina_-1,0.0,0.57
8,31744,tioridazina_-1,0.0,0.4
9,36374,tioridazina_-1,0.0,0.6


In [57]:
results_grouped=results_df.groupby(['cluster'], as_index=False)['price'].mean()
results_grouped=results_grouped.rename(columns = {'price':'mean'})
results_grouped['count']=results_df.groupby(['cluster'], as_index=False)['price'].count().transform('price')
results_grouped['max']=results_df.groupby(['cluster'], as_index=False)['price'].max().transform('price')
results_grouped['min']=results_df.groupby(['cluster'], as_index=False)['price'].min().transform('price')
results_grouped['median']=results_df.groupby(['cluster'], as_index=False)['price'].median().transform('price')
results_grouped['std']=results_df.groupby(['cluster'])['price'].std().reset_index().transform('price')
results_grouped['var']=results_df.groupby(['cluster'])['price'].var().reset_index().transform('price')
results_grouped['quantile_1']=results_df.groupby(['cluster'])['price'].quantile(q=0.25).reset_index().transform('price')
results_grouped['quantile_3']=results_df.groupby(['cluster'])['price'].quantile(q=0.75).reset_index().transform('price')

results_grouped.head(50)
#'avg', 'quartil_1', 'median', 'quartil_3', 'std', 'var', 'max', 'min'

Unnamed: 0,cluster,mean,count,max,min,median,std,var,quantile_1,quantile_3
0,-1,0.0,52056,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,a100_-1,46.885645,62,97.0,20.5,49.11,15.102414,228.08291,33.5225,56.0
2,a1_-1,28.665309,81,95.0,7.8,30.0,18.866133,355.930968,10.35,38.75
3,a250_-1,63.258889,36,81.0,37.43,65.89,13.929107,194.020033,49.5,73.8675
4,a2_0,34.04975,40,102.81,7.8,33.825,19.689069,387.659438,16.895,40.795
5,a2_1,32.900345,87,95.0,8.19,33.4,19.918428,396.743759,13.79,44.5
6,a3_-1,37.4,8,59.3,31.74,33.85,9.179349,84.260457,32.495,37.9
7,a3_0,33.729577,71,151.22,0.4,34.0,22.382825,500.99085,16.4,43.055
8,a3_1,35.828496,133,120.0,8.19,33.4,23.213631,538.87266,17.22,44.5
9,a50_-1,35.490938,32,195.5,4.2,18.85,42.496175,1805.924918,14.7125,34.175


In [66]:
results_grouped.to_csv("../dados/precificacao/train_groups_infos.csv", index=False)

In [67]:
results_df.to_csv("../dados/precificacao/train_item_groups.csv", index=False)

In [72]:
results_grouped[results_grouped['cluster'].str.contains('arroz')].head(50)

Unnamed: 0,cluster,mean,count,max,min,median,std,var,quantile_1,quantile_3
924,arroz_-1,10.779787,47,18.0,3.2,12.4,3.982711,15.86198,8.34,12.99
925,arroz_0,11.401538,117,95.0,2.05,11.29,8.33475,69.46805,10.15,12.46
926,arroz_1,13.44386,57,101.7,3.25,11.8,12.089579,146.1579,10.8,13.31
927,arroz_2,14.864727,55,71.4,2.6,12.65,12.899615,166.4001,11.045,13.6
928,arroz_3,16.297808,73,100.1,1.89,12.16,20.08549,403.4269,10.24,13.24
929,arroz_4,22.085,54,180.0,2.55,12.62,30.238378,914.3595,10.26,14.6875
930,arroz_5,310.235517,87,26000.0,1.65,12.0,2786.259986,7763245.0,11.02,13.455
931,arroz_6,213.207726,321,34091.75,1.69,11.9,2496.704778,6233535.0,9.65,13.8


In [2]:
results_grouped = pd.read_csv("../dados/precificacao/train_groups_infos.csv")
results_grouped.head()

Unnamed: 0,cluster,mean,count,max,min,median,std,var,quantile_1,quantile_3
0,-1,0.0,52056,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,a100_-1,46.885645,62,97.0,20.5,49.11,15.102414,228.08291,33.5225,56.0
2,a1_-1,28.665309,81,95.0,7.8,30.0,18.866133,355.930968,10.35,38.75
3,a250_-1,63.258889,36,81.0,37.43,65.89,13.929107,194.020033,49.5,73.8675
4,a2_0,34.04975,40,102.81,7.8,33.825,19.689069,387.659438,16.895,40.795


In [7]:
results_grouped['dsc_unidade_medida'] = 'Unidade'
results_grouped.head()

Unnamed: 0,cluster,mean,count,max,min,median,std,var,quantile_1,quantile_3,dsc_unidade_medida
0,-1,0.0,52056,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unidade
1,a100_-1,46.885645,62,97.0,20.5,49.11,15.102414,228.08291,33.5225,56.0,Unidade
2,a1_-1,28.665309,81,95.0,7.8,30.0,18.866133,355.930968,10.35,38.75,Unidade
3,a250_-1,63.258889,36,81.0,37.43,65.89,13.929107,194.020033,49.5,73.8675,Unidade
4,a2_0,34.04975,40,102.81,7.8,33.825,19.689069,387.659438,16.895,40.795,Unidade


In [9]:
results_grouped.to_csv("../dados/precificacao/train_groups_infos_un.csv", index=False)

In [16]:
results_grouped = pd.read_csv("../dados/precificacao/train_groups_infos_un.csv")
results_grouped = results_grouped[(results_grouped['cluster']=='a100_-1') & (results_grouped['dsc_unidade_medida'] == 'Unidade')] 
results_grouped['mean'].values[0]

46.88564516129034