In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from IPython.display import SVG, display, display_png 
import plotly.io as pio 
from threading import Timer
import webbrowser
import keyboard
import time

import re
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
import nltk

from dash import Dash, dcc, html, Input, Output
import plotly.express as px

from datetime import datetime

import os

In [2]:
# Shared NLTK objects used by the token-processing function defined further down.
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# English stop-word list, as returned by the NLTK corpus reader.
stop_words = stopwords.words('english')

In [3]:
# Load the full ("todos") tables, one DataFrame per category.
# The folder is assembled with os.path.join so the cell also runs outside
# Windows; a hard-coded backslash separator only resolves there.
todos_dir = os.path.join('dados_data', 'todos')
dir_path = os.listdir(todos_dir)

# The category label is the second underscore-separated piece of each file
# name; the accents missing from the file names are restored afterwards.
names_f = [name.split('_')[1] for name in dir_path]
names_f = [item.replace('Eletronicos', 'Eletrônicos') for item in names_f]
names_f = [item.replace('Criancas', 'Crianças') for item in names_f]

categories_all = {}
for path, names in zip(dir_path, names_f):
    categories_all[names] = pd.read_csv(os.path.join(todos_dir, path))


In [4]:
# Load the 15-day tables, one DataFrame per category.
dir_path = os.listdir(r'dados_data/15dias/')

# Category label = second underscore-separated piece of the file name,
# with the accents restored.
names_f = [
    name.split('_')[1]
    .replace('Eletronicos', 'Eletrônicos')
    .replace('Criancas', 'Crianças')
    for name in dir_path
]

# Each CSV carries a saved index column ('Unnamed: 0') that is dropped on load.
categories_15 = {
    names: pd.read_csv(rf'dados_data/15dias/{path}').drop(['Unnamed: 0'], axis=1)
    for path, names in zip(dir_path, names_f)
}

In [5]:
# Load the 7-day tables, one DataFrame per category.
dir_path = os.listdir(r'dados_data/7dias/')

# Derive the category label from each file name and restore its accents.
names_f = []
for name in dir_path:
    label = name.split('_')[1]
    label = label.replace('Eletronicos', 'Eletrônicos')
    label = label.replace('Criancas', 'Crianças')
    names_f.append(label)

categories_7 = {}
for path, names in zip(dir_path, names_f):
    loaded = pd.read_csv(rf'dados_data/7dias/{path}')
    # Discard the saved index column before storing the frame.
    categories_7[names] = loaded.drop(['Unnamed: 0'], axis=1)

In [6]:
# Load the keyword ("lista_filtro") tables, one DataFrame per category.
dir_path = os.listdir(r'dados_data/lista_filtro/')

# Category label = second underscore-separated piece of the file name,
# with the accents restored.
names_f = [
    name.split('_')[1]
    .replace('Eletronicos', 'Eletrônicos')
    .replace('Criancas', 'Crianças')
    for name in dir_path
]

categories_filter = {
    names: pd.read_csv(rf'dados_data/lista_filtro/{path}')
    for path, names in zip(dir_path, names_f)
}

In [7]:
# Load the NLP tables and pre-compute, per category, the column means grouped
# by name rank and by description rank.
dir_path = os.listdir(r'dados/NLP/')

# Category label = second underscore-separated piece of the file name,
# with the accents restored.
names_f = [
    name.split('_')[1]
    .replace('Eletronicos', 'Eletrônicos')
    .replace('Criancas', 'Crianças')
    for name in dir_path
]

NLP_categories = {}
for path, names in zip(dir_path, names_f):
    frame = pd.read_csv(rf'dados/NLP/{path}')

    # Means per name rank; the other rank column is not needed in this table.
    grouped_by_name = frame.groupby(['name_rank']).mean().reset_index()
    name_mean = grouped_by_name.drop(['description_rank'], axis=1)

    # Means per description rank; the name rank column is dropped likewise.
    grouped_by_description = frame.groupby(['description_rank']).mean().reset_index()
    description_mean = grouped_by_description.drop(['name_rank'], axis=1)

    # Index 0 -> grouped by name rank, index 1 -> grouped by description rank.
    NLP_categories[names] = [name_mean, description_mean]
    

In [8]:
def processamento(tokens):
    """Normalise a sequence of tokens.

    Each token is lower-cased and lemmatised; tokens whose lemma is in
    ``stop_words`` are discarded and the remaining lemmas are stemmed.

    Args:
        tokens: iterable of string tokens.

    Returns:
        list of stemmed tokens, in their original order.
    """
    lemmas = (lemmatizer.lemmatize(raw.lower()) for raw in tokens)
    return [stemmer.stem(lemma) for lemma in lemmas if lemma not in stop_words]

In [9]:
def plot_graph(cat, table):
    """Build the scatter figure for one of the pre-computed NLP tables.

    Args:
        cat: category key into ``NLP_categories``.
        table: index of the table inside ``NLP_categories[cat]``; 0 and 1 also
            select the matching x-axis caption (name / description).

    Returns:
        plotly ``go.Figure`` plotting the table's second column (y) against
        its first column (x).
    """
    accent = '#52b788'
    background = 'rgb(10,10,10)'
    x_captions = {
        0: 'Número de palavras chave do nome',
        1: 'Número de palavras chave da descrição',
    }

    frame = NLP_categories[cat][table]
    fig = go.Figure()

    # Single trace: second column against first column.
    fig.add_trace(
        go.Scatter(
            y=frame.iloc[:, 1].values,
            x=frame.iloc[:, 0].values,
            marker_color=accent,
        )
    )

    # Rotated y-axis caption, placed in paper coordinates left of the plot.
    fig.add_annotation(dict(
        xref='paper', yref='paper', x=-0.07, y=1,
        xanchor='right', yanchor='top',
        text='Média de preço',
        font=dict(family='Arial', size=22, color=accent),
        showarrow=False,
        textangle=90,
    ))

    # x-axis caption, only for the two known tables.
    x_caption = x_captions.get(table)
    if x_caption is not None:
        fig.add_annotation(dict(
            xref='paper', yref='paper', x=-0.03, y=-0.08,
            xanchor='left', yanchor='top',
            text=x_caption,
            font=dict(family='Arial', size=22, color=accent),
            showarrow=False,
        ))

    fig.update_layout(
        margin=dict(l=80, r=30, b=80, t=30, pad=5),
        width=600,
        height=400,
        paper_bgcolor=background,  # background of the whole figure
        plot_bgcolor=background,   # background of the plotting area
        yaxis=dict(tickfont=dict(color=accent)),
        xaxis=dict(tickfont=dict(color=accent)),
    )

    # Axis configuration: no grid, zero line blended into the background.
    fig.update_xaxes(showgrid=False, zerolinecolor=background)
    fig.update_yaxes(showgrid=False, zerolinecolor=background)
    return fig

In [10]:
def flatten_list(_2d_list):
    """Flatten one level of nesting.

    Elements that are exactly of type ``list`` are expanded in place; every
    other element (including tuples and deeper nested lists) is kept as is.

    Args:
        _2d_list: iterable whose elements may be lists.

    Returns:
        A new list with one level of list nesting removed.
    """
    flattened = []
    for entry in _2d_list:
        if type(entry) is not list:
            # Non-list entries are carried over untouched.
            flattened.append(entry)
            continue
        flattened.extend(entry)
    return flattened

In [11]:
# Keyword-count buckets used to grade a product name. Counts that fall in no
# bucket are treated as the top grade by the scoring code.
name_classificator = {
    'ruim': [0,1],
    'médiano': [2,3],
}

# Keyword-count buckets used to grade a product description.
# A count of 3 used to be absent from both buckets, so it fell through to the
# top grade even though 4-7 is only the middle grade; it now belongs to the
# middle bucket so the ranges are contiguous.
# NOTE(review): confirm 3 should be the middle grade rather than 'ruim'.
description_classificator = {
    'ruim': [0,1,2],
    'médiana': [3,4,5,6,7]
}

In [12]:
# Display the loaded per-category keyword tables (dict of DataFrames with a 'Word' column).
categories_filter

{'Beleza':      Unnamed: 0     Word
 0             0    bundl
 1             1      set
 2             2      new
 3             3   palett
 4             4    brush
 ..          ...      ...
 983         983  haircar
 984         984    slice
 985         985      jam
 986         986    joico
 987         987    smart
 
 [988 rows x 2 columns],
 'Casa':      Unnamed: 0    Word
 0             0    dunn
 1             1     rae
 2             2     set
 3             3     mug
 4             4     new
 ..          ...     ...
 549         549     3pc
 550         550  magnet
 551         551   shade
 552         552    leaf
 553         553     toy
 
 [554 rows x 2 columns],
 'Crianças':      Unnamed: 0        Word
 0             0        girl
 1             1        size
 2             2        babi
 3             3         boy
 4             4       bundl
 ..          ...         ...
 985         985        neon
 986         986  watermelon
 987         987        burp
 988         9

In [12]:
def _keyword_hits(text, keywords):
    """Return the processed tokens of ``text`` that appear in ``keywords``.

    Punctuation is stripped first, then the text is tokenised and passed
    through ``processamento``.

    Args:
        text: raw user-supplied product name or description.
        keywords: container of processed keywords for the chosen category.

    Returns:
        list of matching tokens (duplicates kept), in order of appearance.
    """
    cleaned = re.sub(r'[^\w\s]', '', text)
    tokens = processamento(word_tokenize(cleaned))
    # Build a new list instead of calling .remove() while iterating, which
    # skips the element that follows every removed one.
    return [token for token in tokens if token in keywords]


def _classify(score, classificator):
    """Map a keyword count to its label.

    Args:
        score: number of keyword hits.
        classificator: dict mapping a label to the list of counts it covers.

    Returns:
        The first label whose list contains ``score``; 'excelente' when no
        bucket contains it.
    """
    for label, counts in classificator.items():
        if score in counts:
            return label
    return 'excelente'


def _labelled_row(css_suffix, label, value):
    """Build a flex row holding a fixed label and its value."""
    return html.Div(
        className=f'app-body--{css_suffix}',
        children=[
            html.P(children=[label], className=f'app-body--{css_suffix}1'),
            html.P(children=value, className=f'app-body--{css_suffix}2'),
        ],
        style={'display': 'flex'},
    )


def _build_layout(product_name, category_name, score_name, fig_name,
                  score_description=None, fig_description=None):
    """Assemble the Dash page layout.

    The description row and the description graph are only included when
    ``fig_description`` is given.

    Args:
        product_name: product name typed by the user.
        category_name: chosen category key.
        score_name: label assigned to the product name.
        fig_name: figure for the name keyword table.
        score_description: label assigned to the description, or None.
        fig_description: figure for the description keyword table, or None.

    Returns:
        The root ``html.Div`` of the page.
    """
    field_labels = [
        'Categoria principal',
        'Categoria secundária',
        'Nome da marca',
        'Condição do item',
        'Tipo de frete ',
    ]

    header = html.Div(
        className='app-header',
        children=[
            html.H1('Olá, Augusto Cesar!', className="app-header--title",
                    style={'display': 'inline-block'}),
            html.Img(src='/assets/image.png', className='app-header--image',
                     style={'display': 'inline-block', 'height': '120px'}),
        ],
        style={'display': 'flex'},
    )

    # NOTE(review): every value slot (and the price) currently shows the
    # category name; this looks like placeholder content.
    characteristics = html.Div(
        className='app-list',
        children=[
            html.Div(
                className='app-list--list1',
                children=[
                    html.Ul(children=[label], className='app-list--firstelements')
                    for label in field_labels
                ],
            ),
            html.Div(
                className='app-list--list2',
                children=[
                    html.Ul(children=category_name, className='app-list--elements')
                    for _ in field_labels
                ],
            ),
        ],
        style={'display': 'flex'},
    )

    text_rows = [
        characteristics,
        _labelled_row('preco', 'Preço', category_name),
        html.P(children='Avaliação do nome/descrição'),
        _labelled_row('nome', 'Nome', score_name.capitalize()),
    ]

    if fig_description is not None:
        # Show the description's own grade (not the name's grade) in this row.
        text_rows.append(
            _labelled_row('descricao', 'Descricao', score_description.capitalize())
        )
        graphs = html.Div(
            className='app-body2--graph',
            children=[
                html.Div(children=[
                    dcc.Graph(figure=fig_name),
                    html.Div(children=[dcc.Graph(figure=fig_description)]),
                ])
            ],
        )
    else:
        graphs = html.Div(children=[
            dcc.Graph(figure=fig_name, className='app-body2--graph')
        ])

    time_range_selector = html.Div(
        className='app-RadioItems',
        children=[
            html.Label(['Faixa de tempo do gráfico']),
            html.Div(children=[
                dcc.RadioItems(
                    id='yaxis',
                    options=[
                        {'label': 'Diário', 'value': 'day'},
                        {'label': '7 Dias', 'value': '7days'},
                        {'label': '15 Dias', 'value': '15days'},
                    ],
                    value='day',
                    className='app-RadioItems--style',
                    labelStyle={'display': 'block'},
                )
            ]),
        ],
        style={'display': 'inline-block', 'vertical-align': 'top'},
    )

    return html.Div(children=[
        header,
        html.P(className='app-body',
               children=[html.P(f'Produto a ser vendido: {product_name}',
                                className='app-body--p1')]),
        html.Br(),
        html.P('Estas são as caracteristicas do seu produto'),
        html.Div(
            className='app-body2',
            children=[
                html.Div(children=text_rows, className='app-body2--text'),
                graphs,
            ],
        ),
        html.Div(
            className='app-graph',
            children=[
                html.Div(children=[dcc.Graph(id='graph')],
                         style={'display': 'inline-block'}),
                time_range_selector,
            ],
            style={'width': '1400px', 'display': 'inline-block'},
        ),
    ])


# --- Category selection -------------------------------------------------------
print('As categorias possíveis são:')
for z in categories_all.keys():
    print('\t', z)
category_name = input('Insira o nome da categoria: ').replace(" ", "").capitalize()

if category_name not in categories_all.keys():
    raise ValueError('Categoria inexistente, tente novamente')

# Keywords of the chosen category, as a set for fast membership tests.
category_keywords = set(categories_filter[category_name]['Word'].values)

# --- Product name -------------------------------------------------------------
product_name = input('Insira o nome do produto: ')
product_name_filtered = _keyword_hits(product_name, category_keywords)
score_name = _classify(len(product_name_filtered), name_classificator)
fig_name = plot_graph(category_name, 0)

# --- Optional product description ----------------------------------------------
# The answer is read from input() itself: an empty answer (just Enter) means
# "add a description". No separate global key-press listener is involved.
answer = input('Deseja adicionar uma descrição ao produto? Pressione Enter para '
               'adicionar ou digite qualquer texto e Enter para ignorar: ')

if answer == '':
    product_description = input('Insira a descrição do produto: ')
    product_description_filtered = _keyword_hits(product_description, category_keywords)
    # Descriptions are graded with their own bucket table.
    score_description = _classify(len(product_description_filtered),
                                  description_classificator)
    fig_description = plot_graph(category_name, 1)
else:
    # Reset explicitly so values from an earlier run of this cell are not reused.
    score_description = None
    fig_description = None

# --- Dash application ---------------------------------------------------------
app = Dash(__name__)
app.layout = _build_layout(
    product_name,
    category_name,
    score_name,
    fig_name,
    score_description=score_description,
    fig_description=fig_description,
)


@app.callback(
    Output('graph', 'figure'),
    [Input(component_id='yaxis', component_property='value')]
)
def graph_builder(value):
    """Dash callback: rebuild the purchases chart for the selected time range.

    Args:
        value: value of the 'yaxis' radio group ('day', '7days' or anything
            else, which selects the 15-day table).

    Returns:
        plotly ``go.Figure`` for the currently selected ``category_name``.
    """
    if value == 'day':
        return _purchases_figure(categories_all[category_name], 'Data')

    # NOTE(review): the trace name below is kept from the original code but
    # looks like a copy-paste leftover unrelated to this chart; confirm.
    if value == '7days':
        return _purchases_figure(categories_7[category_name], 'Dias',
                                 trace_name='Frete pelo comprador')
    return _purchases_figure(categories_15[category_name], 'Dias',
                             trace_name='Frete pelo comprador')


def _purchases_figure(frame, x_column, trace_name=None):
    """Build the 'Total' chart shared by all three time ranges.

    Args:
        frame: DataFrame with a 'Total' column and the column named by
            ``x_column``.
        x_column: name of the column used for the x values.
        trace_name: optional trace name; omitted from the trace when None.

    Returns:
        plotly ``go.Figure`` with a single scatter trace.
    """
    trace_kwargs = dict(
        y=frame['Total'].values,
        x=frame[x_column].values,
        marker_color='#52b788',
    )
    if trace_name is not None:
        trace_kwargs['name'] = trace_name

    fig = go.Figure()
    fig.add_trace(go.Scatter(**trace_kwargs))

    # Layout configuration
    fig.update_layout(
        title={'text': f'<b  style = "color:#52b788;font-size:22"><br>Total de compras no ano de 2018 da categoria {category_name}</br>',
               'font_family': "Arial",
               'font_size': 12,
               'xref': 'paper',
               'y': 0.97,
               'x': 0,
               'xanchor': 'left',
               'yanchor': 'bottom'},
        margin=dict(l=100, r=200, b=50, t=100, pad=5),
        width=1000,
        height=600,
        paper_bgcolor='rgb(10,10,10)',  # background of the whole figure
        plot_bgcolor='rgb(10,10,10)',   # background of the plotting area
        yaxis=dict(tickfont=dict(color="#52b788")),
    )

    # Axis configuration: hide the x axis entirely, no grid on y.
    fig.update_xaxes(showgrid=False, visible=False)
    fig.update_yaxes(showgrid=False, zerolinecolor='rgb(10,10,10)')
    return fig

def open_browser():
    """Open the locally served dashboard (port 8050) in a new browser window."""
    port = 8050
    url = "http://localhost:{}".format(port)
    webbrowser.open_new(url)

if __name__ == '__main__':
    # Schedule the browser to open one second from now, then start the server;
    # run_server blocks until the server is stopped.
    # No second open_browser() call follows: it would only execute after the
    # server has shut down and would open a page that can no longer load.
    Timer(1, open_browser).start()
    app.run_server(debug=True, use_reloader=False, port=8050)

As categorias possíveis são:
	 Beleza
	 Casa
	 Crianças
	 Eletrônicos
	 Esporte
	 Homens
	 Mulher
	 Outro
Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
