In [1]:
import pandas as pd
from google.cloud import bigquery
from google.oauth2 import service_account

#### Ler tabela do Google BigQuery

In [2]:
# Create the connection with Google Cloud
def create_connection(key_path="../usp-mba-dsa-tcc-4277103d9155.json"):
    """Build a BigQuery client authenticated with a service-account key file.

    Args:
        key_path: Path to the service-account JSON key file. Defaults to the
            path that used to be hard-coded in the body, so existing
            zero-argument calls behave exactly as before.

    Returns:
        A ``bigquery.Client`` created with the loaded credentials and with
        ``project`` set to the credentials' ``project_id``.
    """
    # Request only the BigQuery scope for these credentials.
    credentials = service_account.Credentials.from_service_account_file(
        key_path,
        scopes=["https://www.googleapis.com/auth/bigquery"],
    )

    return bigquery.Client(
        credentials=credentials,
        project=credentials.project_id,
    )

# Module-level BigQuery client; the query cell further down calls client.query().
client = create_connection()

In [31]:
# Fetch the distinct offer attributes from the offers dimension view.
QUERY = """
    SELECT DISTINCT Categoria, Marca, Especie, Grupo, Embalagem, Loja, Descricao, offer_id
    FROM `usp-mba-dsa-tcc.ecommerce_offers.vw_dim_offers`
"""

# Submitting the query issues the API request; result() waits for it to finish.
query_job = client.query(QUERY)
rows = query_job.result()

# Materialise the result rows as a pandas DataFrame.
offers = rows.to_dataframe()

In [32]:
# Preview the first two rows returned by the query.
offers.head(2)

Unnamed: 0,Categoria,Marca,Especie,Grupo,Embalagem,Loja,Descricao,offer_id
0,Racao Umida,N&D,Cachorro,,140g,Cobasi,Ração Úmida N&amp;D Quinoa Cães Weight Managem...,b'Q\x8c\xc4\xef\x08m\x8a\xde\xaf\x92\xc4\xdc)P...
1,Racao Seca,Nestlé Purina,Gato,Castrados,700g,Petz,Ração Cat Chow para Gatos Castrados Sabor Frango,"b""'\x15\xbb\xcf\x1e\xda\x04\x8a\x02\xa0\xdc\xf..."


In [34]:
# Keep only the Golden dry-food offers for adult dogs in the 1Kg pack size.
# query() returns a new DataFrame, so `offers` itself is left untouched.
golden_1kg_adultos = offers.query(
    "Categoria == 'Racao Seca' and Marca == 'Golden' and Especie == 'Cachorro' and Grupo == 'Adultos' and Embalagem == '1Kg'"
)
golden_1kg_adultos.head(2)

Unnamed: 0,Categoria,Marca,Especie,Grupo,Embalagem,Loja,Descricao,offer_id
388,Racao Seca,Golden,Cachorro,Adultos,1Kg,Cobasi,Ração Golden Fórmula Light Cães Adultos Porte ...,b'<\xb1\x95\x98\x18\xfer\x05#J\x0e\xacIW\xb9\x...
396,Racao Seca,Golden,Cachorro,Adultos,1Kg,Petlove,Ração Seca PremieR Pet Golden Formula Mini Bit...,b'\x17G1\xc9d(RQ!\xfc\xe80\x91\xc5\xc0-\xfa2\x...


#### Import Language Model

In [None]:
# Load the "pt_core_news_lg" spaCy pipeline used for every similarity below.
# Model page: https://spacy.io/models/pt#pt_core_news_lg
import spacy
nlp = spacy.load("pt_core_news_lg")

##### Testes

In [None]:
# Look both words up directly in the pipeline vocabulary, so `w1` and `w2`
# hold vocabulary entries rather than plain strings, then compare them.
w1 = nlp.vocab["banana"]
w2 = nlp.vocab["mamão"]

w1.similarity(w2)

In [None]:
# Baseline pair for the manual similarity tests. `offer1` defined here is
# reused by the test cells that follow; only `offer2` is rebound there.
# NOTE(review): the base64-looking tokens are presumably the offers' ids — confirm.

# Hx/o//XEL8KnXyhZfnMRDK55G+AoABfz3kKwGJLKAx8=
offer1 = nlp("Ração Naturalis Lifebites Cães Filhotes Porte Pequeno Peru, Frango, Legumes e Frutas 1kg")

# UTZQ/dqXqO94qscRScp/6ehZY6hZmnWKmeSatqM7QVw=
offer2 = nlp("Ração Seca Total Naturalis Peru, Frango e Frutas para Cães Adultos Porte Pequeno - 1 Kg")

# Similarity score between the two parsed descriptions.
offer1.similarity(offer2)

In [None]:
# Candidate: same wording as offer1 except "Sênior" instead of "Filhotes".
# `offer1` comes from an earlier cell; only `offer2` is rebound here.
# NOTE(review): the token below is presumably this offer's id — confirm.
# N954En5srMC24fi4K/3au+jmzCuc6guQx7x1IYwDvM0=
offer2 = nlp("Ração Naturalis Lifebites Cães Sênior Porte Pequeno Peru, Frango, Legumes e Frutas 1kg")
offer1.similarity(offer2)

In [None]:
# Candidate: a wet-food ("Ração Úmida") description with different wording and pack size.
# `offer1` comes from an earlier cell; only `offer2` is rebound here.
# NOTE(review): the token below is presumably this offer's id — confirm.
# fTRC8JQlzMAhL3qgENIRP0HJz2x+QbmoWUqfvyX1cfs=
offer2 = nlp("Ração Úmida Fórmula Natural Vet Care Recuperação para Cães e Gatos 270 g")
offer1.similarity(offer2)

In [None]:
# Candidate: Naturalis Lifebites description for adult dogs, "Porte Médio e Grande", 2,5kg.
# `offer1` comes from an earlier cell; only `offer2` is rebound here.
# NOTE(review): the token below is presumably this offer's id — confirm.
# qB1kHKxZOmz4J6A/OfWybRC260BtjXb0vm/9q4SlM1I=
offer2 = nlp("Ração Naturalis Lifebites Cães Adultos Porte Médio e Grande Peru, Frango, Legumes e Frutas 2,5kg")
offer1.similarity(offer2)

In [None]:
# Candidate: Naturalis Lifebites description for adult cats ("Gatos Adultos"), 1,5kg.
# `offer1` comes from an earlier cell; only `offer2` is rebound here.
# NOTE(review): the token below is presumably this offer's id — confirm.
# yfkeevB9AQTAnuC97yDgi7DWo8H/anZQ57/QHngrKLY=
offer2 = nlp("Ração Naturalis Lifebites Gatos Adultos Peru, Frango e Frutas 1,5kg")
offer1.similarity(offer2)

In [None]:
# Candidate: puppy / small-breed / 1 kg description like offer1, but worded
# differently (no "Lifebites", no "Legumes", "Pequeno Porte" word order).
# `offer1` comes from an earlier cell; only `offer2` is rebound here.
# NOTE(review): the token below is presumably this offer's id — confirm.
# KlqW6FrilTY99taOFOyzXbwjXgR/2SkyRzNaYUlFIHA=
offer2 = nlp("Ração Naturalis Cães Filhotes Pequeno Porte Peru, Frango e Frutas 1 kg")
offer1.similarity(offer2)

##### Para Valer

In [None]:
from unidecode import unidecode

# Compare the descriptions stored under row labels 0 and 2 of `offers`.
# Each description is passed through unidecode() before being parsed.
offer1 = nlp(unidecode(offers.loc[0, "Descricao"]))
offer2 = nlp(unidecode(offers.loc[2, "Descricao"]))

# Show both parsed texts followed by their similarity score.
print(f"{offer1}\n{offer2}\n{offer1.similarity(offer2)}")


In [None]:
# Show where the two offers compared above (row labels 0 and 2) come from.
# The SELECT that builds `offers` in this notebook does not return a "URL"
# column, so indexing it unconditionally raises KeyError. Print the URLs when
# the column is present; otherwise fall back to columns the query does return.
if "URL" in offers.columns:
    print(offers["URL"][0])
    print(offers["URL"][2])
else:
    print(offers.loc[[0, 2], ["Loja", "Descricao"]])

In [12]:
import pandas as pd
import spacy
# (Re)load the same "pt_core_news_lg" pipeline so this cell runs on its own.
nlp = spacy.load("pt_core_news_lg")
# Toy vocabulary used to try out the pairwise-similarity matrix on single words.
data = {"palavra": ["gato", "cachorro", "peixe", "ave"]}
df = pd.DataFrame(data)

def calcular_similaridade(palavra1, palavra2):
    """Return the similarity score between two texts.

    Each argument is run through the module-level ``nlp`` pipeline and the
    first resulting document's ``similarity`` method is called with the second.
    """
    return nlp(palavra1).similarity(nlp(palavra2))

num_palavras = len(df)

# Parse every word exactly once. The previous nested loop re-ran the pipeline
# on both operands of every (i, j) pair, i.e. 2 * n**2 pipeline calls for n words.
docs = [nlp(palavra) for palavra in df["palavra"]]

# Row i, column j holds the similarity of word i against word j.
similaridade_matrix = [
    [doc_i.similarity(doc_j) for doc_j in docs]
    for doc_i in docs
]

# Build a new DataFrame from the similarity matrix, labelled by word on both axes.
df_similaridade = pd.DataFrame(similaridade_matrix, columns=df["palavra"], index=df["palavra"])


In [13]:
# Display the word-by-word similarity matrix built in the previous cell.
df_similaridade

palavra,gato,cachorro,peixe,ave
palavra,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gato,1.0,0.83682,0.415578,0.395356
cachorro,0.83682,1.0,0.442158,0.357636
peixe,0.415578,0.442158,1.0,0.452842
ave,0.395356,0.357636,0.452842,1.0


In [35]:
import pandas as pd
import spacy
# (Re)load the same "pt_core_news_lg" pipeline so this cell runs on its own.
nlp = spacy.load("pt_core_news_lg")
# Rebind `df` to the filtered Golden 1Kg adult-dog offers (an alias, not a copy).
df = golden_1kg_adultos

def calcular_similaridade(offer1, offer2):
    """Return the similarity score between two offer descriptions.

    Each description is run through the module-level ``nlp`` pipeline and the
    first resulting document's ``similarity`` method is called with the second.
    This redefines the same-named function from an earlier cell; only the
    parameter names differ.
    """
    return nlp(offer1).similarity(nlp(offer2))

num_offers = len(df)

# Parse every description exactly once. The previous nested loop re-ran the
# pipeline on both operands of every (i, j) pair, i.e. 2 * n**2 pipeline calls
# for n offers.
docs = [nlp(descricao) for descricao in df["Descricao"]]

# Row i, column j holds the similarity of offer i against offer j.
similaridade_matrix = [
    [doc_i.similarity(doc_j) for doc_j in docs]
    for doc_i in docs
]

# Build a new DataFrame from the similarity matrix, labelled by description on both axes.
df_similaridade = pd.DataFrame(similaridade_matrix, columns=df["Descricao"], index=df["Descricao"])
df_similaridade


Descricao,Ração Golden Fórmula Light Cães Adultos Porte Pequeno Frango e Arroz 1 kg,Ração Seca PremieR Pet Golden Formula Mini Bits Salmão e Arroz para Cães Adultos de Raças Pequenas - 1 Kg,Ração Golden Formula Cães Adultos Raças Pequenas Carne e Arroz Mini Bits 1 kg,Ração Seca PremieR Pet Golden Seleção Natural para Cães Adultos Mini Bits - 1 Kg,Ração Seca PremieR Pet Golden Formula Carne e Arroz para Cães Adultos de Raças Pequenas - 1 Kg,Ração Golden Fórmula Cães Adultos Raças Pequenas Salmão e Arroz 1 kg,Ração Premier Golden Formula Cães Adultos Light Mini Bits Frango e Arroz - 1 Kg,Ração Premier Golden Formula Cães Adultos Frango e Arroz Mini Bits - 1 Kg,Ração Golden Seleção Natural Cães Adultos Porte Pequeno Frango e Arroz Mini Bits 1 kg,Ração Seca PremieR Pet Golden Formula Peru & Arroz para Cães Adultos de Pequeno Porte - 1 Kg,Ração Golden Fórmula Cães Adultos Raças Pequenas Frango e Arroz Mini Bits 1 kg,Ração Golden Fórmula Cães Adultos Raças Pequenas Peru e Arroz 1kg
Descricao,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Ração Golden Fórmula Light Cães Adultos Porte Pequeno Frango e Arroz 1 kg,1.0,0.851585,0.952302,0.83161,0.845246,0.958886,0.895269,0.896673,0.969685,0.814956,0.964037,0.952863
Ração Seca PremieR Pet Golden Formula Mini Bits Salmão e Arroz para Cães Adultos de Raças Pequenas - 1 Kg,0.851585,1.0,0.88784,0.928221,0.985262,0.849994,0.916645,0.923491,0.876024,0.925947,0.883262,0.831636
Ração Golden Formula Cães Adultos Raças Pequenas Carne e Arroz Mini Bits 1 kg,0.952302,0.88784,1.0,0.851025,0.872104,0.97325,0.907901,0.917376,0.952403,0.796023,0.993352,0.961183
Ração Seca PremieR Pet Golden Seleção Natural para Cães Adultos Mini Bits - 1 Kg,0.83161,0.928221,0.851025,1.0,0.904807,0.792119,0.924744,0.928165,0.881118,0.919777,0.845097,0.780825
Ração Seca PremieR Pet Golden Formula Carne e Arroz para Cães Adultos de Raças Pequenas - 1 Kg,0.845246,0.985262,0.872104,0.904807,1.0,0.849919,0.883023,0.893363,0.852339,0.921149,0.860595,0.835236
Ração Golden Fórmula Cães Adultos Raças Pequenas Salmão e Arroz 1 kg,0.958886,0.849994,0.97325,0.792119,0.849919,1.0,0.851178,0.866647,0.928892,0.750502,0.978441,0.989372
Ração Premier Golden Formula Cães Adultos Light Mini Bits Frango e Arroz - 1 Kg,0.895269,0.916645,0.907901,0.924744,0.883023,0.851178,1.0,0.993982,0.908344,0.887842,0.90762,0.835472
Ração Premier Golden Formula Cães Adultos Frango e Arroz Mini Bits - 1 Kg,0.896673,0.923491,0.917376,0.928165,0.893363,0.866647,0.993982,1.0,0.917654,0.888572,0.917126,0.851027
Ração Golden Seleção Natural Cães Adultos Porte Pequeno Frango e Arroz Mini Bits 1 kg,0.969685,0.876024,0.952403,0.881118,0.852339,0.928892,0.908344,0.917654,1.0,0.839066,0.960411,0.922941
Ração Seca PremieR Pet Golden Formula Peru & Arroz para Cães Adultos de Pequeno Porte - 1 Kg,0.814956,0.925947,0.796023,0.919777,0.921149,0.750502,0.887842,0.888572,0.839066,1.0,0.791628,0.751184
