In [1]:
import torch
import random
import time
import numpy as np
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

import matplotlib.pyplot as plt

from functions import enviar_chute_site


In [2]:
memory = {}
valor_embeddings = {}

# Palavras gen√©ricas para abrir sentidos sem√¢nticos
temas_exploratorios = [
    "object", "place", "person", "animal", "emotion",
    "food", "vehicle", "technology", "family", "music",
    "clothing", "tool", "plant", "profession", "color"
]
temas_usados = set()

In [None]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

def get_embedding(word):
    with torch.no_grad():
        inputs = tokenizer(word, return_tensors="pt")
        outputs = model(**inputs)
        # Usa o embedding do [CLS] token
        embedding = outputs.last_hidden_state[0][0].numpy()
    return embedding

# Sugest√£o adaptativa
def sugerir_proximo():
    print("\nüéØ Gerando novas sugest√µes...")

    if not memory:
        sugestao = random.choice(temas_exploratorios)
        temas_usados.add(sugestao)
        print(f"üß≠ Come√ßando explora√ß√£o: {sugestao}")
        return sugestao

    # Classifica√ß√£o refinada por score
    perfeitos = [(p, s) for p, s in memory.items() if s < 50]
    otimos    = [(p, s) for p, s in memory.items() if 50 <= s < 200]
    bons      = [(p, s) for p, s in memory.items() if 200 <= s < 300]
    oks       = [(p, s) for p, s in memory.items() if 300 <= s < 500]
    ruins     = [(p, s) for p, s in memory.items() if s >= 500]

    todos_ruins = len(ruins) == len(memory)

    if todos_ruins:
        restantes = [t for t in temas_exploratorios if t not in temas_usados]
        if restantes:
            sugestao = random.choice(restantes)
            temas_usados.add(sugestao)
            print(f"üîç Todos os chutes est√£o ruins. Explorando novo tema: {sugestao}")
            return sugestao
        else:
            print("‚ö†Ô∏è Todos os temas explorat√≥rios j√° foram usados. Chutando palavra gen√©rica aleat√≥ria.")
            return random.choice([w for w in tokenizer.vocab.keys() if w.isalpha() and len(w) > 4 and w not in memory])

    # Refinamento inteligente com prioridade por n√≠vel
    base = []

    if len(perfeitos) >= 1:
        base = sorted(perfeitos, key=lambda x: x[1])  # usa todos
        print(f"üí• Refinando a partir de perfeitos ({len(base)}): {[p for p, _ in base]}")
    elif len(otimos) >= 1:
        base = sorted(otimos, key=lambda x: x[1])
        print(f"üåü Refinando a partir de √≥timos ({len(base)}): {[p for p, _ in base]}")

    elif len(bons) >= 1:
        base = sorted(bons, key=lambda x: x[1])
        print(f"üëå Refinando a partir de bons ({len(base)}): {[p for p, _ in base]}")

    elif len(oks) >= 1:
        base = sorted(oks, key=lambda x: x[1])
        print(f"üôÇ Refinando a partir de oks ({len(base)}): {[p for p, _ in base]}")
    else:
        base = sorted(memory.items(), key=lambda x: x[1])[:2]
        print(f"ü§î Refinando com os menos ruins: {[p for p, _ in base]}")

    # C√°lculo do vetor m√©dio com pesos
    vetores = np.array([get_embedding(p) for p, _ in base])
    pesos = np.array([1 / (s + 1) for _, s in base])
    pesos = pesos / pesos.sum()
    vetor_medio = np.average(vetores, axis=0, weights=pesos)

    # Sele√ß√£o de candidatos
    candidatos = [w for w in tokenizer.vocab.keys() if w.isalpha() and len(w) > 4 and w not in memory]
    random.shuffle(candidatos)
    candidatos = candidatos[:1000]

    melhor_palavra = None
    melhor_sim = -1
    dic_sim = {}
    for w in candidatos:
        vec = get_embedding(w)
        sim = cosine_similarity([vetor_medio], [vec])[0][0]
        dic_sim[w] = sim

    # Ordena o dic_sim pelo valor de similaridade (do maior para o menor)
    dic_sim_ordenado = sorted(dic_sim.items(), key=lambda x: x[1], reverse=True)
    palavras = dic_sim_ordenado[:10]  # pega as 10 melhores palavras

    print(f"‚úÖ Pr√≥xima sugest√£o: {melhor_palavra} (sim={melhor_sim:.4f})")
    return palavras

def loop_automatico():
    while True:
        proximas = sugerir_proximo()
        for proxima in proximas:
            chute, score = enviar_chute_site(proxima)
        
            if score is not None:
                if chute not in memory:
                    memory[chute] = score
                    print(f"üí° Chute: {chute} (score={score})")
                else:
                    print(f"üîÑ Chute repetido: {chute} (score={score})")
            else:
                print("‚è≠Ô∏è Ignorando chute inv√°lido ou repetido.")

            if score <= 10:
                print("üéâ Palavra secreta encontrada:", chute)
                break

In [4]:
# Inicializa navegador
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get("https://contexto.me/en/")


In [None]:
# lista de palavras manuais a testar 
# palavras_a_testar = ["place", "try", "fix", "link"] 

# for p in palavras_a_testar:
#     valor_embeddings[p] = get_embedding(p)
#     _,memory[p] = enviar_chute_site(driver, p)

In [10]:
proximos = sugerir_proximo()
print(f"Pr√≥ximas sugest√µes: {proximos}")


üéØ Gerando novas sugest√µes...
üí• Refinando a partir de perfeitos (1): ['link']
‚úÖ Pr√≥xima sugest√£o: None (sim=-1.0000)
Pr√≥ximas sugest√µes: [('eligibility', np.float64(0.9669995892306966)), ('overview', np.float64(0.9662007320411513)), ('outlets', np.float64(0.9648468379699994)), ('gateway', np.float64(0.9637623669586088)), ('network', np.float64(0.9634852767819001)), ('shared', np.float64(0.9634391294257358)), ('legacy', np.float64(0.9634298208818348)), ('users', np.float64(0.9634072489142185)), ('surfing', np.float64(0.9633091416800056)), ('hometown', np.float64(0.9632329390299588))]


In [None]:
loop_automatico()