In [1]:
import pandas as pd
from langchain_ollama import ChatOllama
from langchain_huggingface import HuggingFaceEmbeddings

# Embedder handed to the LangChain vector store (used for query vectors).
# Previously tried checkpoint: "ai-forever/sbert_large_mt_nlu_ru".
embedding_model = HuggingFaceEmbeddings(
    model_name="sergeyzh/rubert-mini-uncased",
    model_kwargs=dict(device="cuda"),
    # Keep normalisation ON unless the vector DB is known to normalise vectors
    # itself (the author considered switching it off while on Weaviate).
    encode_kwargs=dict(normalize_embeddings=True),
)



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader #not sure about those 2
from langchain_community.document_loaders.csv_loader import CSVLoader #i mean this as second
from langchain_weaviate.vectorstores import WeaviateVectorStore
import weaviate
import weaviate.classes as wvc
from weaviate.classes.config import Configure, Property, DataType

#series_result = '../data/processed/series_results.csv'
#df = pd.read_csv(series_result)

# Local Weaviate instance (HTTP on 8080, gRPC on 50051).
client = weaviate.connect_to_local(host="127.0.0.1", port=8080, grpc_port=50051)

# Start from empty collections on every run -- must be revisited before production.
for stale_collection in ("MatchData", "MapStats"):
    if client.collections.exists(stale_collection):
        client.collections.delete(stale_collection)

# Every MatchData property is stored as text.
match_text_fields = (
    "team_id",
    "team_name",
    "date",
    "opponent_id",
    "opponent_name",
    "map_name",
    "series_result",
    "description",
)
client.collections.create(
    name="MatchData",
    properties=[
        Property(name=field_name, data_type=DataType.TEXT)
        for field_name in match_text_fields
    ],
    description="–ò–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –æ –º–∞—Ç—á–µ –º–µ–∂–¥—É –∫–æ–º–∞–Ω–¥–∞–º–∏",
    vectorizer_config=Configure.Vectorizer.none(),  # vectors are supplied manually
)

# MapStats: two text keys plus three numeric rates per (team, map).
map_stats_properties = [
    Property(name="team_id", data_type=DataType.TEXT),
    Property(name="map_name", data_type=DataType.TEXT),
]
map_stats_properties += [
    Property(name=rate_name, data_type=DataType.NUMBER)
    for rate_name in ("winrate", "pickrate", "banrate")
]
client.collections.create(
    name="MapStats",
    properties=map_stats_properties,
    description="–°—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞ –ø–æ –∫–∞—Ä—Ç–∞–º –¥–ª—è –∫–æ–º–∞–Ω–¥",
    vectorizer_config=Configure.Vectorizer.none(),
)


USER_AGENT environment variable not set, consider setting it to identify your requests.
            We encourage you to update your code to use the async client instead when running inside async def functions!


<weaviate.collections.collection.sync.Collection at 0x73e94f586c90>

In [3]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Document-side encoder. NOTE(review): this is the same checkpoint name that
# `embedding_model` is configured with; keep the two in sync.
model = SentenceTransformer("sergeyzh/rubert-mini-uncased")

series_result = '../data/processed/series_results.csv'
maps_stats = '../data/processed/team_maps.csv'

# `DataFrame.drop` returns a new frame; the original call discarded that
# result, so no column was ever removed. Only `series_link` is dropped here:
# `series_result` is still read when the rows are inserted into Weaviate.
matches = pd.read_csv(series_result).drop(columns=['series_link'], errors='ignore')
maps = pd.read_csv(maps_stats)



def _describe_match(row):
    """Render one match row as the sentence that gets embedded."""
    return f"–∫–æ–º–∞–Ω–¥–∞ {row['team_name']} —Å—ã–≥—Ä–∞–ª–∞ –ø—Ä–æ—Ç–∏–≤ –∫–æ–º–∞–Ω–¥—ã {row['opponent_name']} –Ω–∞ –∫–∞—Ä—Ç–µ {row['map_name']}, –º–∞—Ç—á –±—ã–ª –ø—Ä–æ–≤–µ–¥–µ–Ω {row['date']}"


matches["description"] = matches.apply(_describe_match, axis=1)

# One vector per description, in row order.
description_texts = matches["description"].tolist()
embeddings = model.encode(description_texts, show_progress_bar=True)

# Handle used by the insert loop that follows (collection handling may be reworked later).
collection_match = client.collections.get("MatchData")

def json_clean(obj):
    """Replace NaN and infinite float values of ``obj`` with ``0.0``, in place.

    Only values that are ``float`` instances are inspected; everything else
    is left as is. The same dict object is returned.
    """
    non_finite_keys = [
        key
        for key, value in obj.items()
        if isinstance(value, float) and not np.isfinite(value)
    ]
    for key in non_finite_keys:
        obj[key] = 0.0
    return obj


# Pair rows with vectors by *position*. `model.encode` returns one vector per
# description in list order, whereas the label yielded by `iterrows()` only
# equals that position while the frame keeps its default 0..n-1 index.
for position, (_, row) in enumerate(matches.iterrows()):
    properties = {
        "team_id": str(row["team_id"]),
        "team_name": row["team_name"],
        "date": row["date"],
        "opponent_id": str(row["opponent_id"]),
        "opponent_name": row["opponent_name"],
        "map_name": row["map_name"],
        "series_result": str(row["series_result"]),
        "description": row["description"],
    }
    # NaN/inf floats are replaced with 0.0 before upload (see json_clean).
    properties = json_clean(properties)
    vector = embeddings[position]
    collection_match.data.insert(properties=properties, vector=vector)


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 456/456 [00:02<00:00, 169.24it/s]


In [4]:
import re

# Handle to the "MapStats" collection; rows are inserted into it further down this cell.
collection_team_map = client.collections.get("MapStats")
# Per-team map statistics, read from the `maps_stats` CSV path set in an earlier cell.
df = pd.read_csv(maps_stats)

def extract_float(value):
    """Pull the first decimal number out of ``value``.

    Args:
        value: Raw cell content such as ``"55.3%"``; may be NaN/None.

    Returns:
        The first number found, as a ``float``, or ``None`` when the value is
        missing or contains no digits.
    """
    if pd.isna(value):
        return None
    # Require at least one digit and at most one decimal point. The previous
    # pattern [\d.]+ also matched text such as "." or "1.2.3", which made
    # float() raise ValueError.
    match = re.search(r"\d+(?:\.\d*)?|\.\d+", str(value))
    return float(match.group()) if match else None

rate_columns = ['winrate', 'pickrate', 'banrate']

# Turn the raw rate cells into plain numbers.
for col in rate_columns:
    df[col] = df[col].apply(extract_float)

# One MapStats object per CSV row; no vector is attached.
for _, row in df.iterrows():
    properties = {
        "team_id": str(row["team_id"]),
        "map_name": row["map_name"],
        **{rate: float(row[rate]) for rate in rate_columns},
    }
    collection_team_map.data.insert(properties=properties)

In [5]:
from weaviate.classes.query import Filter

# LangChain wrapper around the "MatchData" collection.
match_vectorstore = WeaviateVectorStore(
    client=client,
    index_name="MatchData",      # collection (class) name
    text_key="description",      # property that holds the searchable text
    embedding=embedding_model,   # embeds queries for the vector comparison
)

# NOTE: one hit may not be enough -- possibly up to 5 for a bo5; still to be decided.
match_retriever_certain = match_vectorstore.as_retriever(search_kwargs=dict(k=1))
match_retriever_bo3 = match_vectorstore.as_retriever(search_kwargs=dict(k=3))

In [6]:
router_instructions_ru = """–¢—ã —ç–∫—Å–ø–µ—Ä—Ç –ø–æ –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ü–∏–∏ –∑–∞–ø—Ä–æ—Å–∞ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è. –¢–≤–æ—è –∑–∞–¥–∞—á–∞ ‚Äî –Ω–∞–ø—Ä–∞–≤–∏—Ç—å –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è –∫ –æ–¥–Ω–æ–º—É –∏–∑ –∞–≥–µ–Ω—Ç–æ–≤.

–ê–≥–µ–Ω—Ç—ã:
- –ø—Ä–µ–¥—Å–∫–∞–∑–∞—Ç–µ–ª—å —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ –º–∞—Ç—á–∞ ‚Üí "predictor"
- –∞–Ω–∞–ª–∏—Ç–∏–∫ —Ñ–æ—Ä–º—ã –∏–≥—Ä–æ–∫–∞ –∏–ª–∏ –∫–æ–º–∞–Ω–¥—ã ‚Üí "shape"
- –∞–Ω–∞–ª–∏–∑ –∏–≥—Ä–æ–≤–æ–≥–æ –ø–æ–≤–µ–¥–µ–Ω–∏—è –∫–æ–º–∞–Ω–¥—ã ‚Üí "team"
- —Å–≤–æ–±–æ–¥–Ω—ã–µ –¥–∏–∞–ª–æ–≥–∏ ‚Üí "fallback"

–í –∫–∞—á–µ—Å—Ç–≤–µ –æ—Ç–≤–µ—Ç–∞ –≤–µ—Ä–Ω–∏ **–¢–û–õ–¨–ö–û** JSON-–æ–±—ä–µ–∫—Ç —Å –∫–ª—é—á–æ–º "node", –æ–¥–Ω–æ –∏–∑ –∑–Ω–∞—á–µ–Ω–∏–π: "predictor", "shape", "team", "fallback".

**–ü—Ä–∏–º–µ—Ä:**
{"node": "predictor"}

–ù–µ –¥–æ–±–∞–≤–ª—è–π –Ω–∏–∫–∞–∫–∏—Ö –æ–±—ä—è—Å–Ω–µ–Ω–∏–π. –ù–µ –≤–µ–¥–∏ –¥–∏–∞–ª–æ–≥. –¢–æ–ª—å–∫–æ JSON.
"""


# TODO: consider biasing the router towards "fallback" so the model picks it more often and asks the user to clarify the request

In [7]:
# One local Ollama model, two deterministic (temperature 0) chat handles:
# free-form text and JSON-constrained output.
local_llm = "ilyagusev/saiga_llama3:latest"
_ollama_common = dict(model=local_llm, temperature=0)
llm = ChatOllama(**_ollama_common)
llm_json_mode = ChatOllama(**_ollama_common, format="json")

In [8]:
from langchain_core.messages import HumanMessage, SystemMessage
import json

def _route_once(user_text):
    """Send one user message through the router prompt in JSON mode."""
    return llm_json_mode.invoke(
        [
            SystemMessage(content=router_instructions_ru),
            HumanMessage(content=user_text),
        ]
    )


# One sample request per expected route: predictor, fallback, shape.
test_case_match = _route_once("–∫—Ç–æ –≤—ã–∏–≥—Ä–∞–µ—Ç –≤ –∑–∞–≤—Ç—Ä–∞—à–Ω–µ–º –º–∞—Ç—á–µ: Navi –∏–ª–∏ G2?")
test_case_fallback = _route_once("–Ø –≤–∫—É—Å–Ω–æ –ø–æ–∫—É—à–∞–ª")
test_case_shape = _route_once("–í –∫–∞–∫–æ–π —Ñ–æ—Ä–º–µ –Ω–∞—Ö–æ–¥–∏—Ç—Å—è Niko?")

parsed_routes = [
    json.loads(reply.content)
    for reply in (test_case_match, test_case_fallback, test_case_shape)
]
print(*parsed_routes)

{'node': 'predictor'} {'node': 'fallback'} {'node': 'shape'}


In [9]:
from catboost import CatBoostClassifier

# Pre-trained series-closeness classifier loaded from a local CatBoost file.
closeness_analyzer_model = CatBoostClassifier()
closeness_analyzer_model.load_model("catboost_series_model.cbm")

# One hand-made feature row, using the same column names and order that
# team_map_stats() produces.
sample_feature = pd.DataFrame([[55.0, 10.0, 20.0, 50.0, 8.0, 30.0]], columns=[
    'team_map_winrate', 'team_map_pickrate', 'team_map_banrate',
    'opponent_map_winrate', 'opponent_map_pickrate', 'opponent_map_banrate'
])

# Predict
pred = closeness_analyzer_model.predict(sample_feature)

labels = ["One-sided win", "Close win", "Close loss", "One-sided loss"]
# Flatten before taking the first class id: int() on a non-0-d array is
# deprecated in NumPy -- presumably the warning echoed under this cell.
predicted_class = int(np.ravel(pred)[0])
print("Prediction:", labels[predicted_class])

Prediction: Close win


  print("Prediction:", labels[int(pred[0])])


In [10]:
# match predictor section
test_case_prompt = """–¢–≤–æ—è –∑–∞–¥–∞—á–∞ - –∏–∑–≤–ª–µ—á—å –Ω–∞–∑–≤–∞–Ω–∏—è –∫–æ–º–∞–Ω–¥ –∏–∑ –∑–∞–ø—Ä–æ—Å–∞ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è, –µ—Å–ª–∏ –æ–Ω–∏ —É–∫–∞–∑–∞–Ω—ã. –ï—Å–ª–∏ –Ω–∞–∑–≤–∞–Ω–∏—è –∫–æ–º–∞–Ω–¥—ã –Ω–µ—Ç, —Ç–æ —É–∫–∞–∂–∏ 'null'
–û—Ç–≤–µ—Ç –¥–æ–ª–∂–µ–Ω –±—ã—Ç—å –≤–æ–∑–≤—Ä–∞—â–µ–Ω –≤ —Ñ–æ—Ä–º–∞—Ç–µ JSON —Å–æ —Å–ª–µ–¥—É—é—â–∏–º–∏ –∫–ª—é—á–∞–º–∏:
{
    "team_name": "<–Ω–∞–∑–≤–∞–Ω–∏–µ –ø–µ—Ä–≤–æ–π –∫–æ–º–∞–Ω–¥—ã –∏–ª–∏ null>",
    "opponent_name": "<–Ω–∞–∑–≤–∞–Ω–∏–µ –≤—Ç–æ—Ä–æ–π –∫–æ–º–∞–Ω–¥—ã –∏–ª–∏ null>"    
}
–ù–µ –¥–æ–±–∞–≤–ª—è–π –Ω–∏–∫–∞–∫–∏—Ö –∫–æ–º–º–µ–Ω—Ç–∞—Ä–∏–µ–≤, –≤–æ–∑–≤—Ä–∞—â–∞–π —á–∏—Å—Ç—ã–π JSON-–æ–±—ä–µ–∫—Ç.
"""
match_predictor_prompt = """–¢—ã ‚Äî –∏–Ω—Ç–µ–ª–ª–µ–∫—Ç—É–∞–ª—å–Ω—ã–π –ø–∞—Ä—Å–µ—Ä –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å—Å–∫–∏—Ö –∑–∞–ø—Ä–æ—Å–æ–≤. –¢–≤–æ—è –∑–∞–¥–∞—á–∞ ‚Äî –∏–∑–≤–ª–µ—á—å –∏–∑ —Ç–µ–∫—Å—Ç–∞ —Å–ª–µ–¥—É—é—â–∏–µ –¥–∞–Ω–Ω—ã–µ:
–í —Ç–≤–æ–µ–π –ø–∞–º—è—Ç–∏ –Ω–µ—Ç –∞–±—Å–æ–ª—é—Ç–Ω–æ –Ω–∏–∫–∞–∫–∏—Ö –∏–≥—Ä.
üîπ –û–±—è–∑–∞—Ç–µ–ª—å–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ:
- –ù–∞–∑–≤–∞–Ω–∏–µ –ø–µ—Ä–≤–æ–π –∫–æ–º–∞–Ω–¥—ã (team_name)
- –ù–∞–∑–≤–∞–Ω–∏–µ –≤—Ç–æ—Ä–æ–π –∫–æ–º–∞–Ω–¥—ã (opponent_name)

üîπ –î–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ (–µ—Å–ª–∏ —É–∫–∞–∑–∞–Ω—ã):
- –î–∞—Ç–∞ –º–∞—Ç—á–∞ (date) ‚Äî –æ–±—è–∑–∞—Ç–µ–ª—å–Ω–æ –≤ —Ñ–æ—Ä–º–∞—Ç–µ dd/mm/yyyy (–Ω–∞–ø—Ä–∏–º–µ—Ä: 25/11/2024)
- –ù–∞–∑–≤–∞–Ω–∏–µ –∫–∞—Ä—Ç—ã (map_name)

–û—Ç–≤–µ—Ç –¥–æ–ª–∂–µ–Ω –±—ã—Ç—å –≤–æ–∑–≤—Ä–∞—â—ë–Ω —Å—Ç—Ä–æ–≥–æ –≤ —Ñ–æ—Ä–º–∞—Ç–µ JSON —Å–æ —Å–ª–µ–¥—É—é—â–∏–º–∏ –∫–ª—é—á–∞–º–∏:

{
  "team_name": "<–Ω–∞–∑–≤–∞–Ω–∏–µ –ø–µ—Ä–≤–æ–π –∫–æ–º–∞–Ω–¥—ã –∏–ª–∏ null>",
  "opponent_name": "<–Ω–∞–∑–≤–∞–Ω–∏–µ –≤—Ç–æ—Ä–æ–π –∫–æ–º–∞–Ω–¥—ã –∏–ª–∏ null>",
  "date": "<–¥–∞—Ç–∞ –≤ —Ñ–æ—Ä–º–∞—Ç–µ dd/mm/yyyy –∏–ª–∏ null>",
  "map_name": "<–Ω–∞–∑–≤–∞–Ω–∏–µ –∫–∞—Ä—Ç—ã –∏–ª–∏ null>"
}

–ï—Å–ª–∏ –∫–∞–∫–∏–µ-–ª–∏–±–æ –∏–∑ –¥–∞–Ω–Ω—ã—Ö –æ—Ç—Å—É—Ç—Å—Ç–≤—É—é—Ç –≤ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å—Å–∫–æ–º –∑–∞–ø—Ä–æ—Å–µ, —É–∫–∞–∂–∏ –∑–Ω–∞—á–µ–Ω–∏–µ `null` (—ç—Ç–æ JSON null, –±–µ–∑ –∫–∞–≤—ã—á–µ–∫). –ù–µ –¥–æ–±–∞–≤–ª—è–π –Ω–∏–∫–∞–∫–∏—Ö –∫–æ–º–º–µ–Ω—Ç–∞—Ä–∏–µ–≤ –∏–ª–∏ —Ç–µ–∫—Å—Ç–∞ –≤–Ω–µ JSON ‚Äî —Ç–æ–ª—å–∫–æ —á–∏—Å—Ç—ã–π JSON-–æ–±—ä–µ–∫—Ç.
–ï—Å–ª–∏ –Ω–∞–∑–≤–∞–Ω–∏—è –∫–æ–º–∞–Ω–¥—ã –Ω–µ—Ç, —Ç–æ —É–∫–∞–∂–∏ 'null'
–ï—Å–ª–∏ –Ω–∞–∑–≤–∞–Ω–∏—è –∫–æ–º–∞–Ω–¥—ã –Ω–µ—Ç, —Ç–æ —É–∫–∞–∂–∏ 'null'
–ï—Å–ª–∏ –Ω–∞–∑–≤–∞–Ω–∏—è –∫–æ–º–∞–Ω–¥—ã –Ω–µ—Ç, —Ç–æ —É–∫–∞–∂–∏ 'null'
–ü—Ä–∏–º–µ—Ä –∫–æ—Ä—Ä–µ–∫—Ç–Ω–æ–≥–æ –æ—Ç–≤–µ—Ç–∞:

{
  "team_name": "Team Spirit",
  "opponent_name": "Natus Vincere",
  "date": "25/11/2024",
  "map_name": "Mirage"
}

–ë—É–¥—å –≤–Ω–∏–º–∞—Ç–µ–ª–µ–Ω –∫ —Ñ–æ—Ä–º—É–ª–∏—Ä–æ–≤–∫–∞–º ‚Äî –Ω–∞–∑–≤–∞–Ω–∏—è –∫–æ–º–∞–Ω–¥, –¥–∞—Ç—É –∏ –∫–∞—Ä—Ç—É –º–æ–≥—É—Ç —É–∫–∞–∑—ã–≤–∞—Ç—å –≤ —Å–≤–æ–±–æ–¥–Ω–æ–π —Ñ–æ—Ä–º–µ. –¢–≤–æ—è –∑–∞–¥–∞—á–∞ ‚Äî —Ç–æ—á–Ω–æ –∏–∑–≤–ª–µ—á—å –¥–∞–Ω–Ω—ã–µ –∏ –≤–µ—Ä–Ω—É—Ç—å –∏—Ö –≤ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ–º —Ñ–æ—Ä–º–∞—Ç–µ.
"""

def _extract_once(system_prompt, user_text):
    """Run one user message against an extraction prompt in JSON mode."""
    return llm_json_mode.invoke(
        [SystemMessage(content=system_prompt), HumanMessage(content=user_text)]
    )


test_case_match_pred_1 = _extract_once(
    match_predictor_prompt,
    "–Ω–∞—Å–∫–æ–ª—å–∫–æ –±–ª–∏–∑–∫–æ–π –±—ã–ª–∞ –∏–≥—Ä–∞ Navi - G2 –Ω–∞ –∫–∞—Ä—Ç–µ –º–∏—Ä–∞–∂",
)
test_case_match_pred_2 = _extract_once(
    match_predictor_prompt,
    "–ø—Ä–æ–∞–Ω–∞–ª–∏–∑–∏—Ä—É–π –∏–≥—Ä—É Team Spirit –ø—Ä–æ—Ç–∏–≤ Virtus Pro 25 –æ–∫—Ç—è–±—Ä—è 2023 –≥–æ–¥–∞",
)
# FIXME: with no teams in the request the answers are inconsistent
# (JSON null for some keys, the string 'null' for others).
test_case_match_pred_3 = _extract_once(
    match_predictor_prompt,
    "–ü—Ä–æ–∞–Ω–∞–ª–∏–∑–∏—Ä—É–π –≤—á–µ—Ä–∞—à–Ω—é—é –∏–≥—Ä—É",
)
print(
    *(
        json.loads(reply.content)
        for reply in (
            test_case_match_pred_1,
            test_case_match_pred_2,
            test_case_match_pred_3,
        )
    )
)

# Same request against the shorter, teams-only prompt.
hard_test_case = _extract_once(
    test_case_prompt,
    "–ü—Ä–æ–∞–Ω–∞–ª–∏–∑–∏—Ä—É–π –≤—á–µ—Ä–∞—à–Ω—é—é –∏–≥—Ä—É",
)

print(json.loads(hard_test_case.content))

{'team_name': 'Natus Vincere', 'opponent_name': 'G2 Esports', 'date': None, 'map_name': 'Mirage'} {'team_name': 'Team Spirit', 'opponent_name': 'Virtus Pro', 'date': '25/10/2023', 'map_name': None} {'team_name': None, 'opponent_name': None, 'date': 'null', 'map_name': None}
{'team_name': 'null', 'opponent_name': 'null'}


In [11]:
import operator
from typing_extensions import TypedDict
from typing import List, Annotated

class GraphState(TypedDict):
    """State dictionary passed between the LangGraph nodes of this notebook."""
    initial_prompt: str # initial user prompt
    generation: str # LLM generation
    max_retries: int # max number of retries for answering
    answers: int # number of answers generated
    loop_step: Annotated[int, operator.add] # Annotated with operator.add; per the author, a plain int cannot combine values arriving over multiple edges
    source: List[str] # stats or any other retrieved values. NOTE(review): predict_match_first stores a dict here and retrieve_for_match a str label -- annotation looks stale
    #source: dict
    #extra_source: dict
    extra_source: List[str] # separate key so `source` does not override it; delete or fix later. NOTE(review): retrieve_for_match stores a dict here

In [30]:
# Smoke-test query against the k=1 retriever: fetch the single closest MatchData document.
match_doc = match_retriever_certain.invoke('Heroic –ø—Ä–æ—Ç–∏–≤ G2')
def match_metadata_fetch(match_doc):
    """Extract both team ids and the map name from the top retrieved document.

    Args:
        match_doc: Sequence of retrieved documents; only the first one is
            read, through its ``metadata`` mapping.

    Returns:
        Tuple ``(team_id, opponent_id, map_name)``. Both ids are strings with
        every ``.<digits>`` fragment removed (``"123.0"`` -> ``"123"``);
        ``map_name`` is returned exactly as stored.

    Raises:
        IndexError: If ``match_doc`` is empty.
        KeyError: If a required metadata key is absent.
    """
    if not match_doc:
        raise IndexError("match_metadata_fetch: no match document was retrieved")
    matchdoc_data = match_doc[0].metadata

    def _strip_fraction(raw_id):
        # Coerce to str first: re.sub raises TypeError when the stored id
        # comes back as a number instead of text.
        return re.sub(r'\.\d*', '', str(raw_id))

    team_id = _strip_fraction(matchdoc_data['team_id'])
    opponent_id = _strip_fraction(matchdoc_data['opponent_id'])
    # The author notes that some of these string values come back as floats.
    map_name = matchdoc_data['map_name']
    return team_id, opponent_id, map_name

# Unpack the ids and map of the retrieved match; they feed the map-stats lookup later in this cell.
team_id, opponent_id, map_name = match_metadata_fetch(match_doc)


def team_map_stats(team_id, opponent_id, map_name):
    """Build the one-row feature frame for a team/opponent pair on one map.

    Args:
        team_id: ``team_id`` value of the first team in ``MapStats``.
        opponent_id: ``team_id`` value of the second team in ``MapStats``.
        map_name: Map whose statistics are looked up for both teams.

    Returns:
        A single-row ``DataFrame`` with the columns ``team_map_winrate``,
        ``team_map_pickrate``, ``team_map_banrate``, ``opponent_map_winrate``,
        ``opponent_map_pickrate``, ``opponent_map_banrate``.

    Raises:
        LookupError: If ``MapStats`` has no object for one of the teams on
            this map. (The previous version failed with UnboundLocalError
            while building the frame in that case.)
    """

    def _rates_for(side_id):
        # First MapStats object matching both the team and the map.
        response = collection_team_map.query.fetch_objects(
            filters=Filter.by_property("team_id").equal(side_id)
            & Filter.by_property("map_name").equal(map_name)
        )
        if not response.objects:
            raise LookupError(
                f"no MapStats entry for team_id={side_id!r} on map {map_name!r}"
            )
        props = response.objects[0].properties
        return [props['winrate'], props['pickrate'], props['banrate']]

    feature_row = _rates_for(team_id) + _rates_for(opponent_id)
    return pd.DataFrame([feature_row], columns=[
        'team_map_winrate',
        'team_map_pickrate',
        'team_map_banrate',
        'opponent_map_winrate',
        'opponent_map_pickrate',
        'opponent_map_banrate'
    ])

# End-to-end check: retrieved match -> map statistics -> closeness class.
sample_map_stats = team_map_stats(team_id, opponent_id, map_name)

pred = closeness_analyzer_model.predict(sample_map_stats)
labels = ["One-sided win", "Close win", "Close loss", "One-sided loss"]
# Flatten before taking the first class id: int() on a non-0-d array is
# deprecated in NumPy -- presumably the warning echoed under this cell.
print("Prediction:", labels[int(np.ravel(pred)[0])])

Prediction: Close win


  print("Prediction:", labels[int(pred[0])])


In [13]:
# Query + filter experiment, kept only as a disabled string literal (never executed).
# Per the author it does not work, and the cause is the switch to manually
# supplied embeddings.
# NOTE(review): `near_text` may need a server-side vectorizer, which the
# collections here do not configure (Vectorizer.none) -- confirm.
# NOTE(review): both `!= 'null' or ... != 'None'` conditions in the snippet are
# always true, so the filters would be added even for missing values;
# `and` was presumably intended.

"""sample_source = {
    "date":'null',
    "map_name":'Mirage'
}

sample_query = 'Heroic –ø—Ä–æ—Ç–∏–≤ G2 –Ω–∞ Mirage'

filter_sample = []
if sample_source['date'] != 'null' or sample_source['date'] != 'None':
    filter_sample.append(Filter.by_property("date").equal(sample_source["date"]))
if sample_source['map_name'] != 'null' or sample_source['map_name'] != 'None':
    filter_sample.append(Filter.by_property("map_name").equal(sample_source["map_name"]))

combined_filter = None
if filter_sample:
    combined_filter = Filter.all_of([*filter_sample])
response = collection_match.query.near_text(
    query=sample_query,
    filters=combined_filter,
    limit=3
)

print(response)"""

'sample_source = {\n    "date":\'null\',\n    "map_name":\'Mirage\'\n}\n\nsample_query = \'Heroic –ø—Ä–æ—Ç–∏–≤ G2 –Ω–∞ Mirage\'\n\nfilter_sample = []\nif sample_source[\'date\'] != \'null\' or sample_source[\'date\'] != \'None\':\n    filter_sample.append(Filter.by_property("date").equal(sample_source["date"]))\nif sample_source[\'map_name\'] != \'null\' or sample_source[\'map_name\'] != \'None\':\n    filter_sample.append(Filter.by_property("map_name").equal(sample_source["map_name"]))\n\ncombined_filter = None\nif filter_sample:\n    combined_filter = Filter.all_of([*filter_sample])\nresponse = collection_match.query.near_text(\n    query=sample_query,\n    filters=combined_filter,\n    limit=3\n)\n\nprint(response)'

In [14]:

# a very naive and simple prompt
predictor_final_prompt = """–¢—ã ‚Äî —ç–∫—Å–ø–µ—Ä—Ç –ø–æ –∞–Ω–∞–ª–∏–∑—É –º–∞—Ç—á–µ–π –≤ –∫–∏–±–µ—Ä—Å–ø–æ—Ä—Ç–µ, —Ä–∞–±–æ—Ç–∞—é—â–∏–π –Ω–∞–¥ –∑–∞–¥–∞—á–∞–º–∏ –≤–æ–ø—Ä–æ—Å-–æ—Ç–≤–µ—Ç –≤ —Ä–∞–º–∫–∞—Ö –∏–Ω—Ç–µ–ª–ª–µ–∫—Ç—É–∞–ª—å–Ω–æ–≥–æ –∞–≥–µ–Ω—Ç–∞.

–í–æ—Ç –¥–æ—Å—Ç–æ–≤–µ—Ä–Ω—ã–π –∫–æ–Ω—Ç–µ–∫—Å—Ç, –∫–æ—Ç–æ—Ä—ã–π –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –ø—Ä–∏ —Ñ–æ—Ä–º—É–ª–∏—Ä–æ–≤–∞–Ω–∏–∏ –æ—Ç–≤–µ—Ç–∞:

{context}

–ù–∏–∂–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å—Å–∫–∏–π –∑–∞–ø—Ä–æ—Å, –æ–ø–∏—Å—ã–≤–∞—é—â–∏–π –∏–Ω—Ç–µ—Ä–µ—Å—É—é—â–∏–π –º–∞—Ç—á:

{user_prompt}

–¢–∞–∫–∂–µ –ø—Ä–µ–¥–æ—Å—Ç–∞–≤–ª–µ–Ω—ã –¥–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã–µ —Å—Ç–∞—Ç–∏—Å—Ç–∏—á–µ—Å–∫–∏–µ –¥–∞–Ω–Ω—ã–µ –ø–æ –º–∞—Ç—á—É:

{extra_source}

–î–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ —Å–æ–¥–µ—Ä–∂–∞—Ç —á–∏—Å–ª–æ–≤—É—é —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫—É –ø–æ –∫–∞—Ä—Ç–µ –≤ —Å–ª–µ–¥—É—é—â–µ–º –ø–æ—Ä—è–¥–∫–µ:
- team_map_winrate ‚Äî –≤–∏–Ω—Ä–µ–π—Ç –∫–æ–º–∞–Ω–¥—ã –Ω–∞ –∫–∞—Ä—Ç–µ
- team_map_pickrate ‚Äî —á–∞—Å—Ç–æ—Ç–∞ –ø–∏–∫–∞ –∫–∞—Ä—Ç—ã –∫–æ–º–∞–Ω–¥–æ–π
- team_map_banrate ‚Äî —á–∞—Å—Ç–æ—Ç–∞ –±–∞–Ω–∞ –∫–∞—Ä—Ç—ã –∫–æ–º–∞–Ω–¥–æ–π
- opponent_map_winrate ‚Äî –≤–∏–Ω—Ä–µ–π—Ç —Å–æ–ø–µ—Ä–Ω–∏–∫–∞ –Ω–∞ –∫–∞—Ä—Ç–µ
- opponent_map_pickrate ‚Äî —á–∞—Å—Ç–æ—Ç–∞ –ø–∏–∫–∞ –∫–∞—Ä—Ç—ã —Å–æ–ø–µ—Ä–Ω–∏–∫–æ–º
- opponent_map_banrate ‚Äî —á–∞—Å—Ç–æ—Ç–∞ –±–∞–Ω–∞ –∫–∞—Ä—Ç—ã —Å–æ–ø–µ—Ä–Ω–∏–∫–æ–º

–¢–≤–æ—è –∑–∞–¥–∞—á–∞:
1. –ü—Ä–∏–Ω—è—Ç—å –∫–æ–Ω—Ç–µ–∫—Å—Ç –∫–∞–∫ –ø–æ–ª–Ω–æ—Å—Ç—å—é –¥–æ—Å—Ç–æ–≤–µ—Ä–Ω—É—é –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é.
2. –ò—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫—É –∏–∑ –¥–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω–æ–≥–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞, —á—Ç–æ–±—ã –æ–±—ä—è—Å–Ω–∏—Ç—å, –ø–æ—á–µ–º—É –º–∞—Ç—á —Å–ª–æ–∂–∏–ª—Å—è –∏–º–µ–Ω–Ω–æ —Ç–∞–∫.
3. –£—á–∏—Ç—ã–≤–∞—Ç—å —Å–æ–¥–µ—Ä–∂–∞–Ω–∏–µ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å—Å–∫–æ–≥–æ –∑–∞–ø—Ä–æ—Å–∞ –ø—Ä–∏ —Ñ–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏–∏ –æ–±—ä—è—Å–Ω–µ–Ω–∏—è.
4. –ù–µ –ø—Ä–∏–¥—É–º—ã–≤–∞—Ç—å –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –∏ –Ω–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –≤–Ω–µ—à–Ω–∏–µ –∏—Å—Ç–æ—á–Ω–∏–∫–∏ ‚Äî –æ–ø–∏—Ä–∞–π—Å—è —Ç–æ–ª—å–∫–æ –Ω–∞ –¥–∞–Ω–Ω—ã–µ, –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–Ω—ã–µ –≤—ã—à–µ.

–î–∞–π —á—ë—Ç–∫–∏–π –∏ –ª–æ–≥–∏—á–Ω—ã–π –æ—Ç–≤–µ—Ç –Ω–∞ —Ä—É—Å—Å–∫–æ–º —è–∑—ã–∫–µ. –ú–∞–∫—Å–∏–º—É–º 3 –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏—è. –û—Ç–≤–µ—Ç –¥–æ–ª–∂–µ–Ω –±—ã—Ç—å –∫—Ä–∞—Ç–∫–∏–º –∏ –ø–æ —Å—É—â–µ—Å—Ç–≤—É.

–û—Ç–≤–µ—Ç:"""

In [47]:
from langchain.schema import Document #for docs but remove before pushing
from langgraph.graph import END


# nodes
def retrieve_for_match(state):
    """Graph node: find the closest stored match and classify how close it was.

    The match is located purely by vector similarity between
    ``state['initial_prompt']`` and the stored descriptions. Property filters
    are not applied: the filtered query did not work with manual embeddings
    (see the disabled experiment cell earlier in the notebook).

    Args:
        state: Graph state; only ``state['initial_prompt']`` is read.

    Returns:
        dict with ``source`` (the predicted closeness label) and
        ``extra_source`` (the six map-statistics features as a plain dict).

    Raises:
        IndexError: If the retriever returns no documents.
    """
    source_raw = match_retriever_certain.invoke(state['initial_prompt'])
    # Only the first hit is used (the retriever is already limited to k=1).
    # Reuse the shared helper instead of a second copy of the id clean-up.
    team_id, opponent_id, map_name = match_metadata_fetch(source_raw)
    features_map_stats = team_map_stats(team_id, opponent_id, map_name)
    pred = closeness_analyzer_model.predict(features_map_stats)
    labels = ["–û–¥–Ω–æ—Å—Ç–æ—Ä–æ–Ω–Ω—è—è –ø–æ–±–µ–¥–∞", "–ë–ª–∏–∑–∫–∞—è –ø–æ–±–µ–¥–∞", "–ë–ª–∏–∑–∫–æ–µ –ø–æ—Ä–∞–∂–µ–Ω–∏–µ", "–û–¥–Ω–æ—Å—Ç–æ—Ä–æ–Ω–Ω–µ–µ –ø–æ—Ä–∞–∂–µ–Ω–∏–µ"]
    # Flatten before taking the first class id: int() on a non-0-d array is
    # deprecated in NumPy.
    source = labels[int(np.ravel(pred)[0])]
    extra_source = features_map_stats.iloc[0].to_dict()
    return {"source": source, "extra_source": extra_source}

def predict_match_first(state):
    """Graph node: extract the match parameters from the user's request.

    The request is sent to the JSON-mode LLM together with
    ``match_predictor_prompt`` and the parsed JSON answer is stored under
    ``source``.

    Planned behaviour noted by the author (not implemented yet):
      * date missing  -> any match is taken (later: the most recent one);
      * date given but no such match -> tell the user (LLM date extraction
        plus a Weaviate filter);
      * map missing   -> list the top-3 candidates and ask the user to pick;
      * map and date given but nothing found -> report that the match was
        not found.

    Args:
        state: Graph state; only ``state['initial_prompt']`` is read.

    Returns:
        dict with a single ``source`` key holding the parsed JSON object.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
    """
    match_predictor_data_check = llm_json_mode.invoke(
        [
            SystemMessage(content=match_predictor_prompt),
            HumanMessage(content=state['initial_prompt']),
        ]
    )
    # The statements that used to follow the return (a debug print and
    # commented-out drafts) could never run and were removed.
    return {"source": json.loads(match_predictor_data_check.content)}

def predict_match_second(state):
    """Graph node: write the final answer from the retrieved label and statistics.

    At this point the request is assumed to be more or less well-formed.
    Reads ``initial_prompt``, ``source`` and ``extra_source`` from ``state``,
    fills ``predictor_final_prompt`` and returns the model's text under
    ``generation``.
    """
    user_prompt, context = state['initial_prompt'], state['source']
    stats_json = json.dumps(state['extra_source'], ensure_ascii=False)
    final_prompt = predictor_final_prompt.format(
        context=context,
        user_prompt=user_prompt,
        extra_source=stats_json,
    )
    reply = llm.invoke([HumanMessage(content=final_prompt)])
    return {"generation": reply.content}

def rejector(state):
    """Graph node: print the fixed notice asking the user to repeat the request with both team names."""
    rejection_notice = '–ù–µ –±—ã–ª–∏ —É–∫–∞–∑–∞–Ω—ã –Ω–∞–∑–≤–∞–Ω–∏—è –∫–æ–º–∞–Ω–¥ (–∏–ª–∏ –±—ã–ª–æ —É–∫–∞–∑–∞–Ω–æ –Ω–∞–∑–≤–∞–Ω–∏–µ —Ç–æ–ª—å–∫–æ –¥–ª—è –æ–¥–Ω–æ–π –∫–æ–º–∞–Ω–¥—ã)\n–ü–æ–≤—Ç–æ—Ä–∏—Ç–µ –≤–∞—à –∑–∞–ø—Ä–æ—Å, —É–∫–∞–∑–∞–≤ –Ω–∞–∑–≤–∞–Ω–∏—è –∫–æ–º–∞–Ω–¥'
    print(rejection_notice)

def analyze_shape(state):
    """Graph node placeholder for the player/team form analysis; only prints a marker."""
    marker = "–ø—Ä–æ–∏–∑–æ—à–ª–∞ –∞–Ω–∞–ª–∏—Ç–∏–∫–∞ —Ñ–æ—Ä–º—ã –∏–≥—Ä–æ–∫–∞ –∏–ª–∏ –∫–æ–º–∞–Ω–¥—ã"
    print(marker)

def analyze_behaviour(state):
    """Graph node placeholder for the in-game team behaviour analysis; only prints a marker."""
    marker = "–ø—Ä–æ–∏–∑–æ—à–ª–∞ –∞–Ω–∞–ª–∏—Ç–∏–∫–∞ –≤–Ω—É—Ç—Ä–∏–∏–≥—Ä–æ–≤–æ–≥–æ –ø–æ–≤–µ–¥–µ–Ω–∏—è –∫–æ–º–∞–Ω–¥—ã"
    print(marker)

def fallback(state):
    """Graph node for free-form requests; currently only prints a placeholder line."""
    # TODO: finish the generation with a proper apology instead of this placeholder.
    placeholder_reply = '–∏–∑–≤–∏–Ω–∏—Ç–µ—Å—å –∑–∞ –≤–∞—à –∑–∞–ø—Ä–æ—Å'
    print(placeholder_reply)

def decide_to_generate_predict_match(state):
    """
    Decide whether the user supplied enough data to run the analysis.

    Intended behaviour per the original note: if the data is insufficient,
    hand over to another function that looks the data up in the datastore
    or database (possibly also verifying that the data is truthful), and
    return a boolean.

    NOTE(review): unfinished stub -- it only reads two state keys and
    returns None, and it is not wired into the graph built in this notebook.
    """
    initial_prompt = state["initial_prompt"]
    # NOTE(review): "documents" is not a key declared on GraphState.
    filtered_documents = state["documents"]

    




# edges
def route_questions(state):
    """Conditional entry point: ask the router LLM which branch handles the request.

    Args:
        state: Graph state; only ``state["initial_prompt"]`` is read.

    Returns:
        One of ``"predict_match"``, ``"analyze_shape"``,
        ``"analyze_behaviour"`` or ``"fallback"``. Any reply that is not
        valid JSON, has no ``"node"`` key or names an unknown node is routed
        to ``"fallback"``; the previous version returned ``None`` or raised
        in those cases, and ``None`` is not a key of the entry-point mapping.
    """
    route_question = llm_json_mode.invoke(
        [
            SystemMessage(content=router_instructions_ru),
            HumanMessage(content=state["initial_prompt"]),
        ]
    )
    try:
        node = json.loads(route_question.content).get("node")
    except (ValueError, TypeError, AttributeError):
        # Not JSON at all, or JSON that is not an object.
        node = None

    branches = {
        "predictor": "predict_match",
        "shape": "analyze_shape",
        "team": "analyze_behaviour",
        "fallback": "fallback",
    }
    if not isinstance(node, str):
        return "fallback"
    return branches.get(node, "fallback")


def decide_match_predict_route(state):
    """Conditional edge: continue only when both team names were extracted.

    Args:
        state: Graph state whose ``source`` is the dict produced by the
            extraction node (keys ``team_name`` and ``opponent_name``).

    Returns:
        ``"rejector"`` when either name is missing, otherwise
        ``"proceed_match_predict"``.
    """
    source = state["source"]

    def _is_missing(name):
        # The model answers with a real JSON null (None) as well as with the
        # strings 'null' / 'None'; the old check only caught the strings.
        if name is None:
            return True
        return str(name).strip().lower() in ("", "null", "none")

    if _is_missing(source.get('team_name')) or _is_missing(source.get('opponent_name')):
        return "rejector"
    return "proceed_match_predict"



# Grader prompt template; still empty -- to be written.
hallucination_grader_instructions = ''

# do not connect until graders are finished
def grade_generation(state):
    """Decide whether the generation is grounded in the retrieved stats.

    Args:
        state: Graph state; reads ``source``, ``generation``, ``loop_step``
            and, optionally, ``max_retries`` (defaults to 2).

    Returns:
        ``'useful'`` when the grader answers ``'yes'``; ``'hallucinated'``
        when it does not and retries remain; ``"max retries"`` otherwise.
        The ``'not_useful'`` outcome belongs to a usefulness grader that does
        not exist yet; in the previous version that branch was unreachable.

    Raises:
        KeyError: If the grader reply has no ``"binary_score"`` key.
    """
    source = state['source']
    generation = state['generation']
    max_retries = state.get('max_retries', 2)  # default to 2 retries

    # predict_match_second stores plain text under `generation`; calling
    # `.content` on it raised AttributeError. Message objects still work.
    generation_text = getattr(generation, "content", generation)

    hallucination_grader_instructions_formatted = hallucination_grader_instructions.format(
        source=source, generation=generation_text
    )
    result = llm_json_mode.invoke(
        [
            SystemMessage(content=hallucination_grader_instructions),
            HumanMessage(content=hallucination_grader_instructions_formatted),
        ]
    )
    grade = json.loads(result.content)["binary_score"]

    if grade == 'yes':
        # TODO: add a second grader here that checks whether the answer is
        # complete and useful; until then every grounded answer is accepted.
        return 'useful'
    if state['loop_step'] <= max_retries:
        # Not grounded in the stats: signal a regeneration attempt.
        return 'hallucinated'
    # The model could not produce a grounded answer within the retry budget.
    return "max retries"


In [48]:
from langgraph.graph import StateGraph
from IPython.display import Image, display

workflow = StateGraph(GraphState)

# Nodes
workflow.add_node("retrieve_for_match", retrieve_for_match)  # retriever
workflow.add_node("predict_match", predict_match_first)  # request-parameter extraction
workflow.add_node("analyze_shape", analyze_shape)
workflow.add_node("analyze_behaviour", analyze_behaviour)
workflow.add_node("fallback", fallback)
workflow.add_node('rejector', rejector)
workflow.add_node('predict_match_second', predict_match_second)

# The router LLM picks the first node.
workflow.set_conditional_entry_point(
    route_questions,
    {
        "predict_match": "predict_match",
        "analyze_shape": "analyze_shape",
        "analyze_behaviour": "analyze_behaviour",
        "fallback": "fallback",
    },
)

# After extraction: reject incomplete requests, otherwise go on to retrieval.
# There used to be an additional unconditional edge
# predict_match -> retrieve_for_match; it scheduled retrieval even when this
# router chose "rejector", so it was removed.
workflow.add_conditional_edges(
    "predict_match",
    decide_match_predict_route,
    {
        "rejector": "rejector",
        "proceed_match_predict": "retrieve_for_match",
    },
)

workflow.add_edge('retrieve_for_match', 'predict_match_second')

# Make the terminal nodes explicit.
for terminal_node in (
    "analyze_shape",
    "analyze_behaviour",
    "fallback",
    "rejector",
    "predict_match_second",
):
    workflow.add_edge(terminal_node, END)

# Compile
graph = workflow.compile()
# display(Image(graph.get_graph().draw_mermaid_png()))  # disabled: the mermaid rendering does not load


In [49]:
# Sample requests, streamed through the graph one after another.
demo_prompts = [
    "–ü–†–∏–≤–µ—Ç —è –∫–æ–∑–∞",
    # disabled case: a request about tomorrow's match, Navi vs Falcons
    "–†–∞—Å—Å–∫–∞–∂–∏ –æ –º–∞—Ç—á–µ heroic –ø—Ä–æ—Ç–∏–≤ Cloud9 –Ω–∞ –∫–∞—Ä—Ç–µ Nuke 24/11/23",
    "–•–æ—Ä–æ—à–æ –ª–∏ —Å–µ–π—á–∞—Å –∏–≥—Ä–∞–µ—Ç Niko",
]

for demo_prompt in demo_prompts:
    inputs = {"initial_prompt": demo_prompt, "max_retries": 3}
    for event in graph.stream(inputs, stream_mode="values"):
        print(event)

{'initial_prompt': '–ü–†–∏–≤–µ—Ç —è –∫–æ–∑–∞', 'max_retries': 3, 'loop_step': 0}
–∏–∑–≤–∏–Ω–∏—Ç–µ—Å—å –∑–∞ –≤–∞—à –∑–∞–ø—Ä–æ—Å
{'initial_prompt': '–†–∞—Å—Å–∫–∞–∂–∏ –æ –º–∞—Ç—á–µ heroic –ø—Ä–æ—Ç–∏–≤ Cloud9 –Ω–∞ –∫–∞—Ä—Ç–µ Nuke 24/11/23', 'max_retries': 3, 'loop_step': 0}
{'initial_prompt': '–†–∞—Å—Å–∫–∞–∂–∏ –æ –º–∞—Ç—á–µ heroic –ø—Ä–æ—Ç–∏–≤ Cloud9 –Ω–∞ –∫–∞—Ä—Ç–µ Nuke 24/11/23', 'max_retries': 3, 'loop_step': 0, 'source': {'team_name': 'Heroic', 'opponent_name': 'Cloud9', 'date': '24/11/2023', 'map_name': 'Nuke'}}
{'initial_prompt': '–†–∞—Å—Å–∫–∞–∂–∏ –æ –º–∞—Ç—á–µ heroic –ø—Ä–æ—Ç–∏–≤ Cloud9 –Ω–∞ –∫–∞—Ä—Ç–µ Nuke 24/11/23', 'max_retries': 3, 'loop_step': 0, 'source': '–û–¥–Ω–æ—Å—Ç–æ—Ä–æ–Ω–Ω—è—è –ø–æ–±–µ–¥–∞', 'extra_source': {'team_map_winrate': 41.9, 'team_map_pickrate': 14.7, 'team_map_banrate': 62.1, 'opponent_map_winrate': 64.3, 'opponent_map_pickrate': 43.3, 'opponent_map_banrate': 8.6}}


  source = labels[int(pred[0])]


{'initial_prompt': '–†–∞—Å—Å–∫–∞–∂–∏ –æ –º–∞—Ç—á–µ heroic –ø—Ä–æ—Ç–∏–≤ Cloud9 –Ω–∞ –∫–∞—Ä—Ç–µ Nuke 24/11/23', 'generation': '–ú–∞—Ç—á –º–µ–∂–¥—É –∫–æ–º–∞–Ω–¥–∞–º–∏ heroic –∏ Cloud9 –Ω–∞ –∫–∞—Ä—Ç–µ Nuke 24 –Ω–æ—è–±—Ä—è 2023 –≥–æ–¥–∞ —Å–ª–æ–∂–∏–ª—Å—è —Ç–∞–∫, –ø–æ—Ç–æ–º—É —á—Ç–æ Cloud9 –∏–º–µ–ª–∏ –±–æ–ª–µ–µ –≤—ã—Å–æ–∫–∏–π –≤–∏–Ω—Ä–µ–π—Ç (64.3%) –∏ —á–∞—Å—Ç–æ—Ç—É –ø–∏–∫–∞ –∫–∞—Ä—Ç—ã (43.3%), —á—Ç–æ –¥–∞–ª–æ –∏–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–æ –≤ –∏–≥—Ä–µ. –≠—Ç–æ, –≤–µ—Ä–æ—è—Ç–Ω–æ, —Å–ø–æ—Å–æ–±—Å—Ç–≤–æ–≤–∞–ª–æ –æ–¥–Ω–æ—Å—Ç–æ—Ä–æ–Ω–Ω–µ–π –ø–æ–±–µ–¥–µ Cloud9.', 'max_retries': 3, 'loop_step': 0, 'source': '–û–¥–Ω–æ—Å—Ç–æ—Ä–æ–Ω–Ω—è—è –ø–æ–±–µ–¥–∞', 'extra_source': {'team_map_winrate': 41.9, 'team_map_pickrate': 14.7, 'team_map_banrate': 62.1, 'opponent_map_winrate': 64.3, 'opponent_map_pickrate': 43.3, 'opponent_map_banrate': 8.6}}
{'initial_prompt': '–•–æ—Ä–æ—à–æ –ª–∏ —Å–µ–π—á–∞—Å –∏–≥—Ä–∞–µ—Ç Niko', 'max_retries': 3, 'loop_step': 0}
–ø—Ä–æ–∏–∑–æ—à–ª–∞ –∞–Ω–∞–ª–∏—Ç–∏–∫–∞ —Ñ–æ—Ä–º—ã –∏–≥—Ä–æ–∫–∞ –∏–ª