In [1]:
%pip install langchain langchain-community langchain-huggingface neo4j pandas spacy trans
!python -m spacy download en_core_web_sm





[notice] A new release of pip is available: 23.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB 1.3 MB/s eta 0:00:10
     ---------------------------------------- 0.1/12.8 MB 1.3 MB/s eta 0:00:10
      --------------------------------------- 0.2/12.8 MB 1.5 MB/s eta 0:00:09
      --------------------------------------- 0.3/12.8 MB 2.0 MB/s eta 0:00:07
     - -------------------------------------- 0.4/12.8 MB 2.2 MB/s eta 0:00:06
     - -------------------------------------- 0.5/12.8 MB 2.3 MB/s eta 0:00:06
     -- ------------------------------------- 0.7/12.8 MB 2.7 MB/s eta 0:00:05
     -- ------------------------------------- 0.8/12.8 MB 2.7 MB/s eta 0:00:05
     -- ------------------------------------- 0.9/12.8 MB 2.8 MB/s eta 0:00:05
     --- --------------------------------


[notice] A new release of pip is available: 23.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from langchain_community.graphs import Neo4jGraph
from neo4j import GraphDatabase

In [3]:
config = {}

# Parse KEY=VALUE pairs from config.txt; lines without "=" are ignored and
# only the first "=" splits, so values may themselves contain "=".
with open('config.txt', 'r') as file:
    for line in file:
        if "=" in line:
            key, value = line.split('=', 1)
            config[key.strip()] = value.strip()

# Fail early with a readable message instead of handing None to the driver.
missing_keys = [name for name in ('URI', 'USERNAME', 'PASSWORD') if name not in config]
if missing_keys:
    raise ValueError(f"config.txt is missing required key(s): {', '.join(missing_keys)}")

uri = config.get('URI')
username = config.get('USERNAME')
password = config.get('PASSWORD')
driver = GraphDatabase.driver(uri, auth=(username, password))
# verify_connectivity() raises if the server cannot be reached or the
# credentials are rejected, so the message below is only printed after a
# successful round trip to the database.
driver.verify_connectivity()
print("Connected to Neo4j database")

Connected to Neo4j database


In [5]:
# Connect using the LangChain wrapper
# LangChain wrapper around the same database, reusing the credentials read
# from config.txt. Schema refresh is turned off at construction time.
graph = Neo4jGraph(url=uri, username=username, password=password, refresh_schema=False)

# Optional sanity check: list the Season nodes to confirm the connection works.
print(graph.query("MATCH (s:Season) RETURN s"))

[{'s': {'season_name': '2021-22'}}, {'s': {'season_name': '2022-23'}}]


In [6]:
import spacy
import re

nlp = spacy.load("en_core_web_sm")
# Load the kb from the graph database (optional enhancement)
def load_fpl_kb(graph: "Neo4jGraph") -> dict:
    """Build the in-memory FPL knowledge base used for entity lookup.

    Args:
        graph: Object exposing ``query(cypher)`` that returns a list of
            dict-like records (in this notebook, the LangChain ``Neo4jGraph``).

    Returns:
        A dict with four keys:
          * ``"players"`` - lower-cased ``player_name`` of every ``Player`` node.
          * ``"teams"`` - lower-cased ``name`` of every ``Team`` node.
          * ``"positions"`` - fixed list of position aliases.
          * ``"stats"`` - mapping from a query phrase to the stat property name.
    """

    def _lowercased_names(cypher: str) -> list:
        # A record whose name is null/empty is skipped; calling .lower() on
        # None would otherwise abort the whole load.
        return [record["name"].lower() for record in graph.query(cypher) if record["name"]]

    return {
        "players": _lowercased_names("MATCH (p:Player) RETURN p.player_name AS name"),
        "teams": _lowercased_names("MATCH (t:Team) RETURN t.name AS name"),
        "positions": ["gk", "gkp", "def", "mid", "fwd", "goalkeeper", "defender", "midfielder", "forward"],
        # Phrase as it appears in a question -> property name used in Cypher.
        "stats": {
            "points": "total_points",
            "goals": "goals_scored",
            "assists": "assists",
            "minutes": "minutes",
            "bonus": "bonus",
            "influence": "influence",
            "creativity": "creativity",
            "threat": "threat",
            "ict": "ict_index",
            "clean sheets": "clean_sheets",
            "form": "form",
        },
    }

In [7]:
# Knowledge base (players, teams, positions, stats) read once from the graph;
# the entity lookup table further down is built from it.
FPL_KB = load_fpl_kb(graph)

In [None]:
import transformers
import torch
from transformers import pipeline
# Zero-shot classification pipeline; classify_fpl_intents calls it to score a
# question against the intent labels.
classifier = pipeline("zero-shot-classification",
                      model="facebook/bart-large-mnli")

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [None]:
# Intent label -> example trigger phrases.
# NOTE(review): classify_fpl_intents only passes the KEYS of this dict to the
# zero-shot classifier as candidate labels; the phrase lists are not read
# anywhere in the visible notebook.
INTENTS = {
    "fixture_details": [
        "fixture", "fixtures", "when do", "when does", "when is", "play next", 
        "next match", "kickoff", "schedule", "upcoming match", "future match"
    ],

    "best_players_by_metric": [
        "top", "best", "highest", "leader", "rank", "ranking", 
        "top scorer", "top assist", "highest points", "most points", "stat leaders", "top players","best forward",
        "best midfielder", "best defender", "best goalkeeper","top number","best number"
    ],
    "player_or_team_performance": [
        "how did", "performance", "stats", "statistics", "record", "scored", 
        "assists", "goals", "points", "clean sheets", "how many", 
        "results","compare", "vs", "versus", "better than", "head to head", "compare stats", "comparison","more than","compare player1 and player2"
    ]
}

In [None]:
def classify_fpl_intents(query: str):
    """Return the intent label the zero-shot classifier ranks first for ``query``.

    Args:
        query: The user's natural-language question.

    Returns:
        One of the keys of ``INTENTS`` (the first entry of the classifier's
        ``"labels"`` list).
    """
    candidate_labels = list(INTENTS.keys())
    result = classifier(query, candidate_labels, multi_label=True)

    return result["labels"][0]  # the top intent


# The rest of the notebook calls the singular spelling, which was never
# defined; bind both names to the same function so either one works.
classify_fpl_intent = classify_fpl_intents

In [None]:
# Lower-cased surface form -> (category, canonical value).
ENTITY_LOOKUP = {}


def add_to_lookup(terms, category):
    """Register every entry of ``terms`` in ``ENTITY_LOOKUP`` under ``category``.

    For a dict, each key is the surface form and its value is the canonical
    value. For any other iterable of strings, each item is the surface form
    and its ``.title()`` form is the canonical value. Keys are stored
    lower-cased; a later registration of the same key overwrites the earlier one.
    """
    if isinstance(terms, dict):
        pairs = ((surface, terms[surface]) for surface in terms)
    else:
        pairs = ((surface, surface.title()) for surface in terms)

    for surface, canonical in pairs:
        ENTITY_LOOKUP[surface.lower()] = (category, canonical)
# Populate the lookup from the knowledge base. Order matters when a surface
# form occurs in more than one category: the later call overwrites the earlier.
add_to_lookup(FPL_KB["players"], "player")
add_to_lookup(FPL_KB["teams"], "team")
add_to_lookup(FPL_KB["positions"], "position")
add_to_lookup(FPL_KB["stats"], "stat")

In [None]:
# Upper bound on how many consecutive tokens are joined when probing
# ENTITY_LOOKUP for a multi-word phrase (full names, "clean sheets", ...).
_MAX_ENTITY_TOKENS = 8


def _surname_index(lookup: dict) -> dict:
    """Map the last word of each multi-word player key to its lookup entry, dropping surnames shared by several players."""
    index = {}
    ambiguous = set()
    for key, entry in lookup.items():
        if entry[0] != "player" or " " not in key:
            continue
        surname = key.rsplit(" ", 1)[1]
        if surname in ambiguous:
            continue
        if surname in index and index[surname] != entry:
            del index[surname]
            ambiguous.add(surname)
        else:
            index[surname] = entry
    return index


def _store_entity(entities: dict, category: str, value) -> None:
    """Record one matched (category, value) pair in ``entities``."""
    if category == "player":
        if "player1" not in entities:
            entities["player1"] = value
            entities["player_name"] = value
        elif entities["player1"] != value:  # avoid self-match
            entities["player2"] = value

    elif category == "team":
        if "team1" not in entities:
            entities["team1"] = value
            entities["team_name"] = value
        elif entities["team1"] != value:
            entities["team2"] = value

    elif category == "position":
        # Collapse every alias onto one of the four position codes.
        norm = value.upper()
        if "MID" in norm:
            norm = "MID"
        elif "FWD" in norm or "FORWARD" in norm:
            norm = "FWD"
        elif "DEF" in norm:
            norm = "DEF"
        elif "GK" in norm or "GOALKEEPER" in norm:
            norm = "GKP"
        entities["position"] = norm

    elif category == "stat":
        entities["stat_type"] = value


def extract_fpl_entities(query: str) -> dict:
    """Extract players, teams, position, stat, gameweek, season and numbers from ``query``.

    Args:
        query: The user's natural-language question.

    Returns:
        A dict that always contains ``"stat_type"`` (default ``"total_points"``)
        and, when found, any of: ``player1``/``player_name``/``player2``,
        ``team1``/``team_name``/``team2``, ``position``, ``gw_number``,
        ``season``, ``limit`` (from "top N"/"best N") and ``filter_value``
        (any other number below 1000).
    """
    doc = nlp(query)
    entities = {
        "stat_type": "total_points"  # default fallback
    }

    # A. Strict patterns first, so the gameweek number can be excluded from
    # the loose-number heuristic below. Matching is done on the original text
    # (case-insensitively) so character offsets line up with spaCy's token.idx.
    gw_match = re.search(r"(?:gw|gameweek)\s*(\d+)", query, re.IGNORECASE)
    gw_digits = None
    if gw_match:
        entities["gw_number"] = int(gw_match.group(1))
        gw_digits = gw_match.span(1)

    # Season in the form "2022-23".
    season_match = re.search(r"(20\d{2}-\d{2})", query)
    if season_match:
        entities["season"] = season_match.group(1)

    # B. Lookup-based entities. The lookup keys are whole lower-cased phrases,
    # so the longest run of tokens starting at each position is tried first;
    # single tokens then fall back to their lemma and, for name-like tokens,
    # to an unambiguous player surname.
    surnames = _surname_index(ENTITY_LOOKUP)
    token_count = len(doc)
    start = 0
    while start < token_count:
        token = doc[start]
        match = None
        width = 1
        for end in range(min(token_count, start + _MAX_ENTITY_TOKENS), start, -1):
            match = ENTITY_LOOKUP.get(doc[start:end].text.lower())
            if match:
                width = end - start
                break
        if not match:
            match = ENTITY_LOOKUP.get(token.lemma_.lower())
        if not match and (token.pos_ == "PROPN" or token.text[:1].isupper()):
            match = surnames.get(token.text.lower())

        if match:
            _store_entity(entities, *match)
        start += width

    # C. Loose numbers: "top 5" is a row limit, anything else below 1000 is a
    # threshold such as "more than 5". Years and the gameweek number are skipped.
    for token in doc:
        if not token.like_num:
            continue
        if gw_digits and gw_digits[0] <= token.idx < gw_digits[1]:
            continue
        try:
            val = float(token.text)
        except ValueError:
            # like_num also accepts spelled-out numbers ("five"); ignore those.
            continue
        if val >= 1000:
            continue
        follows_ranking_word = token.i > 0 and doc[token.i - 1].lower_ in ("top", "best")
        if follows_ranking_word and val >= 1 and val == int(val):
            entities["limit"] = int(val)
        else:
            entities["filter_value"] = val

    return entities

In [None]:
def _cypher_quote(value) -> str:
    """Return ``value`` as a single-quoted Cypher string literal with backslashes and quotes escaped."""
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def _cypher_property_name(name) -> str:
    """Return ``name`` if it is a plain ASCII identifier, otherwise raise ValueError."""
    text = str(name)
    if not (text.isascii() and text.isidentifier()):
        raise ValueError(f"Unsafe property name for Cypher query: {text!r}")
    return text


def get_fpl_cypher_query(intent: str, entities: dict) -> str:
    """Generate a Cypher query from an FPL intent and the extracted entities.

    Args:
        intent: ``"player_or_team_performance"``, ``"fixture_details"`` or
            ``"best_players_by_metric"``; anything else yields the fallback.
        entities: Dict as produced by ``extract_fpl_entities``. Keys read here:
            ``player1``, ``player2``, ``team1``, ``team2``, ``position``,
            ``stat_type`` (default ``"total_points"``), ``gw_number``,
            ``limit`` (default 10), ``season`` (default ``"2022-23"``) and
            ``filter_value``.

    Returns:
        The Cypher text, or a string starting with ``--`` when no template
        applies. That fallback is a sentinel, not executable Cypher.

    Raises:
        ValueError: If ``stat_type`` is not a plain identifier (it is spliced
            into the query as a property name and cannot be quoted).
    """
    player1 = entities.get("player1")
    player2 = entities.get("player2")
    team1 = entities.get("team1")
    team2 = entities.get("team2")
    position = entities.get("position")
    filter_value = entities.get("filter_value")
    gw = entities.get("gw_number")

    stat = _cypher_property_name(entities.get("stat_type") or "total_points")
    limit = int(entities.get("limit") or 10)
    season = _cypher_quote(entities.get("season") or "2022-23")

    def lowered(name) -> str:
        # Names are compared case-insensitively: the entity values may be
        # re-capitalised versions of the stored names ("Mctominay"), which an
        # exact match would miss.
        return _cypher_quote(str(name).lower())

    def involves(team) -> str:
        key = lowered(team)
        return f"toLower(f.home_team) = {key} OR toLower(f.away_team) = {key}"

    season_gws = f"MATCH (:Season {{season_name:{season}}})-[:HAS_GW]->"
    any_fixture = f"{season_gws}(g)-[:HAS_FIXTURE]->(f)"
    gw_fixture = None
    if gw:
        gw_fixture = f"{season_gws}(g:Gameweek {{GW_number:{int(gw)}}})-[:HAS_FIXTURE]->(f:Fixture)"
    played_in = "MATCH (p:Player)-[pi:PLAYED_IN]->(f)"

    # ------------------------------------------------------------------
    # 1) PERFORMANCE: single player
    # ------------------------------------------------------------------
    if intent == "player_or_team_performance" and player1 and not player2:
        who = f"WHERE toLower(p.player_name) = {lowered(player1)}"
        if gw:
            return "\n".join([
                gw_fixture,
                played_in,
                who,
                f"RETURN p.player_name AS player, pi.{stat} AS {stat}, g.GW_number AS gameweek",
            ])
        return "\n".join([
            any_fixture,
            played_in,
            who,
            f"RETURN p.player_name AS player, SUM(pi.{stat}) AS total_{stat}, {season} AS season",
        ])

    # ------------------------------------------------------------------
    # 2) PERFORMANCE: single team summary
    # ------------------------------------------------------------------
    if intent == "player_or_team_performance" and team1 and not team2:
        team_label = _cypher_quote(team1)
        if gw:
            return "\n".join([
                gw_fixture,
                f"WHERE {involves(team1)}",
                played_in,
                f"RETURN {team_label} AS team, SUM(pi.{stat}) AS total_{stat}, g.GW_number AS gameweek",
            ])
        return "\n".join([
            any_fixture,
            f"WHERE {involves(team1)}",
            played_in,
            f"RETURN {team_label} AS team, SUM(pi.{stat}) AS total_{stat}",
        ])

    # ------------------------------------------------------------------
    # 3) PERFORMANCE: compare two players
    # ------------------------------------------------------------------
    if intent == "player_or_team_performance" and player1 and player2:
        return "\n".join([
            any_fixture,
            played_in,
            f"WHERE toLower(p.player_name) IN [{lowered(player1)}, {lowered(player2)}]",
            f"RETURN p.player_name AS player, SUM(pi.{stat}) AS total_{stat}",
        ])

    # ------------------------------------------------------------------
    # 4) PERFORMANCE: compare two teams
    # ------------------------------------------------------------------
    if intent == "player_or_team_performance" and team1 and team2:
        both = f"[{lowered(team1)}, {lowered(team2)}]"
        return "\n".join([
            any_fixture,
            f"WHERE toLower(f.home_team) IN {both} OR toLower(f.away_team) IN {both}",
            played_in,
            "RETURN CASE",
            f"    WHEN {involves(team1)} THEN {_cypher_quote(team1)}",
            f"    ELSE {_cypher_quote(team2)} END AS team,",
            f"    SUM(pi.{stat}) AS total_{stat}",
        ])

    # ------------------------------------------------------------------
    # 5) FIXTURE: a team's fixture in a given gameweek
    # ------------------------------------------------------------------
    if intent == "fixture_details" and team1 and gw and not team2:
        return "\n".join([
            gw_fixture,
            f"WHERE {involves(team1)}",
            "RETURN f.fixture_number AS fixture, f.kickoff_time, f.home_team, f.away_team",
            "ORDER BY f.kickoff_time ASC LIMIT 1",
        ])

    # ------------------------------------------------------------------
    # 6) FIXTURE: head-to-head between two teams in a given gameweek
    # ------------------------------------------------------------------
    if intent == "fixture_details" and team1 and team2 and gw:
        first, second = lowered(team1), lowered(team2)
        return "\n".join([
            gw_fixture,
            f"WHERE (toLower(f.home_team) = {first} AND toLower(f.away_team) = {second})",
            f"OR (toLower(f.home_team) = {second} AND toLower(f.away_team) = {first})",
            "RETURN f.fixture_number AS fixture, f.kickoff_time, f.home_team, f.away_team",
            "ORDER BY f.kickoff_time ASC LIMIT 1",
        ])

    # ------------------------------------------------------------------
    # 7-9) BEST PLAYERS BY METRIC: overall or per position, optionally
    # restricted to totals above filter_value. The threshold used to live in
    # its own branch that could never be reached, because the two branches
    # before it already covered "position set" and "position not set".
    # ------------------------------------------------------------------
    if intent == "best_players_by_metric":
        total = f"total_{stat}"
        clauses = []
        if position:
            position_name = _cypher_quote(position)
            clauses.append(f"MATCH (p:Player)-[:PLAYS_AS]->(pos:Position {{name:{position_name}}})")
            clauses.append(any_fixture)
            clauses.append("MATCH (p)-[pi:PLAYED_IN]->(f)")
        else:
            clauses.append(any_fixture)
            clauses.append(played_in)

        position_column = ", pos.name AS position" if position else ""
        if filter_value is not None:
            carried = "p, pos" if position else "p"
            clauses.append(f"WITH {carried}, SUM(pi.{stat}) AS {total}")
            clauses.append(f"WHERE {total} > {float(filter_value)}")
            clauses.append(f"RETURN p.player_name AS player, {total}{position_column}")
        else:
            clauses.append(f"RETURN p.player_name AS player, SUM(pi.{stat}) AS {total}{position_column}")
        clauses.append(f"ORDER BY {total} DESC LIMIT {limit}")
        return "\n".join(clauses)

    # ------------------------------------------------------------------
    # 10) FALLBACK
    # ------------------------------------------------------------------
    return "-- Cannot map to any Cypher query based on extracted intent and entities"

In [17]:
# Columns that identify a row rather than carry a metric value.
_IDENTITY_COLUMNS = ("player", "team", "position", "gameweek", "season")


def _metric_label(column: str) -> str:
    """Turn a result column such as 'total_goals_scored' into 'goals scored'."""
    label = column[len("total_"):] if column.startswith("total_") else column
    return label.replace("_", " ")


def _format_metric_rows(result: list, heading: str) -> str:
    """Render rows keyed by player/team plus metric columns as a bulleted list under ``heading``."""
    lines = [heading]
    for rec in result:
        subject = rec.get("player") or rec.get("team") or "Unknown"
        context = []
        if rec.get("position"):
            context.append(str(rec["position"]))
        if rec.get("gameweek") is not None:
            context.append(f"GW {rec['gameweek']}")
        if rec.get("season"):
            context.append(f"season {rec['season']}")
        suffix = f" ({', '.join(context)})" if context else ""
        metrics = ", ".join(
            f"{value} {_metric_label(column)}"
            for column, value in rec.items()
            if column not in _IDENTITY_COLUMNS
        )
        lines.append(f"- {subject}{suffix}: {metrics}")
    return "\n".join(lines) + "\n"


def format_query_result(intent: str, result: list) -> str:
    """Turn Cypher result rows into a short text summary for the LLM context.

    Args:
        intent: Either one of the intents produced by the classifier
            (``best_players_by_metric``, ``player_or_team_performance``,
            ``fixture_details``) or one of the legacy upper-case intent names.
        result: List of dict rows as returned by ``graph.query``.

    Returns:
        A human-readable summary; ``"Query executed. Results retrieved."`` for
        an intent that is not recognised.
    """
    # ---- Intents produced by the current classifier / query builder ----
    if intent == "best_players_by_metric":
        if not result:
            return "No players found for the requested metric."
        return _format_metric_rows(result, "Top Players:")

    if intent == "player_or_team_performance":
        if not result:
            return "No performance data found."
        return _format_metric_rows(result, "Performance:")

    if intent == "fixture_details":
        if not result:
            return "No fixtures found."
        lines = ["Fixtures:"]
        for rec in result:
            lines.append(
                f"- Fixture {rec.get('fixture')}: {rec.get('f.home_team')} vs "
                f"{rec.get('f.away_team')}, kickoff {rec.get('f.kickoff_time')}"
            )
        return "\n".join(lines) + "\n"

    # ---- Legacy intent names, kept unchanged for existing callers ----
    if intent == "PLAYER_STATS_COMPARE":
        if result:
            rec = result[0]
            return (f"{rec['p1.player_name']} has {rec['P1_Stat']} points, "
                    f"while {rec['p2.player_name']} has {rec['P2_Stat']} points.")
        else:
            return "No data found for the players."

    elif intent == "PLAYER_STATS_TOP":
        response = "Top Players:\n"
        for rec in result:
            response += f"- {rec['p.player_name']}: {rec['TotalStat']} points\n"
        return response

    elif intent == "PLAYER_STATS_DETAIL_TOTAL":
        if result:
            rec = result[0]
            return (f"{rec['p.player_name']} has {rec['Points']} points, "
                    f"{rec['Goals']} goals, and {rec['Assists']} assists this season.")
        else:
            return "No data found for the player."

    elif intent == "TEAM_FIXTURE_FIND":
        if result:
            response = "Upcoming Fixtures:\n"
            for rec in result:
                response += f"- GW {rec['gw.GW_number']} on {rec['f.kickoff_time']}\n"
            return response
        else:
            return "No upcoming fixtures found."

    elif intent == "TEAM_FIXTURE_HISTORY":
        if result:
            rec = result[0]
            return (f"Fixture {rec['f.fixture_number']}: "
                    f"Home Score: {rec['f.home_score']}, Away Score: {rec['f.away_score']}.")
        else:
            return "No fixture history found between the teams."

    elif intent == "POSITION_RANKING":
        response = "Top Players:\n"
        for rec in result:
            response += f"- {rec['p.player_name']}: {rec['PositionStat']} points\n"
        return response
    elif intent == "RECOMMENDATION_FORM":
        response = "Recommended Players Based on Recent Form:\n"
        for rec in result:
            response += f"- {rec['p.player_name']}: Avg Form {rec['AvgForm']}\n"
        return response
    elif intent == "BONUS_ANALYSIS":
        response = "Top Players by Bonus Points:\n"
        for rec in result:
            response += f"- {rec['p.player_name']}: {rec['TotalBonus']} bonus points\n"
        return response
    elif intent == "METADATA_QUERY":
        if result:
            rec = result[0]
            return f"Total number of players in the database: {rec['PlayerCount']}."
        else:
            return "No metadata available."
    elif intent == "PLAYER_STATS_DETAIL_GW":
        if result:
            rec = result[0]
            return (f"In GW {rec['gw.GW_number']}, {rec['p.player_name']} scored "
                    f"{rec['r.total_points']} points, with {rec['r.goals_scored']} goals "
                    f"and played {rec['r.minutes']} minutes.")
        else:
            return "No data found for the player in the specified gameweek."

    else:
        return "Query executed. Results retrieved."

## Embeddings

To create the feature-vector embeddings, a text description of each player was first constructed from the numerical features using the following Cypher query:

MATCH (p:Player)-[:PLAYS_AS]->(pos:Position)
MATCH (p)-[r:PLAYED_IN]->(f:Fixture)<-[:HAS_FIXTURE]-(gw:Gameweek)

// 2. Aggregate ALL stats
WITH p, pos, 
     count(r) as games_played,
     sum(r.total_points) as total_pts,
     sum(r.goals_scored) as goals,
     sum(r.assists) as assists,
     sum(r.minutes) as mins,
     sum(r.clean_sheets) as clean_sheets,
     sum(r.bonus) as bonus,
     sum(r.yellow_cards) as yellow,
     sum(r.red_cards) as red,
     sum(r.saves) as saves,
     round(avg(r.ict_index), 1) as avg_ict,
     round(avg(r.influence), 1) as avg_inf,
     round(avg(r.creativity), 1) as avg_cre,
     round(avg(r.threat), 1) as avg_thr,
     round(avg(r.form), 1) as avg_form

// 3. Construct the Rich Description
// We use descriptive sentences to help the LLM understand the context of the numbers
WITH p, 
     "Player Profile: " + p.player_name + " is a " + pos.name + ". " +
     "Performance: They accumulated " + toString(total_pts) + " total points, " +
     "scoring " + toString(goals) + " goals and providing " + toString(assists) + " assists. " +
     "Advanced Metrics: They had an average ICT Index of " + toString(avg_ict) + 
     " (Influence: " + toString(avg_inf) + ", Creativity: " + toString(avg_cre) + ", Threat: " + toString(avg_thr) + "). " +
     "Defensive & Discipline: They kept " + toString(clean_sheets) + " clean sheets, " +
     "made " + toString(saves) + " saves, and received " + toString(yellow) + " yellow cards. " +
     "Form & Impact: Their average form was " + toString(avg_form) + " and they earned " + toString(bonus) + " bonus points." 
     AS text

// 4. Write to the Player Node
SET p.fpl_features = text
RETURN p.player_name, p.fpl_features LIMIT 3

Embeddings were then created for these texts.

In [28]:
def reset_vector_index(index_name, label, property_name, dimension):
    """Drop ``index_name`` if it exists, then create it again as a cosine vector index.

    The index covers ``property_name`` on nodes labelled ``label`` with
    ``dimension`` dimensions. Both statements run on the notebook-level
    ``graph``; a failure in either step is printed and not re-raised.
    """
    create_query = f"""
    CREATE VECTOR INDEX {index_name}
    FOR (n:{label})
    ON (n.{property_name})
    OPTIONS {{indexConfig: {{
      `vector.dimensions`: {dimension},
      `vector.similarity_function`: 'cosine'
    }}}}
    """

    # (statement, message prefix printed when that statement fails)
    steps = (
        (f"DROP INDEX {index_name} IF EXISTS", "   - Warning dropping index: "),
        (create_query, "   - ❌ Error creating index: "),
    )
    for statement, failure_prefix in steps:
        try:
            graph.query(statement)
        except Exception as error:
            print(f"{failure_prefix}{error}")
In [29]:
def generate_feature_vector_embeddings(graph: "Neo4jGraph", embedding_model):
    """Embed every player's ``fpl_features`` text and rebuild the vector index.

    Args:
        graph: Object exposing ``query(cypher, params)``; used to read the
            texts and to write ``feature_vector_embedding`` back per player.
        embedding_model: Object exposing ``embed_query(text)`` returning a
            sequence of floats.

    Returns:
        None. When no player has ``fpl_features`` the function prints a notice
        and returns without touching the index.
    """
    fetch_query = """
    MATCH (p:Player)
    WHERE p.fpl_features IS NOT NULL
    RETURN p.player_name AS name, p.fpl_features AS text
    """
    data = graph.query(fetch_query)

    # Without rows there is no vector to take the dimension from; bail out
    # instead of failing on an unbound variable further down.
    if not data:
        print("No Player nodes with fpl_features found; nothing to embed.")
        return

    update_query = """
    MATCH (p:Player {player_name: $name})
    SET p.feature_vector_embedding = $embedding
    """

    dimension = None
    for row in data:
        vector = embedding_model.embed_query(row['text'])
        graph.query(update_query, {'name': row['name'], 'embedding': vector})
        dimension = len(vector)

    # NOTE(review): reset_vector_index issues its statements on the
    # notebook-level `graph`, not on the `graph` argument passed here.
    reset_vector_index(
        index_name="player_feature_index",
        label="Player",
        property_name="feature_vector_embedding",
        dimension=dimension
    )

    print("Feature Vector Embeddings generation complete!")

In [30]:
from langchain_huggingface import HuggingFaceEmbeddings
# Sentence-embedding model; the same instance is passed to the RAG pipeline
# later on so queries and stored texts are embedded by the same model.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Writes an embedding onto every Player with fpl_features and rebuilds the index.
generate_feature_vector_embeddings(graph, embedding_model)

Feature Vector Embeddings generation complete!


In [31]:
from langchain_community.vectorstores import Neo4jVector

def retrieve_embedding_search(query: str, embeddings_model, k: int = 3):
    """Return the ``fpl_features`` texts of the players most similar to ``query``.

    Args:
        query: The user's natural-language question.
        embeddings_model: Embedding model used to embed ``query``; it should
            be the one the stored vectors were produced with.
        k: Number of nearest player descriptions to return (default 3).

    Returns:
        The matching descriptions joined by ``"\\n---\\n"``.
    """
    vector_store = Neo4jVector.from_existing_index(
        embedding=embeddings_model,
        url=config.get('URI'),
        username=config.get('USERNAME'),
        password=config.get('PASSWORD'),
        index_name="player_feature_index",
        node_label="Player",
        embedding_node_property="feature_vector_embedding",
        text_node_property="fpl_features",
    )

    results = vector_store.similarity_search(query, k=k)

    formatted_context = "\n---\n".join([doc.page_content for doc in results])

    return formatted_context

In [32]:
from langchain_core.language_models import LLM
from typing import Optional, List, Any
from pydantic import Field

class GemmaLangChainWrapper(LLM):
    """LangChain ``LLM`` adapter around a chat-completion client.

    ``client`` must expose ``chat_completion(messages=..., max_tokens=...,
    temperature=..., stop=...)`` and return an object whose
    ``choices[0].message["content"]`` is the generated text (in this notebook
    a ``huggingface_hub.InferenceClient``).
    """

    # Chat-completion client; required.
    client: Any = Field(...)
    # Upper bound on generated tokens per call.
    max_tokens: int = 500
    # Sampling temperature sent with every request.
    temperature: float = 0.2

    @property
    def _llm_type(self) -> str:
        return "gemma_hf_api"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        ``run_manager`` and extra keyword arguments are accepted so the method
        matches the signature LangChain calls it with; they are not forwarded
        to the client. ``stop`` is forwarded as the stop-sequence list.
        """
        response = self.client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=self.max_tokens,
            temperature=self.temperature,
            stop=stop,
        )
        return response.choices[0].message["content"]


In [33]:
%pip install -U langchain langchain-community langchain-core pydantic typing-extensions

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [34]:
%pip install langchain langchain-community langchain-core langchain-huggingface

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [35]:
%pip install langchain-classic

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [44]:
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

def rag_pipline(llm, embedding_model, query):
    """Build a retrieval chain whose context combines Cypher and embedding results.

    Args:
        llm: LangChain-compatible language model used to answer.
        embedding_model: Embedding model used for the vector search.
        query: The user's question; it drives both retrieval steps.

    Returns:
        A retrieval chain. Call ``.invoke({"input": query})`` and read
        ``["answer"]``. The context is computed once, here, for ``query``.
    """
    # 1. Retrieve from KG via Cypher
    intent = classify_fpl_intents(query)
    entities = extract_fpl_entities(query)
    cypher_query = get_fpl_cypher_query(intent, entities)
    # get_fpl_cypher_query returns plain Cypher text, or a comment-like
    # sentinel when no template applies; the sentinel must not be executed.
    if cypher_query.lstrip().startswith(("--", "//")):
        formatted_cypher = "No structured query could be generated for this question."
    else:
        cypher_result = graph.query(cypher_query)
        formatted_cypher = format_query_result(intent, cypher_result)

    # 2. Retrieve similar player descriptions via the vector index
    embedding_context = retrieve_embedding_search(query, embedding_model)

    # 3. Combine Contexts
    combined_context = f"Cypher Results:\n{formatted_cypher}\n\nEmbedding Results:\n{embedding_context}"
    print("Combined Context:\n", combined_context)

    # 4. Create Prompt
    prompt = ChatPromptTemplate.from_template("""
    You are an expert Fantasy Premier League assistant.

    Use the context below to answer the user's question.

    <context>
    {context}
    </context>

    Question: {input}
    """)

    document_chain = create_stuff_documents_chain(llm, prompt)

    # 5. Wrap the pre-computed context in a retriever so it can be fed to the
    # standard retrieval chain.
    class DummyRetriever(BaseRetriever):
        def _get_relevant_documents(self, query: str):
            return [Document(page_content=combined_context)]

    dummy_retriever = DummyRetriever()
    qa_chain = create_retrieval_chain(dummy_retriever, document_chain)

    return qa_chain

In [39]:
from langchain_huggingface import HuggingFaceEndpoint
from huggingface_hub import InferenceClient

# Hugging Face API token is read from config.txt rather than hard-coded.
HF_TOKEN = config.get('HF_TOKEN')
# Inference client bound to the Gemma instruction-tuned model.
client = InferenceClient(
        model="google/gemma-2-2b-it",
        token=HF_TOKEN
    )
# LangChain-compatible wrapper used as the LLM of the RAG chain.
gemma_llm = GemmaLangChainWrapper(client=client,max_tokens=500)

In [45]:
# End-to-end run: build the chain for one question, then invoke it.
query = "Who is the top player when it comes to total points in season 2022-23?"
# The context is retrieved inside rag_pipline, so the chain is specific to this query.
rag_chain = rag_pipline(
    llm=gemma_llm,
    embedding_model=embedding_model,
    query=query
)
response = rag_chain.invoke({"input": query})
print(response["answer"])

Combined Context:
 Cypher Results:
Top Players:
- Erling Haaland: 272 points
- Harry Kane: 263 points
- Mohamed Salah: 239 points
- Martin Ødegaard: 212 points
- Marcus Rashford: 205 points
- Bukayo Saka: 202 points
- Gabriel Martinelli Silva: 198 points
- Kieran Trippier: 198 points
- Kevin De Bruyne: 183 points
- Ivan Toney: 182 points


Embedding Results:
Player Profile: Josh Dasilva is a MID. Performance: They accumulated 81 total points, scoring 4 goals and providing 2 assists. Advanced Metrics: They had an average ICT Index of 1.8 (Influence: 8.0, Creativity: 5.4, Threat: 4.8). Defensive & Discipline: They kept 5 clean sheets, made 0 saves, and received 1 yellow cards. Form & Impact: Their average form was 0.2 and they earned 3 bonus points.
---
Player Profile: Mohamed Naser El Sayed Elneny is a MID. Performance: They accumulated 27 total points, scoring 0 goals and providing 2 assists. Advanced Metrics: They had an average ICT Index of 0.8 (Influence: 3.9, Creativity: 3.3, Threa

In [None]:
# Smoke tests for the intent -> entities -> Cypher steps.
# get_fpl_cypher_query returns the Cypher text itself, so it is printed
# directly (there is no separate params mapping to index into).

# Test 1: Comparison
user_query_1 = "Who has more goals, Salah or Haaland?"
print(f"Query: {user_query_1}")

ents_1 = extract_fpl_entities(user_query_1)
intent_1 = classify_fpl_intents(user_query_1)
result_1 = get_fpl_cypher_query(intent_1, ents_1)

print(f"Entities: {ents_1}")
print(f"Intent: {intent_1}")
print(f"Cypher Query: {result_1}")
print("-" * 30)

# Test 2: Specific GW Stats
user_query_2 = "How many points did Saka get in Gameweek 5?"
print(f"Query: {user_query_2}")

ents_2 = extract_fpl_entities(user_query_2)
intent_2 = classify_fpl_intents(user_query_2)
result_2 = get_fpl_cypher_query(intent_2, ents_2)

print(f"Entities: {ents_2}")
print(f"Intent: {intent_2}")
print(f"Cypher Query: {result_2}")