In [1]:
from dotenv import load_dotenv
load_dotenv()

import logging
import sys
# strftime pattern for the log timestamps: full date followed by 24-hour time.
# The third date field must be %d (day of month); %y is the two-digit year and
# would make every log line show the year in place of the day.
date_strftime_format = "%Y-%m-%d %H:%M:%S"
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(message)s",
    datefmt=date_strftime_format,
)

from falkordb_gemini_kg.classes.model_config import (
    KnowledgeGraphModelConfig,
    StepModelConfig,
)
from falkordb_gemini_kg import KnowledgeGraph, Ontology
from falkordb_gemini_kg.classes.source import Source
import vertexai
import os
from random import shuffle
import json
from falkordb import FalkorDB

# Configure the Vertex AI SDK from the environment: PROJECT_ID selects the
# project and REGION the location. Either value is None when its variable is
# not set.
gcp_project = os.getenv("PROJECT_ID")
gcp_region = os.getenv("REGION")
vertexai.init(project=gcp_project, location=gcp_region)

### Import source data

In [2]:
# Directory holding the files to ingest. Set the UFC_SOURCE_DIR environment
# variable to point somewhere else; the fallback is the path this notebook was
# originally written against.
src_files = os.getenv(
    "UFC_SOURCE_DIR",
    "/Users/davidzimberknopf/Documents/Apps/ufc-crawler/data/event/fight",
)

# Wrap every entry of the source directory in a Source object.
# os.listdir returns entries in arbitrary order, so sort them to make the
# contents of `sources` (and any slice taken from it) the same on every run.
sources = [
    Source(os.path.join(src_files, entry))
    for entry in sorted(os.listdir(src_files))
]

### Automatically create the ontology from the sources

In [None]:
# Free-text guidance passed to the ontology extraction as `boundaries`.
boundaries = """
    Extract only the most relevant information about the fighters, fights, and events in the UFC.
    Do not create nodes for what can be expressed as attributes.
"""

# Build the ontology from roughly 10% of the sources. round() yields 0 for
# five or fewer sources, which would hand an empty list to the extraction,
# so always keep at least one source in the sample.
sample_size = max(1, round(len(sources) * 0.1))
ontology = Ontology.from_sources(
    sources=sources[:sample_size],
    boundaries=boundaries,
    model_config=StepModelConfig(model="gemini-1.5-pro-001"),
)

# Store the generated ontology in the "ufc_ontology" graph.
db = FalkorDB()
graph = db.select_graph("ufc_ontology")
ontology.save_to_graph(graph)

# Save ontology to json file; create the target directory first so the write
# does not fail on a fresh checkout.
os.makedirs("ontologies", exist_ok=True)
with open("ontologies/ufc_ontology.json", "w", encoding="utf-8") as file:
    json.dump(ontology.to_json(), file, indent=2)

### Read ontology from json file

In [2]:
# Rebuild the ontology from the corrected JSON file on disk.
ontology_file = "ontologies/ufc_ontology_corrected.json"
with open(ontology_file, mode="r", encoding="utf-8") as json_fh:
    ontology = Ontology.from_json(json.load(json_fh))

# Write the loaded ontology into the "ufc_ontology" graph.
db = FalkorDB()
graph = db.select_graph("ufc_ontology")
ontology.save_to_graph(graph)

# Create the "ufc" knowledge graph on top of that ontology.
kg_model_config = KnowledgeGraphModelConfig.from_dict(
    d={"model": "gemini-1.5-pro-001"}
)
kg = KnowledgeGraph(name="ufc", model_config=kg_model_config, ontology=ontology)

2024-06-24 10:02:45 Query: MERGE (n:Fighter {name: "string!*", nickname: "string!"}) RETURN n
2024-06-24 10:02:45 Query: MERGE (n:Fight {name: "string!*", date: "string*", location: "string*", method: "string", time: "string", details: "string", rounds: "number", decision: "string", winner: "string", loser: "string"}) RETURN n
2024-06-24 10:02:45 Query: MERGE (n:FightTotals {fight_name: "string!*", fighter: "string!*", knockdowns: "number", significant_strikes: "number", significant_strikes_attempted: "number", significant_strikes_percentage: "number", total_strikes: "number", total_strikes_attempted: "number", takedowns: "number", takedowns_attempted: "number", takedown_percentage: "number", submissions_attempted: "number", passes: "number", reversals: "number"}) RETURN n
2024-06-24 10:02:45 Query: MERGE (n:Event {name: "string!*", location: "string*", date: "string*"}) RETURN n
2024-06-24 10:02:45 Query: MERGE (n:WeightClass {name: "string!*"}) RETURN n
2024-06-24 10:02:45 Query: MER

### Process the raw source data into the knowledge graph

In [None]:
# Hand the full list of Source objects to the knowledge graph for processing.
kg.process_sources(sources)

### Ask a single question to the model

In [7]:
# One-off question; the value returned by kg.ask is the cell's displayed output.
question = "What were the last 5 fights? When were they? How many rounds did they have?"
kg.ask(question)

2024-06-24 10:05:46 Cypher: 
MATCH (f:Fight)
RETURN f.name, f.date, f.rounds
ORDER BY f.date DESC
LIMIT 5

2024-06-24 10:05:46 Context size: 5
2024-06-24 10:05:46 Context characters: 289


'The last 5 fights were Kyung Ho Kang v Muin Gafurov, Robert Whittaker v Ikram Aliskerov, Shara Magomedov v Antonio Trocoli, Kelvin Gastelum v Daniel Rodriguez, and Sergei Pavlovich v Alexander Volkov. They all took place on June 22, 2024.  Kyung Ho Kang v Muin Gafurov, Shara Magomedov v Antonio Trocoli, Kelvin Gastelum v Daniel Rodriguez, and Sergei Pavlovich v Alexander Volkov were 3 round fights. Robert Whittaker v Ikram Aliskerov was a 1 round fight. \n'

In [6]:
# One-off question; the value returned by kg.ask is the cell's displayed output.
question = "How many takedowns did Alexandre Pantoja have in all fights?"
kg.ask(question)

2024-06-24 10:03:50 Cypher: 
MATCH (f:Fighter {name: "Alexandre Pantoja"})-[:FOUGHT_IN]->(fight:Fight)<-[:FIGHT_TOTAL_STATS]-(totals:FightTotals)
RETURN totals.takedowns, totals

2024-06-24 10:03:50 Error: Edge FIGHT_TOTAL_STATS has a mismatched source or target. Make sure the edge direction is correct. The edge should connect Fight to FightTotals.
2024-06-24 10:03:52 Cypher: 
MATCH (f:Fighter {name: "Alexandre Pantoja"})-[:FOUGHT_IN]->(fight:Fight)-[:FIGHT_TOTAL_STATS]->(totals:FightTotals)
RETURN totals.takedowns, totals

2024-06-24 10:03:52 Context size: 10
2024-06-24 10:03:52 Context characters: 3579


'Alexandre Pantoja had 23 takedowns in all fights. \n'

### Start a chat session with the model

In [3]:
# Open a chat session on the knowledge graph, send one message and print the reply.
chat = kg.chat_session()

reply = chat.send_message("Who is Salsa Boy?")
print(reply)

2024-06-24 10:02:50 Cypher: 
MATCH (f:Fighter {nickname: 'Salsa Boy'}) RETURN f

2024-06-24 10:02:50 Context size: 1
2024-06-24 10:02:50 Context characters: 65


Salsa Boy is the nickname of Waldo Cortes-Acosta. 

