In [1]:
from dotenv import load_dotenv
# Load settings from a .env file first, so the os.getenv lookups further down
# (PROJECT_ID, REGION) can find them.
load_dotenv()

import logging
import sys
# strftime pattern for log timestamps: year-month-day hour:minute:second.
# The day-of-month directive is %d; %y is the two-digit year and would print
# e.g. "2024-06-24" on every day of June 2024.
date_strftime_format = "%Y-%m-%d %H:%M:%S"
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s", datefmt=date_strftime_format)

from falkordb_gemini_kg.classes.model_config import (
    KnowledgeGraphModelConfig,
    StepModelConfig,
)
from falkordb_gemini_kg import KnowledgeGraph, Ontology
from falkordb_gemini_kg.classes.source import Source
import vertexai
import os
from random import shuffle
import json
from falkordb import FalkorDB

# Set up the Vertex AI client. Project and region come from the environment;
# os.getenv yields None for a variable that is not set.
gcp_project = os.getenv("PROJECT_ID")
gcp_region = os.getenv("REGION")
vertexai.init(project=gcp_project, location=gcp_region)

### Import source data

In [2]:
# Directory holding the crawled fight files. Set UFC_SOURCE_DIR (e.g. in .env)
# to point at your own copy; the original local path is kept as the fallback.
src_files = os.getenv(
    "UFC_SOURCE_DIR",
    "/Users/davidzimberknopf/Documents/Apps/ufc-crawler/data/event/fight",
)

# Create one Source per directory entry. os.listdir returns entries in
# arbitrary order, so sort them: later cells slice `sources` positionally and
# should see the same files on every run.
sources = [
    Source(os.path.join(src_files, file))
    for file in sorted(os.listdir(src_files))
]

### Automatically create the ontology from the sources

In [None]:
# Free-text guidance passed to the ontology extraction step.
boundaries = """
    Extract only the most relevant information about the fighters, fights, and events in the UFC.
    Do not create nodes for what can be expressed as attributes.
"""

# Infer the ontology from roughly 10% of the sources. round() yields 0 for
# five or fewer sources, so use at least one to avoid an empty sample.
ontology_sample_size = max(1, round(len(sources) * 0.1))

ontology = Ontology.from_sources(
    sources=sources[:ontology_sample_size],
    boundaries=boundaries,
    model_config=StepModelConfig(model="gemini-1.5-pro-001"),
)


# Store the generated ontology in the "ufc_ontology" graph.
db = FalkorDB()
graph = db.select_graph("ufc_ontology")
ontology.save_to_graph(graph)

# Save ontology to json file
with open("ufc_ontology.json", "w", encoding="utf-8") as file:
    json.dump(ontology.to_json(), file, indent=2)

### Read ontology from json file

In [2]:
# Load the ontology from a JSON file. Note the file name differs from the
# "ufc_ontology.json" written by the generation cell.
ontology_file = "ufc_ontology_corrected.json"
with open(ontology_file, "r", encoding="utf-8") as json_file:
    ontology_dict = json.load(json_file)
ontology = Ontology.from_json(ontology_dict)

# Write the loaded ontology into the "ufc_ontology" graph.
db = FalkorDB()
graph = db.select_graph("ufc_ontology")
ontology.save_to_graph(graph)

# Build the "ufc" knowledge graph on top of that ontology.
kg_model_config = KnowledgeGraphModelConfig.from_dict(
    d={"model": "gemini-1.5-pro-001"}
)
kg = KnowledgeGraph(name="ufc", model_config=kg_model_config, ontology=ontology)

2024-06-24 10:28:26 Query: MERGE (n:Fighter {name: "string!*", nickname: "string!"}) RETURN n
2024-06-24 10:28:26 Query: MERGE (n:Fight {name: "string!*", date: "string*", location: "string*", method: "string", time: "string", details: "string", rounds: "number", decision: "string", winner: "string", loser: "string"}) RETURN n
2024-06-24 10:28:26 Query: MERGE (n:FightTotals {fight_name: "string!*", fighter: "string!*", knockdowns: "number", significant_strikes: "number", significant_strikes_attempted: "number", significant_strikes_percentage: "number", total_strikes: "number", total_strikes_attempted: "number", takedowns: "number", takedowns_attempted: "number", takedown_percentage: "number", submissions_attempted: "number", passes: "number", reversals: "number"}) RETURN n
2024-06-24 10:28:26 Query: MERGE (n:Event {name: "string!*", location: "string*", date: "string*"}) RETURN n
2024-06-24 10:28:26 Query: MERGE (n:WeightClass {name: "string!*"}) RETURN n
2024-06-24 10:28:26 Query: MER

### Process the raw source data into the knowledge graph

In [None]:
# Hand every Source collected from the data directory to the knowledge graph.
# NOTE(review): presumably a long-running, LLM-backed step — confirm before re-running.
kg.process_sources(sources)

### Ask a single question to the model

In [5]:
# One-off question touching several Fight attributes (recency, date, rounds).
question = "What were the last 5 fights? When were they? How many rounds did they have?"
kg.ask(question)

2024-06-24 10:24:52 Cypher: 
MATCH (f:Fight)
RETURN f
ORDER BY f.date DESC
LIMIT 5



'The last five fights were Kyung Ho Kang v Muin Gafurov (3 rounds), Robert Whittaker v Ikram Aliskerov (1 round), Shara Magomedov v Antonio Trocoli (3 rounds), Kelvin Gastelum v Daniel Rodriguez (3 rounds), and Sergei Pavlovich v Alexander Volkov (3 rounds).  All of these fights took place on June 22, 2024. \n'

In [3]:
# One-off question that needs an aggregate over fighters and their fights.
question = "Who fought the most fights?"
kg.ask(question)

2024-06-24 10:28:35 Cypher: 
MATCH (f:Fighter)-[:FOUGHT_IN]->(fight:Fight)
RETURN f, count(fight) AS fightCount
ORDER BY fightCount DESC
LIMIT 1



'Charles "InnerG" Johnson fought the most fights. \n'

In [4]:
# One-off question about a single fighter's per-fight statistics.
question = "How many takedowns did Charles Johnson had in all his fights?"
kg.ask(question)

2024-06-24 10:28:44 Cypher: 
MATCH (f:Fighter {name: "Charles Johnson"})-[:FOUGHT_IN]->(fight:Fight)<-[:FIGHT_TOTAL_STATS]-(totals:FightTotals)
RETURN totals.takedowns, totals.takedowns_attempted

2024-06-24 10:28:44 Error: Edge FIGHT_TOTAL_STATS has a mismatched source or target. Make sure the edge direction is correct. The edge should connect Fight to FightTotals.
2024-06-24 10:28:46 Cypher: 
MATCH (f:Fighter {name: "Charles Johnson"})-[:FOUGHT_IN]->(fight:Fight)-[:FIGHT_TOTAL_STATS]->(totals:FightTotals)
RETURN totals.takedowns, totals.takedowns_attempted



'Charles Johnson had a total of 22 takedowns in all his fights. \n'

### Start a chat session with the model

In [6]:
chat = kg.chat_session()

# Send both questions on the same session, in order. The second one says "he",
# so it presumably relies on the session remembering the first answer.
for question in (
    "Who is Salsa Boy?",
    "How many takedown attempts did he have in all fights?",
):
    print(chat.send_message(question))

2024-06-24 10:30:28 Cypher: 
MATCH (f:Fighter {nickname: 'Salsa Boy'})
RETURN f



Salsa Boy is Waldo Cortes-Acosta. 



2024-06-24 10:30:31 Cypher: 

2024-06-24 10:30:31 Error: Error: empty query.
2024-06-24 10:30:33 Cypher: 
MATCH (f:Fighter)-[:FOUGHT_IN]->(fight:Fight)<-[:FIGHT_TOTAL_STATS]-(stats:FightTotals)
WHERE f.nickname = 'Salsa Boy'
RETURN sum(stats.takedowns_attempted)

2024-06-24 10:30:33 Error: Edge FIGHT_TOTAL_STATS has a mismatched source or target. Make sure the edge direction is correct. The edge should connect Fight to FightTotals.
2024-06-24 10:30:35 Cypher: 
MATCH (f:Fighter)-[:FOUGHT_IN]->(fight:Fight)-[:FIGHT_TOTAL_STATS]->(stats:FightTotals)
WHERE f.nickname = 'Salsa Boy'
RETURN sum(stats.takedowns_attempted)



He had 12 takedown attempts in all fights. 

