In [7]:
# src/test_storage.py
from src.utils.data_loader import DataLoader
from src.storage.data_store import DataStore
from src.utils.schemas import SCHEMAS

def main():
    loader = DataLoader(SCHEMAS)
    cleaned = loader.load_all("data")
    genres = cleaned.get("genres", [])
    print(f"Loaded {len(genres)} genres.")

    # Use reuse_free_slots=True to reuse deleted IDs (set False to always append)
    store = DataStore(index_attributes=["id", "name"], reuse_free_slots=True)

    # Insert all genres
    for record in genres:
        store.insert_record(record)
    print("Inserted into DataStore.")

    print("\n=== Search by name 'strategy' ===")
    results = store.search_by_attr("name", "strategy")
    print(results)

    print("\n=== Genres with id between 10 and 20 ===")
    ranged = store.range_query("id", 10, 20)
    print("Found:", len(ranged))
    for r in ranged[:10]:
        print(r)

    print("\n=== Delete record 0 ===")
    ok = store.delete_record(0)
    print("Deleted:", ok)
    print("Record 0 now:", store.get_record(0))

    new_id = store.insert_record(genres[0])
    print("Reinserted at ID:", new_id)
    print("Record at reinserted ID:", store.get_record(new_id))
    print("=== End of DataStore test ===")

if __name__ == "__main__":
    main()



Loaded 154 genres.
Inserted into DataStore.

=== Search by name 'strategy' ===
[{'id': 5, 'name': 'strategy'}]

=== Genres with id between 10 and 20 ===
Found: 11
{'id': 10, 'name': 'simulaatio'}
{'id': 11, 'name': 'simulation'}
{'id': 12, 'name': 'sexual content'}
{'id': 13, 'name': 'azione'}
{'id': 14, 'name': 'aksiyon'}
{'id': 15, 'name': 'racing'}
{'id': 16, 'name': 'massively multiplayer'}
{'id': 17, 'name': 'simuladores'}
{'id': 18, 'name': 'eventyr'}
{'id': 19, 'name': 'mmo'}

=== Delete record 0 ===
Deleted: True
Record 0 now: None
Reinserted at ID: 0
Record at reinserted ID: {'id': 1, 'name': 'aventură'}
=== End of DataStore test ===


In [8]:
# src/test_storage.py

from src.utils.data_loader import DataLoader
from src.storage.data_store import DataStore
from src.utils.schemas import SCHEMAS
from src.query_engine.query_handler import QueryEngine


def main():
    # -----------------------------
    # 1) Load data using DataLoader
    # -----------------------------
    loader = DataLoader(SCHEMAS)
    cleaned = loader.load_all("data")
    genres = cleaned.get("genres", [])
    print(f"Loaded {len(genres)} genres.")

    # --------------------------------------
    # 2) Create DataStore and insert records
    # --------------------------------------
    # Use reuse_free_slots=True to reuse deleted IDs (set False to always append)
    store = DataStore(index_attributes=["id", "name"], reuse_free_slots=True)

    # Insert all genres into DataStore
    for record in genres:
        store.insert_record(record)

    print("Inserted into DataStore.\n")

    # --------------------------------------
    # 3) DataStore tests
    # --------------------------------------
    print("========== DATASTORE TESTS ==========\n")

    # Search by name using AVL index
    print("=== DataStore: Search by name 'strategy' ===")
    results = store.search_by_attr("name", "strategy")
    print(results)

    # Range query on id
    print("\n=== DataStore: Genres with id between 10 and 20 ===")
    ranged = store.range_query("id", 10, 25)
    print("Found:", len(ranged))
    for r in ranged[:10]:
        print(r)

    # Delete by internal record ID
    print("\n=== DataStore: Delete record with internal ID 0 ===")
    ok = store.delete_record(0)
    print("Deleted:", ok)
    print("Record 0 now:", store.get_record(0))

    # Reinsert one record and check ID reuse
    new_id = store.insert_record(genres[0])
    print("Reinserted at internal ID:", new_id)
    print("Record at reinserted ID:", store.get_record(new_id))

    print("\n=== End of DataStore tests ===\n")

    # --------------------------------------
    # 4) QueryEngine tests
    # --------------------------------------
    print("========== QUERY ENGINE TESTS ==========\n")

    # Create a QueryEngine on top of the same DataStore.
    engine = QueryEngine(store, key_attribute="id")

    # Pick a target record from the original genres list
    if not genres:
        print("No genres loaded, skipping QueryEngine tests.")
        return

    target_record = genres[10] if len(genres) > 10 else genres[0]
    target_id = target_record["id"]
    original_name = target_record["name"]

    print(f"Target record for tests: id={target_id}, name='{original_name}'")

    # ---- QueryEngine: search_record (READ) ----
    print("\n=== QueryEngine: search_record(id) ===")
    qe_search_results = engine.search_record(target_id)
    print(qe_search_results)

    # ---- QueryEngine: update_record (UPDATE) ----
    print("\n=== QueryEngine: update_record(id, 'name', 'test_name') ===")
    updated = engine.update_record(target_id, "name", "test_name")
    print("Updated:", updated)
    print("After update:", engine.search_record(target_id))

    # Restore original name so we don't leave dirty state
    engine.update_record(target_id, "name", original_name)
    print("Restored original name:", engine.search_record(target_id))

    # ---- QueryEngine: range_query (delegated) ----
    print("\n=== QueryEngine: range_query('id', 10, 20) ===")
    qe_range_results = engine.range_query("id", 10, 20)
    print(f"Found: {len(qe_range_results)} records")
    for rec in qe_range_results[:10]:
        print(rec)

    # ---- QueryEngine: delete_record (DELETE) ----
    print(f"\n=== QueryEngine: delete_record(id={target_id}) ===")
    deleted = engine.delete_record(target_id)
    print("Deleted:", deleted)
    print("After delete, search_record(id):", engine.search_record(target_id))

    print("\n=== End of QueryEngine tests ===")


if __name__ == "__main__":
    main()


Loaded 154 genres.
Inserted into DataStore.


=== DataStore: Search by name 'strategy' ===
[{'id': 5, 'name': 'strategy'}]

=== DataStore: Genres with id between 10 and 20 ===
Found: 16
{'id': 10, 'name': 'simulaatio'}
{'id': 11, 'name': 'simulation'}
{'id': 12, 'name': 'sexual content'}
{'id': 13, 'name': 'azione'}
{'id': 14, 'name': 'aksiyon'}
{'id': 15, 'name': 'racing'}
{'id': 16, 'name': 'massively multiplayer'}
{'id': 17, 'name': 'simuladores'}
{'id': 18, 'name': 'eventyr'}
{'id': 19, 'name': 'mmo'}

=== DataStore: Delete record with internal ID 0 ===
Deleted: True
Record 0 now: None
Reinserted at internal ID: 0
Record at reinserted ID: {'id': 1, 'name': 'aventură'}

=== End of DataStore tests ===


Target record for tests: id=11, name='simulation'

=== QueryEngine: search_record(id) ===
[{'id': 11, 'name': 'simulation'}]

=== QueryEngine: update_record(id, 'name', 'test_name') ===
Updated: True
After update: [{'id': 11, 'name': 'test_name'}]
Restored original name: [{'id': 11, '

Test Graphs


In [9]:
from src.graph.graph_model import Graph

g = Graph()
a = g.insert_vertex({"name": "DevA"}, kind="developer", id_=1)
b = g.insert_vertex({"name": "PubX"}, kind="publisher", id_=10)

g.insert_edge(a, b, {"games": {123}, "weight": 1})

print("Vertices:", g.vertex_count())    # expect 2
print("Edges:", g.edge_count())        # expect 1

for v in g.vertices():
    print(v, "degree:", g.degree(v))


Vertices: 2
Edges: 1
Vertex(developer:DevA) degree: 1
Vertex(publisher:PubX) degree: 1


In [11]:
from src.graph.graph_model import Graph
from src.graph.graph_algorithms import bfs_traversal, dfs_traversal, shortest_path, connected_components

g = Graph()
a = g.insert_vertex({"name": "A"})
b = g.insert_vertex({"name": "B"})
c = g.insert_vertex({"name": "C"})
g.insert_edge(a, b)
g.insert_edge(b, c)

print("BFS from A:", bfs_traversal(g, a))
print("DFS from A:", dfs_traversal(g, a))
print("Shortest A -> C:", shortest_path(g, a, c))
print("Components:", connected_components(g))


BFS from A: [Vertex(None:A), Vertex(None:B), Vertex(None:C)]
DFS from A: [Vertex(None:A), Vertex(None:B), Vertex(None:C)]
Shortest A -> C: [Vertex(None:A), Vertex(None:B), Vertex(None:C)]
Components: [[Vertex(None:A), Vertex(None:B), Vertex(None:C)]]


Real Data Graphs   implementation

In [12]:
from src.utils.data_loader import DataLoader
from src.utils.schemas import SCHEMAS
from src.graph.graph_model import build_dev_pub_graph
from src.graph.graph_algorithms import bfs_traversal, dfs_traversal, shortest_path, connected_components

# Adjust "data" path if your CSVs are elsewhere
loader = DataLoader(SCHEMAS)
cleaned = loader.load_all("data")

graph, dev_vertices, pub_vertices = build_dev_pub_graph(cleaned)

print("Number of vertices:", graph.vertex_count())
print("Number of edges:", graph.edge_count())


Number of vertices: 186925
Number of edges: 116326


In [None]:
# Show a few developers & publishers to know what IDs/names you have
print("Some developers:")
for dev_id, v in list(dev_vertices.items())[:10]:
    print(dev_id, "->", v.element().get("name"))

print("\nSome publishers:")
for pub_id, v in list(pub_vertices.items())[:10]:
    print(pub_id, "->", v.element().get("name"))





Some developers:
1 -> skala entertainment
2 -> flicksync
3 -> sn mobile technology
4 -> chestnut team
5 -> sqyd.studio
6 -> susewind games
7 -> husky-leaf studios
8 -> algox interactive ltd.
9 -> contortionist games
10 -> procrastination games

Some publishers:
1 -> skala entertainment
2 -> playdigious originals
3 -> sn mobile technology
4 -> sqyd.studio
5 -> susewind games
6 -> husky-leaf studios
7 -> algox interactive ltd.
8 -> contortionist games
9 -> procrastination games
10 -> hoffnung haydn

 
 
 

BFS order from developer skala entertainment
   Vertex(developer:skala entertainment)
   Vertex(publisher:skala entertainment)

DFS order from same developer
   Vertex(developer:skala entertainment)
   Vertex(publisher:skala entertainment)


In [None]:
# Choose any existing developer id you saw above, e.g. dev_id_example = 5
dev_id_example = next(iter(dev_vertices.keys()))   # just take the first one
start_dev = dev_vertices[dev_id_example]

bfs_order = bfs_traversal(graph, start_dev)
dfs_order = dfs_traversal(graph, start_dev)

print("BFS order from developer", start_dev.element().get("name"))
for v in bfs_order[:20]:  # print only first 20
    print("  ", v)

print("\nDFS order from same developer")
for v in dfs_order[:20]:
    print("  ", v)
    
    
    
    # Pick one developer and one publisher (for demo, first ones)
dev_example = next(iter(dev_vertices.values()))
pub_example = next(iter(pub_vertices.values()))

print("Developer:", dev_example.element().get("name"))
print("Publisher:", pub_example.element().get("name"))

path = shortest_path(graph, dev_example, pub_example)

if path is None:
    print("No path between them.")
else:
    print("\nShortest path:")
    for v in path:
        kind = v.kind()
        name = v.element().get("name")
        print(f"  {kind}: {name}")
        
        
        
        
        




BFS order from developer skala entertainment
   Vertex(developer:skala entertainment)
   Vertex(publisher:skala entertainment)

DFS order from same developer
   Vertex(developer:skala entertainment)
   Vertex(publisher:skala entertainment)
Developer: skala entertainment
Publisher: skala entertainment

Shortest path:
  developer: skala entertainment
  publisher: skala entertainment
Number of connected components: 76072
Largest component size: 18283

Some vertices from the largest component:
   Vertex(developer:lazurite games)
   Vertex(publisher:deck13)
   Vertex(developer:retrovertigo)
   Vertex(developer:knasibas)
   Vertex(developer:datadyne llc)
   Vertex(publisher:datadyne llc)
   Vertex(developer:adeel jafree)
   Vertex(developer:david schütze)
   Vertex(developer:a grumpy fox)
   Vertex(developer:shroomoon ug)
   Vertex(developer:just2d)
   Vertex(developer:はがね)
   Vertex(developer:fourexo entertainment)
   Vertex(publisher:fourexo entertainment)
   Vertex(developer:2nd player ga

In [19]:
components = connected_components(graph)
print("Number of connected components:", len(components))

# Show size of the largest component
largest = max(components, key=len)
print("Largest component size:", len(largest))

# Optionally print a few vertices from that component
print("\nSome vertices from the largest component:")
for v in largest[:20]:
    print("  ", v)

Number of connected components: 76072
Largest component size: 18283

Some vertices from the largest component:
   Vertex(developer:lazurite games)
   Vertex(publisher:deck13)
   Vertex(developer:retrovertigo)
   Vertex(developer:knasibas)
   Vertex(developer:datadyne llc)
   Vertex(publisher:datadyne llc)
   Vertex(developer:adeel jafree)
   Vertex(developer:david schütze)
   Vertex(developer:a grumpy fox)
   Vertex(developer:shroomoon ug)
   Vertex(developer:just2d)
   Vertex(developer:はがね)
   Vertex(developer:fourexo entertainment)
   Vertex(publisher:fourexo entertainment)
   Vertex(developer:2nd player games)
   Vertex(developer:wobble ghost)
   Vertex(developer:gerrit wolf)
   Vertex(developer:umami tiger (matthias linda))
   Vertex(publisher:deck13 spotlight)
   Vertex(developer:florian wolf)
