In [1]:
import os
from dotenv import load_dotenv
from arango import ArangoClient

In [2]:
# Load environment variables from the .env file.
# override=True lets the .env values win over variables that are already set
# in the process environment. Without it, a USER (or HOST) variable preset by
# the shell would shadow the value from .env that the next cell reads as the
# database credentials.
load_dotenv(override=True)

True

In [3]:
# Read the ArangoDB connection settings from the environment.
# Each value is None when the corresponding variable is not set.
# NOTE(review): USER is commonly preset by Unix shells -- confirm that the
# value read here is the database user and not the OS login name.
host, username, password = map(os.getenv, ("HOST", "USER", "PASSWORD"))

In [4]:
# Open a client for the configured host and log in to the _system database,
# which is the one used here to check for and create other databases.
client = ArangoClient(hosts=host)
sys_db = client.db(name='_system', username=username, password=password)

# Create the 'events global' database on first run; later runs find it
# already present and leave it untouched.
if not sys_db.has_database(name='events global'):
    sys_db.create_database(name='events global')

In [5]:
# Connect to the 'events global' database
db = client.db('events global', username=username, password=password)

# Get-or-create the vertex collections.
# Each name is bound on every run: to the existing collection when it is
# already there, otherwise to the newly created one. Binding only inside
# the "does not exist yet" branch would leave these names undefined on any
# run after the first.
events = (
    db.collection('events') if db.has_collection('events')
    else db.create_collection('events')
)
countries = (
    db.collection('countries') if db.has_collection('countries')
    else db.create_collection('countries')
)
groups = (
    db.collection('groups') if db.has_collection('groups')
    else db.create_collection('groups')
)

In [6]:
# Get-or-create the edge collections (edge=True makes them edge collections).
# As with the vertex collections, each name is bound on every run so that it
# is defined whether or not the collection had to be created.
located_in = (
    db.collection('located_in') if db.has_collection('located_in')
    else db.create_collection('located_in', edge=True)
)
caused_by = (
    db.collection('caused_by') if db.has_collection('caused_by')
    else db.create_collection('caused_by', edge=True)
)

In [7]:
# Get-or-create the graph that defines the relationships
if db.has_graph('events_graph'):
    graph = db.graph('events_graph')
else:
    graph = db.create_graph('events_graph')

# Register every edge definition that is still missing. Checking each one
# separately (rather than only when the graph is newly created) repairs a
# graph that exists but lost or never received a definition, e.g. after a
# previous run failed part-way through.

# events --located_in--> countries
if not graph.has_edge_definition('located_in'):
    graph.create_edge_definition(
        edge_collection='located_in',
        from_vertex_collections=['events'],
        to_vertex_collections=['countries']
    )

# groups --caused_by--> events
# NOTE(review): the edge runs from the group to the event even though the
# name reads "event caused_by group" -- confirm the direction is intended.
if not graph.has_edge_definition('caused_by'):
    graph.create_edge_definition(
        edge_collection='caused_by',
        from_vertex_collections=['groups'],
        to_vertex_collections=['events']
    )

In [8]:
# Bind a handle for each of the three vertex collections and the two edge
# collections, in the same order as the names on the left-hand side.
events, countries, groups, located_in, caused_by = map(
    db.collection,
    ('events', 'countries', 'groups', 'located_in', 'caused_by'),
)

In [9]:
# Function to process and upload your dataset
def upload_dataset(dataset):
    """Upload event records into the events / countries / groups collections.

    For every record this stores an event vertex, makes sure the record's
    country and (when known) responsible group exist as vertices, and links
    them with a ``located_in`` edge (event -> country) and a ``caused_by``
    edge (group -> event).

    The upload is best-effort: a failure on a single document is printed and
    processing continues. It is also safe to re-run: vertices and edges that
    are already stored are skipped instead of being inserted a second time,
    and no edge is written towards a vertex whose insert failed.

    Uses the module-level collection handles ``events``, ``countries``,
    ``groups``, ``located_in`` and ``caused_by``.

    Args:
        dataset: Iterable of dict-like records with the keys
            'Cleaned Incident', 'Year', 'Cleaned Event', 'Cleaned Impact',
            'Cleaned Group', 'Cleaned Outcome', 'Location', 'Latitude' and
            'Longitude'. Missing cells may be None or float NaN (which is
            what pandas produces for blank CSV cells).

    Returns:
        None. Progress and errors are reported with print().

    Raises:
        KeyError: If a record lacks one of the expected keys.
    """
    import math

    def _clean(value):
        """Return None for None/NaN, otherwise the value unchanged."""
        if value is None or (isinstance(value, float) and math.isnan(value)):
            return None
        return value

    def _as_key(text):
        """Build a document key from a name by replacing spaces with underscores."""
        return str(text).replace(' ', '_')

    def _ensure_vertex(collection, doc, label):
        """Insert doc unless its key is already stored; True if it is stored afterwards."""
        key = doc['_key']
        try:
            if collection.has(key):
                print(f"Skipped existing {label}: {key}")
            else:
                collection.insert(doc)
                print(f"Added {label}: {key}")
            return True
        except Exception as e:
            print(f"Error adding {label} {key}: {e}")
            return False

    def _ensure_edge(collection, label, from_collection, from_key, to_collection, to_key):
        """Insert the edge unless an edge with the same endpoints already exists."""
        edge_doc = {
            '_from': f"{from_collection}/{from_key}",
            '_to': f"{to_collection}/{to_key}"
        }
        try:
            # Edges carry no explicit _key, so look for one with the same
            # endpoints; a plain insert would add a duplicate on every re-run.
            if any(True for _ in collection.find(edge_doc, limit=1)):
                print(f"Skipped existing {label} edge: {from_key} -> {to_key}")
            else:
                collection.insert(edge_doc)
                print(f"Added {label} edge: {from_key} -> {to_key}")
        except Exception as e:
            print(f"Error adding {label} edge: {e}")

    # Keys of countries and groups known to be stored, so each one is checked
    # against the database at most once per call.
    known_countries = set()
    known_groups = set()

    for record in dataset:
        location = _clean(record['Location'])
        group_name = _clean(record['Cleaned Group'])
        latitude = _clean(record['Latitude'])
        longitude = _clean(record['Longitude'])

        # Create event document (NaN cells are stored as null)
        event_key = f"{record['Cleaned Incident']}_{record['Year']}".replace(' ', '_')
        event_doc = {
            '_key': event_key,
            'incident_name': _clean(record['Cleaned Incident']),
            'year': _clean(record['Year']),
            'event_type': _clean(record['Cleaned Event']),
            'impact': _clean(record['Cleaned Impact']),
            'responsible_group': group_name,
            'outcome': _clean(record['Cleaned Outcome']),
            'country': location,
            'latitude': latitude,
            'longitude': longitude
        }

        # Without a stored event there is nothing for the edges to point at.
        if not _ensure_vertex(events, event_doc, 'event'):
            continue

        # Add country if not already added, then connect event to country
        if location is not None:
            country_key = _as_key(location)
            if country_key not in known_countries:
                country_doc = {
                    '_key': country_key,
                    'name': location,
                    'latitude': latitude,
                    'longitude': longitude
                }
                if _ensure_vertex(countries, country_doc, 'country'):
                    known_countries.add(country_key)

            if country_key in known_countries:
                _ensure_edge(located_in, 'location',
                             'events', event_key, 'countries', country_key)

        # Add responsible group if available, then connect group to event
        if group_name and group_name != 'Unknown':
            group_key = _as_key(group_name)
            if group_key not in known_groups:
                group_doc = {
                    '_key': group_key,
                    'name': group_name
                }
                if _ensure_vertex(groups, group_doc, 'group'):
                    known_groups.add(group_key)

            if group_key in known_groups:
                _ensure_edge(caused_by, 'causation',
                             'groups', group_key, 'events', event_key)

In [None]:
import pandas as pd

# Load from CSV
df = pd.read_csv('processed_dataset.csv')

# Blank CSV cells are read as float NaN, which is truthy and is not valid
# JSON. Cast to object dtype and replace every missing cell with None so the
# records carry real nulls.
cleaned_data = df.astype(object).where(df.notna(), None).to_dict('records')

# Then upload it
upload_dataset(cleaned_data)

Added event: establishment_of_the_delhi_sultanate_1206
Added country: India
Added location edge: establishment_of_the_delhi_sultanate_1206 -> India
Added group: qutbunknownudunknowndin_aibak
Added causation edge: qutbunknownudunknowndin_aibak -> establishment_of_the_delhi_sultanate_1206
Added event: battle_of_panipat_1526
Added location edge: battle_of_panipat_1526 -> India
Added group: babur
Added causation edge: babur -> battle_of_panipat_1526
Added event: establishment_of_british_raj_1858
Added location edge: establishment_of_british_raj_1858 -> India
Added group: british_east_india_companyempire
Added causation edge: british_east_india_companyempire -> establishment_of_british_raj_1858
Added event: partition_of_india_1947
Added location edge: partition_of_india_1947 -> India
Added group: british_empire,_indian_political_leaders
Added causation edge: british_empire,_indian_political_leaders -> partition_of_india_1947
Added event: indounknownpakistani_war_of_1971_1971
Added location 