CREATING THE ENTIRE GRAPH WITH ATTRIBUTES FROM THE TRAINING DATA CSV

### IMPORT STATEMENTS

In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from io import StringIO
import random 
import pickle
import os
from dotenv import load_dotenv

load_dotenv()

True

### GRAPH CREATION

In [2]:
# Load the training events CSV into a DataFrame.
train_df = pd.read_csv('train_data_new3.csv')
# Plain-text preview of the first five rows to confirm the columns parsed as expected.
print(train_df.head(n=5))


                                     Event ID  Event Date Event Type  \
0          20240322-8146-07451ba739d6_CONCEDE  2024-03-22    CONCEDE   
1  20240322-3055-05b4f08fd688_MOBILIZE_police  2024-03-22   MOBILIZE   
2     20240326-7347-a0f35a46adea_PROTEST_demo  2024-03-26    PROTEST   
3     20240327-6458-32d2a90b8883_PROTEST_demo  2024-03-27    PROTEST   
4           20240401-5055-9674f90e4f73_ACCUSE  2024-04-01     ACCUSE   

  Event Mode  Event Intensity           Quad Code   Contexts       Actor Name  \
0        NaN              4.5  VERBAL COOPERATION  election   Aam Aadmi Party   
1     police             -5.5   MATERIAL CONFLICT        NaN  Aam Aadmi Party   
2       demo             -5.0   MATERIAL CONFLICT        NaN  Aam Aadmi Party   
3       demo             -5.0   MATERIAL CONFLICT        NaN  Aam Aadmi Party   
4        NaN             -3.0     VERBAL CONFLICT        NaN  Aam Aadmi Party   

  Actor Country Actor COW  ... GeoNames ID  \
0         India       750  ...    

In [3]:
# Module-level graph that add_event_to_graph (defined below) fills with nodes and edges.
G = nx.Graph()
# Function to add nodes and edges
def add_event_to_graph(row, graph=None):
    """Add one event row, and the entities it references, to a graph.

    For the given row this creates (or updates) seven nodes: the event date,
    the event type, the event itself, the actor, the recipient, the place and
    the information source. The event node is then linked to each of the
    other six with an edge whose ``relation`` attribute names the link
    ('type', 'date', 'actor', 'recipient', 'location', 'source').

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (e.g. a row yielded by
        ``DataFrame.iterrows()``) that provides the columns read below.
    graph : optional
        Object exposing ``add_node(node, **attrs)`` and
        ``add_edge(u, v, **attrs)``. Defaults to the module-level ``G``.

    Returns
    -------
    None
        The graph is modified in place.

    Notes
    -----
    Nodes are keyed by their raw value, so an actor, recipient, place or
    source sharing the same name ends up as a single node; with a networkx
    graph the attributes of the later ``add_node`` call are merged on top of
    the earlier ones.
    """
    if graph is None:
        graph = G  # fall back to the notebook's shared graph

    event_id = row['Event ID']
    event_date = row['Event Date']
    event_type = row['Event Type']

    # Date and event-type nodes carry no attributes of their own.
    graph.add_node(event_date)
    graph.add_node(event_type)

    # Event node
    graph.add_node(
        event_id,
        event_date=event_date,
        event_type=event_type,
        event_intensity=row['Event Intensity'],
        quad_code=row['Quad Code'],
        contexts=row['Contexts'],
    )

    # Actor node. Empty CSV cells are read by pandas as NaN rather than '',
    # so both cases must trigger the fallback to the actor's country.
    actor = row['Actor Name']
    if pd.isna(actor) or actor == '':
        actor = row['Actor Country']
    graph.add_node(
        actor,
        country=row['Actor Country'],
        cow=row['Actor COW'],
        primary_sector=row['Primary Actor Sector'],
        sectors=row['Actor Sectors'],
        title=row['Actor Title'],
        id=row['Wikipedia Actor ID'],
        actor_type='actor',
    )

    # Recipient node (no fallback is applied when the name is missing).
    recipient = row['Recipient Name']
    graph.add_node(
        recipient,
        country=row['Recipient Country'],
        cow=row['Recipient COW'],
        primary_sector=row['Primary Recipient Sector'],
        sectors=row['Recipient Sectors'],
        title=row['Recipient Title'],
        id=row['Wikipedia Recipient ID'],
        actor_type='recipient',
    )

    # Location node, keyed by the raw place name.
    place = row['Raw Placename']
    graph.add_node(
        place,
        city=row['City'],
        district=row['District'],
        province=row['Province'],
        latitude=row['Latitude'],
        longitude=row['Longitude'],
        geoid=row['GeoNames ID'],
        raw=place,
    )

    # Information-source node. 'publication_date' and 'date' intentionally
    # hold the same value; both keys are kept so existing readers of either
    # attribute keep working.
    info_source = row['Source']
    publication_date = row['Publication Date']
    graph.add_node(
        info_source,
        publication_date=publication_date,
        feature=row['Feature Type'],
        date=publication_date,
        people=row['Story People'],
        organization=row['Story Organizations'],
        location=row['Story Locations'],
        language=row['Language'],
        type='source',
    )

    # Connect the event to every entity it references.
    graph.add_edge(event_id, event_type, relation='type')
    graph.add_edge(event_id, event_date, relation='date')
    graph.add_edge(event_id, actor, relation='actor')
    graph.add_edge(event_id, recipient, relation='recipient')
    graph.add_edge(event_id, place, relation='location')
    graph.add_edge(event_id, info_source, relation='source')

# Iterate over each row to build the graph.
# iterrows() yields (index label, row) pairs; only the row is passed on.
for index, row in train_df.iterrows():
    add_event_to_graph(row)

In [4]:
# Serialize the graph G to 'new_graph3.pkl' with pickle.
with open('new_graph3.pkl', mode='wb') as graph_file:
    pickle.dump(G, graph_file)

In [None]:
# Reload the graph from the file the save step above writes ('new_graph3.pkl').
# The earlier name 'new_graph.pkl' did not match that file, so this cell would
# either fail or replace G with an unrelated, older graph.
# NOTE: pickle.load can execute arbitrary code; only load pickle files you trust.
with open('new_graph3.pkl', 'rb') as f:
    G = pickle.load(f)