In [1]:
import networkx as nx
import json
import pandas as pd
import plotly.express as px

In [3]:
def load_and_organize_communications(graph_path):
    # Load the graph from JSON file
    with open(graph_path) as f:
        json_data = json.load(f)

    G = nx.json_graph.node_link_graph(json_data, edges="edges")

    communications = []

    for node_id in G.nodes():
        node_data = G.nodes[node_id]
        if node_data.get('type') == 'Event' and node_data.get('sub_type') == 'Communication':
            # Extract relevant information from the Communication event
            comm_timestamp = node_data.get('timestamp')
            comm_content = node_data.get('content', '')

            source_entity = None
            target_entity = None

            # Check incoming edges for source Entity
            for predecessor in G.predecessors(node_id):
                pred_node = G.nodes[predecessor]
                if pred_node.get('type') == 'Entity':
                    source_entity = pred_node.get('label', '')

            # Check outgoing edges for target Entity
            for successor in G.successors(node_id):
                succ_node = G.nodes[successor]
                if succ_node.get('type') == 'Entity':
                    target_entity = succ_node.get('label', '')

            # Create a dictionary for the communication data
            comm_data = {
                'id': node_id,
                'timestamp': comm_timestamp,
                'content': comm_content,
                'source': source_entity,
                'target': target_entity
            }
            communications.append(comm_data)

    return communications

# Example usage:
graph_path = "../data/MC3_graph.json"
communications_data = load_and_organize_communications(graph_path)
print(communications_data[0])

{'id': 'Event_Communication_1', 'timestamp': '2040-10-01 08:09:00', 'content': "Hey The Intern, it's The Lookout! Just spotted a pod of dolphins near the eastern point this morning. They were so playful! If you're free this weekend, the migratory birds are starting to arrive too. Let me know if you want to join for some birdwatching!", 'source': 'The Lookout', 'target': 'The Intern'}


In [11]:
df = pd.DataFrame(communications_data)
df['timestamp'] = pd.to_datetime(df['timestamp'])

df['time_window'] = df['timestamp'].dt.floor('H')
aggregated_df = df.groupby(['source', 'time_window']).size().reset_index(name='count')

aggregated_df = df.groupby(['source', 'time_window']).size().reset_index(name='count')
pivot_df = aggregated_df.pivot(index='time_window', columns='source', values='count').fillna(0)

fig = px.line(pivot_df, x=pivot_df.index, y=pivot_df.columns,
                labels={'x': 'Time', 'value': 'Number of Communications'},
                title='Communications per Source over Time (1h)')

fig.update_layout(
    xaxis=dict(showgrid=True, gridwidth=1),
    yaxis=dict(showgrid=True, gridwidth=1),
    font=dict(size=12)
)

fig.show()


'H' is deprecated and will be removed in a future version, please use 'h' instead.



In [12]:
df = pd.DataFrame(communications_data)
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Extract the hour of day for each timestamp
df['hour_of_day'] = df['timestamp'].dt.hour

# Group by source and hour of day, then count occurrences
aggregated_df = df.groupby(['source', 'hour_of_day']).size().reset_index(name='count')

# Pivot the DataFrame to have hours as index and sources as columns
pivot_df = aggregated_df.pivot(index='hour_of_day', columns='source', values='count').fillna(0)

# Create a line plot using Plotly Express
fig = px.line(pivot_df, x=pivot_df.index, y=pivot_df.columns,
                labels={'x': 'Hour of Day', 'value': 'Number of Communications'},
                title='Communications per Source by Hour of Day (1H Windows)')

# Update the layout for better readability
fig.update_layout(
    xaxis=dict(showgrid=True, gridwidth=1),
    yaxis=dict(showgrid=True, gridwidth=1),
    font=dict(size=12)
)

# Display the plot
fig.show()

In [None]:
df = pd.DataFrame(communications_data)
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Extract date and hour of day for each timestamp
df['date'] = df['timestamp'].dt.date
df['hour_of_day'] = df['timestamp'].dt.hour

# Group by date, source, and hour of day, then count occurrences
aggregated_df = df.groupby(['date', 'source', 'hour_of_day']).size().reset_index(name='count')

# Create a line plot using Plotly Express with facet rows for each date
fig = px.line(aggregated_df, x='hour_of_day', y='count', color='source',
                facet_row='date',
                labels={'hour_of_day': 'Hour of Day', 'count': 'Number of Communications'},
                title='Daily Communication Patterns by Source (1H Windows)')

# Update the layout for better readability
fig.update_layout(
    height=1000,  # Adjust height to accommodate multiple rows
    width=800,
    font=dict(size=12)
)

# Show the plot
fig.show()

In [11]:
df = pd.DataFrame(communications_data)
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Extract date and hour of day for each timestamp
df['date'] = df['timestamp'].dt.date
df['hour_of_day'] = df['timestamp'].dt.hour

# Create month-day format for compact row identification
df['month_day'] = df['date'].apply(lambda x: f"{x.month:02d}-{x.day:02d}")

# Group by date, source, and hour of day, then count occurrences
aggregated_df = df.groupby(['month_day', "date", 'source', 'hour_of_day']).size().reset_index(name='count')

# Define color and shape mappings for each unique source
sources = aggregated_df['source'].unique()
colors = px.colors.qualitative.Alphabet  # Using a predefined set of colors
shapes = ['circle', 'square', 'diamond', 'triangle-up']  # Limit to avoid repetition

color_map = {source: color for source, color in zip(sources, colors[:len(sources)])}
shape_map = {source: shape for source, shape in zip(sources, shapes[:len(sources)])}

# Create a scatter plot with markers
fig = px.scatter(aggregated_df, x='hour_of_day', y='count',
                    color='source', facet_row='month_day',
                    category_orders={'month_day': aggregated_df['month_day'].unique()},
                    labels={'hour_of_day': 'Hour of Day', 'count': '', 'source': ''},
                    title='Daily Communication Patterns by Source (Markers)',
                    opacity=1.0)

# Update markers based on source
for source in sources:
    fig.update_traces(
        marker=dict(color=color_map[source], symbol=shape_map[source]),
        selector={'legendgroup': source})

# Remove y-axis labels and adjust layout
fig.update_layout(
    showlegend=True,
    margin=dict(l=40, r=40, t=100, b=40),
    height=800,  # Adjust based on the number of rows
    width=600)

fig.update_yaxes(showgrid=False, title=None)

# Show the plot
fig.show()

KeyError: 'Himark Harbor'

In [12]:
sources

array(['Boss', 'Davis', 'Glitters Team', 'Green Guardians',
       'Himark Harbor', 'Horizon', 'Kelly', 'Liam Thorne', 'Mako',
       'Marlin', 'Mrs. Money', 'Oceanus City Council', 'Paackland Harbor',
       'Reef Guardian', 'Remora', 'Samantha Blake', 'Sentinel',
       'Serenity', 'The Intern', 'The Lookout', 'The Middleman',
       'Clepper Jensen', 'EcoVigil', 'Haacklee Harbor', 'Miranda Jordan',
       'Osprey', 'Rodriguez', 'Small Fry', 'Defender', 'Northern Light',
       'V. Miesel Shipping', 'Sailor Shifts Team', 'Nadia Conti', 'Sam',
       'Neptune', 'Elise', 'Seawatch', 'The Accountant', 'Knowles'],
      dtype=object)