# In [None]:
import os
import json
import re
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import BigInteger, Text
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import gzip
import time
import pickle


# Name of the database table this analysis works with.
TABLE_NAME = "tweets_test"

# Connection string for the PostgreSQL database.
# SECURITY NOTE(review): the fallback below embeds real credentials in source
# control. Prefer supplying the DATABASE_URL environment variable; the literal
# is kept only so existing runs keep working, and the password should be
# rotated and removed from the repository.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://dbadmin:BZ6uHRGxki6a7qD@dcpostgres.postgres.database.azure.com:5432/DataChallenge",
)

# Locate the project root: the parent of the directory that holds this file.
try:
    # __file__ only exists when the code runs as a script or module; in a
    # notebook/REPL the lookup raises NameError and the fallback below is used.
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # Interactive session: assume the working directory sits one level below
    # the project root (e.g. a notebooks/ folder).
    project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
    print(f"Running in an interactive environment. Assuming project root is: {project_root}")
else:
    project_root = os.path.dirname(script_dir)
    print(f"Running as a script. Project root found at: {project_root}")

# Build the path to the pickled graph, relative to the project root.
data_folder = os.path.join(project_root, "data")
graph_filename = "conversation_airlines_senti_correct.gpickle"
graph_path = os.path.join(data_folder, graph_filename)

# Load the graph and report how long unpickling took.
t0 = time.time()
print(f"\nLoading graph from: {graph_path}")

try:
    # SECURITY NOTE: unpickling can execute arbitrary code. Only load graph
    # files that come from a trusted source.
    with open(graph_path, "rb") as f:
        G = pickle.load(f)
except FileNotFoundError:
    print("\n--- ERROR ---")
    print(f"File not found at the specified path: {graph_path}")
    print("Please check that the file exists and that the project structure is correct.")
    # Stop here: without the graph every later statement would fail with an
    # unrelated NameError on G, hiding the real cause.
    raise
else:
    print("Pickle load time (s):", time.time() - t0)


# Report the size of the loaded graph.
total_nodes = G.number_of_nodes()
print("Total nodes:", total_nodes)

# Gather the "created" attribute of every node (None when a node lacks it).
raw_created = [attrs.get("created") for _, attrs in G.nodes(data=True)]

# Parse to datetimes; values that cannot be parsed become NaT and are dropped.
parsed_created = pd.to_datetime(pd.Series(raw_created), errors='coerce')
node_created_at = parsed_created.dropna()

# Bucket every timestamp into its calendar month, rendered as a string label.
monthly_periods = node_created_at.dt.to_period("M")
months = monthly_periods.astype(str)

# Number of nodes per month, ordered by month label.
month_counts = months.value_counts().sort_index()

# Bar chart of the per-month counts, drawn on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=month_counts.index, y=month_counts.values, color='skyblue', ax=ax)

# Tilt the month labels so they stay readable, then label the chart.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.set_title("Number of tweets per month", fontsize=14)
ax.set_xlabel("Month", fontsize=12)
ax.set_ylabel("Tweet Count", fontsize=12)
fig.tight_layout()
#plt.savefig("tweets_per_month_transparent.png", dpi=300, bbox_inches="tight", transparent=True)
plt.show()