In [2]:
import os
import json
import dotenv
import psycopg2
import psycopg2.extras
from datetime import datetime

# Load environment variables via python-dotenv (by default it reads a .env
# file) so the os.getenv() lookups for the database settings can see them.
dotenv.load_dotenv()

def convert_datetime(obj):
    """Convert date/time values to ISO 8601 strings for JSON serialization.

    Intended for use as the ``default=`` hook of ``json.dump``/``json.dumps``,
    which calls it for every object the encoder cannot serialize natively.

    Args:
        obj: The object the JSON encoder could not serialize.

    Returns:
        ``obj.isoformat()`` when ``obj`` is a ``datetime``, ``date`` or
        ``time`` instance.

    Raises:
        TypeError: If ``obj`` is of any other type.
    """
    # Imported locally because the module only imports ``datetime`` by name.
    from datetime import date, time

    # ``datetime`` is a subclass of ``date``, so this check covers plain
    # dates and times in addition to full timestamps.
    if isinstance(obj, (date, time)):
        return obj.isoformat()
    raise TypeError(f"Type {type(obj)} not serializable")

def export_arxiv_papers(local=True, output_file="arxiv_papers_dump.json"):
    """Dump every row of the ``arxiv_papers`` table to a JSON file.

    Connection settings are read from environment variables: the
    ``LOCAL_PG*`` set when ``local`` is true, otherwise the ``PG*`` set.

    This is a best-effort helper: any error is printed as ``Error: ...``
    instead of being raised, and the connection (if one was opened) is
    always closed.

    Args:
        local: Select the ``LOCAL_PG*`` variables when true, ``PG*`` otherwise.
        output_file: Path of the JSON file to write (UTF-8, indented).
    """
    prefix = "LOCAL_" if local else ""
    db_params = {
        "dbname": os.getenv(f"{prefix}PGDATABASE"),
        "user": os.getenv(f"{prefix}PGUSER"),
        "password": os.getenv(f"{prefix}PGPASSWORD"),
        "host": os.getenv(f"{prefix}PGHOST"),
        # Only the non-local port carries an explicit "5432" fallback.
        "port": os.getenv("LOCAL_PGPORT") if local else os.getenv("PGPORT", "5432"),
    }

    # Bind the name before the try block: if connect() itself fails, the
    # finally clause must not trip over an unbound ``conn`` and mask the
    # error that was just reported.
    conn = None
    try:
        conn = psycopg2.connect(**db_params)

        # The cursor context manager closes the cursor on exit.
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
            cursor.execute("SELECT * FROM arxiv_papers;")
            # DictCursor rows convert to plain dicts keyed by column name.
            papers = [dict(row) for row in cursor.fetchall()]

        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(papers, f, ensure_ascii=False, indent=4, default=convert_datetime)

        print(f"Exported {len(papers)} papers to {output_file}")

    except Exception as e:
        print("Error:", e)
    finally:
        if conn is not None:
            conn.close()

# Run the export using the LOCAL_PG* connection settings; the output goes to
# the function's default file, arxiv_papers_dump.json.
export_arxiv_papers(local=True)

Exported 30 papers to arxiv_papers_dump.json
