In [35]:
import os, pandas as pd
from dotenv import load_dotenv
from neo4j import GraphDatabase, basic_auth
import ast
from datetime import datetime, timezone

# Load variables from a local .env file into the process environment, then
# build the Neo4j driver purely from those settings so that no credential is
# stored in the notebook itself.
load_dotenv()
driver = GraphDatabase.driver(
    # Indexing os.environ raises a KeyError naming the missing variable, which
    # is clearer than whatever the driver does when handed None.
    os.environ["NEO4J_URI"],
    auth=basic_auth(
        os.getenv("NEO4J_USER", "neo4j"),  # default keeps the previously hard-coded user
        os.environ["NEO4J_PASSWORD"],      # required: define it in .env or the shell
    ),
)

In [31]:
# Load the retail transactions CSV export into a DataFrame.
df = pd.read_csv("/Users/sebastienm/Desktop/DS/SOAKN/data/Retail_Transactions_Dataset.csv")
# Every Product cell is run through ast.literal_eval, replacing the raw text
# with the Python object that text spells out.
df["Product"] = df["Product"].map(ast.literal_eval)

In [39]:
# Read the Cypher template used for ingestion. The context manager closes the
# file handle as soon as the text is read instead of leaving it open until
# garbage collection.
with open("cyphers/setup.cypher") as cypher_file:
    query = cypher_file.read()

In [36]:
def to_neo4j_datetime(ts_str: str):
    """
    Turn a 'YYYY-MM-DD HH:MM:SS' string (e.g. '2020-09-03 12:39:59') into a
    timezone-aware Python datetime tagged as UTC.

    The wall-clock fields are kept exactly as parsed; only tzinfo is attached,
    so no offset conversion takes place. Pick a different zone here if the
    source data is in local time. datetime.strptime raises ValueError when the
    text does not match the expected layout.

    The returned value is meant to be passed to the Neo4j driver as a query
    parameter.
    """
    return datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)

# Example call: convert one sample timestamp string with to_neo4j_datetime.
# The same call is what ingestion code applies to each row's date string.
row_ts = to_neo4j_datetime("2020-09-03 12:39:59")

In [44]:
# Keep only the first 1000 rows of df as a smaller working sample.
df_small = df.head(1000)
def df_to_dicts(df):
    """
    Flatten a transactions DataFrame into a list of plain dicts, one per row.

    Each dict has the keys txn_id, date, customer, products, items, cost,
    payment, city, store_type, discount, cust_cat, season and promo, in that
    order. Numeric and boolean fields are coerced to built-in int / float /
    bool, Date is passed through to_neo4j_datetime, and promo is None whenever
    Promotion is missing (NaN / None), an empty string, or the literal text
    "None". All remaining fields are copied through unchanged.
    """
    records = []
    for rec in df.itertuples(index=False):
        entry = {
            "txn_id": int(rec.Transaction_ID),
            "date": to_neo4j_datetime(rec.Date),
            "customer": rec.Customer_Name,
            "products": rec.Product,
            "items": int(rec.Total_Items),
            "cost": float(rec.Total_Cost),
            "payment": rec.Payment_Method,
            "city": rec.City,
            "store_type": rec.Store_Type,
            "discount": bool(rec.Discount_Applied),
            "cust_cat": rec.Customer_Category,
            "season": rec.Season,
        }
        # Collapse every "no promotion" spelling into None.
        promo_value = rec.Promotion
        if pd.isna(promo_value) or promo_value in ("None", "", None):
            promo_value = None
        entry["promo"] = promo_value
        records.append(entry)
    return records

# Convert the sample rows into the parameter dicts handed to the Cypher query.
rows = df_to_dicts(df_small)

# Send all rows in a single write transaction via session.execute_write.
# The result is consumed inside the transaction function so that no live
# Result object is returned out of the transaction; only the summary comes
# back, and it is discarded here.
with driver.session() as s:
    s.execute_write(lambda tx: tx.run(query, rows=rows).consume())

Unnamed: 0,person,movie,stars
0,Alice,Inception,4
1,Alice,The Matrix,5
2,Bob,Interstellar,5
3,Bob,The Matrix,4
4,Charlie,Inception,5
