In [None]:
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

# Database configuration (credentials are left blank in this file).
database_username = ''
database_password = ''
database_ip       = '127.0.0.1'
database_name     = 'myemail'

# Percent-encode the credentials so characters such as '@', ':' or '/' in a
# username/password cannot be mistaken for URL delimiters.
_escaped_username = quote(database_username, safe='')
_escaped_password = quote(database_password, safe='')
database_connection = (
    f'mysql+pymysql://{_escaped_username}:{_escaped_password}'
    f'@{database_ip}/{database_name}'
)

# Create the database engine.
engine = create_engine(database_connection)

# Read the whole 'emails2' table into a DataFrame. The engine is disposed in
# ``finally`` so it is released even when the read raises.
try:
    df = pd.read_sql_table('emails2', con=engine)
finally:
    engine.dispose()

print(df.shape)

In [None]:
# Target sample size: one hundredth of the original row count, rounded down.
sampled_size = len(df) // 100

# Draw that many rows at random; the fixed seed keeps the draw reproducible.
df_sampled = df.sample(random_state=42, n=sampled_size)

# Report the sample's row count, then its shape and its first few rows.
print("新DataFrame的行数:", df_sampled.shape[0])
for summary in (df_sampled.shape, df_sampled.head()):
    print(summary)

In [None]:
# Parse the 'Timed' column into datetimes; errors='coerce' turns values that
# cannot be parsed into NaT instead of raising.
timed_as_datetime = pd.to_datetime(df_sampled['Timed'], errors='coerce')
df_sampled['Timed'] = timed_as_datetime

# Print the converted column so invalid timestamps can be spotted by eye.
print("原始 'Timed' 列数据:", df_sampled['Timed'], sep="\n")

In [None]:
from neo4j import GraphDatabase


# Connection settings for the Neo4j database (Bolt endpoint on localhost).
uri = "bolt://localhost:7687"
user = "neo4j"
password = ""

# Build the driver, authenticating with the (user, password) pair.
credentials = (user, password)
driver = GraphDatabase.driver(uri, auth=credentials)

def add_email(tx, message_idd, sender, recipient, subjectd, contentd, timed):
    """Record one email as a SENDS relationship between two Person nodes.

    Both Person nodes (keyed by ``email``) and the single SENDS relationship
    between them are created if missing. On first creation the relationship
    stores the message id, subject, content, timestamp and ``weight = 1``.
    On later emails between the same pair, ``weight`` is incremented and the
    new subject/content are appended to the stored ones.

    A null subject or content leaves the stored value untouched: in Cypher
    ``string + null`` is null, and setting a property to null removes it,
    which would otherwise wipe everything accumulated so far.

    Args:
        tx: Transaction object exposing ``run(query, **params)``.
        message_idd: Identifier of the message (stored on creation only).
        sender: Email address of the sending Person.
        recipient: Email address of the receiving Person.
        subjectd: Subject text, or None.
        contentd: Body text, or None.
        timed: Value passed to Cypher ``datetime()`` (stored on creation only).
    """
    # '\\n\\n' reaches Cypher as the escape sequence \n\n (two newlines).
    query = """
    MERGE (p1:Person {email: $sender})
    MERGE (p2:Person {email: $recipient})
    MERGE (p1)-[r:SENDS]->(p2)
    ON CREATE SET
        r.message_idd = $message_idd,
        r.subjectd = $subjectd,
        r.contentd = $contentd,
        r.timed = datetime($timed),
        r.weight = 1
    ON MATCH SET
        r.weight = r.weight + 1,
        r.subjectd = CASE
            WHEN $subjectd IS NULL THEN r.subjectd
            ELSE coalesce(r.subjectd, '') + '; ' + $subjectd
        END,
        r.contentd = CASE
            WHEN $contentd IS NULL THEN r.contentd
            ELSE coalesce(r.contentd, '') + '\\n\\n' + $contentd
        END
    """
    tx.run(
        query,
        message_idd=message_idd,
        sender=sender,
        recipient=recipient,
        subjectd=subjectd,
        contentd=contentd,
        timed=timed,
    )

# Write every sampled email into Neo4j, one write transaction per row.
# The driver is closed in ``finally`` so it is released even if loading a
# row raises part-way through.
try:
    with driver.session() as session:
        for _, row in df_sampled.iterrows():
            # Columns are referenced by their exact DataFrame names.
            session.execute_write(
                add_email,
                row['Message-IDd'],
                row['Fromd'],
                row['Tod'],
                row['Subjectd'],
                row['contentd'],
                row['Timed'],
            )
finally:
    driver.close()