In [4]:
from pathlib import Path
import numpy as np, pandas as pd
from datetime import datetime, timedelta

# Step 1: generate a synthetic transactions dataset and save it as CSV.
#
# 1) Project root.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run as-is on a machine where this location is writable.
PROJECT_ROOT = Path("/Users/ngocanh/Documents/Buid/Project 2")

# 2) Create the sub-folders (no-op when they already exist)
DATA_DIR = PROJECT_ROOT / "data"
IMAGES_DIR = PROJECT_ROOT / "images"
DATA_DIR.mkdir(parents=True, exist_ok=True)
IMAGES_DIR.mkdir(parents=True, exist_ok=True)

print("Project root:", PROJECT_ROOT)
print("Data folder:", DATA_DIR)
print("Images folder:", IMAGES_DIR)

# 3) Generate synthetic data
n = 50_000
rng = np.random.default_rng(2024)  # fixed seed -> same data on every run

# The order of the rng draws below determines the generated values;
# keep users -> merchants -> amounts -> dates to reproduce the same dataset.
users = rng.integers(1, 501, size=n)  # user ids in 1..500 (upper bound exclusive)
merchants = rng.choice(
    ["Amazon","Walmart","Target","eBay","BestBuy","Starbucks",
     "Uber","Netflix","Apple","Google","Microsoft","Costco"],
    size=n
)
# Log-normal amounts, clamped to [1, 500] and rounded to 2 decimals.
amounts = np.clip(rng.lognormal(mean=4.5, sigma=0.6, size=n), 1, 500).round(2)

start = datetime(2024,1,1)
# Day offsets 0..365 cover every day of 2024 (a leap year).
day_offsets = rng.integers(0, 366, size=n)
# Vectorized date arithmetic instead of a 50k-iteration Python loop;
# yields the same "YYYY-MM-DD" strings, as a plain list of str.
dates = np.datetime_as_string(
    np.datetime64(start.date(), "D") + day_offsets.astype("timedelta64[D]"),
    unit="D",
).tolist()

df = pd.DataFrame({
    "transaction_id": np.arange(1, n+1),
    "user_id": users,
    "merchant": merchants,
    "amount": amounts,
    "date": dates
})

# 4) Save the CSV file (without the DataFrame index column)
out_path = DATA_DIR / "transactions.csv"
df.to_csv(out_path, index=False)
print(f"[OK] Saved {len(df):,} rows → {out_path}")

Project root: /Users/ngocanh/Documents/Buid/Project 2
Data folder: /Users/ngocanh/Documents/Buid/Project 2/data
Images folder: /Users/ngocanh/Documents/Buid/Project 2/images
[OK] Saved 50,000 rows → /Users/ngocanh/Documents/Buid/Project 2/data/transactions.csv


In [5]:
from pathlib import Path
import pandas as pd
import sqlite3

# Step 2: load the transactions CSV into a SQLite database and sanity-check it.
#
# ===== 0) Project paths =====
# NOTE(review): absolute, machine-specific path; must match the location the
# CSV was written to.
PROJECT_ROOT = Path("/Users/ngocanh/Documents/Buid/Project 2")
DATA_DIR = PROJECT_ROOT / "data"
DB_PATH = PROJECT_ROOT / "finance.db"
CSV_PATH = DATA_DIR / "transactions.csv"

# Explicit check rather than `assert`: assertions are stripped under `python -O`,
# which would turn a missing file into a less helpful error further down.
if not CSV_PATH.exists():
    raise FileNotFoundError(f"Không tìm thấy {CSV_PATH}. Chạy Bước 1 trước nha.")

# ===== 1) Read the CSV =====
df = pd.read_csv(CSV_PATH)

# ===== 2) Load into SQLite =====
conn = sqlite3.connect(DB_PATH)
try:
    # if_exists="replace" drops and recreates the table, so re-running this
    # cell does not append duplicate rows.
    df.to_sql("transactions", conn, if_exists="replace", index=False)

    # (optional) create indexes to speed up queries by user and by date
    conn.execute("CREATE INDEX IF NOT EXISTS idx_transactions_user ON transactions(user_id);")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_transactions_date ON transactions(date);")
    conn.commit()

    # ===== 3) Quick check =====
    n_rows = conn.execute("SELECT COUNT(*) FROM transactions;").fetchone()[0]
    sample = pd.read_sql("SELECT * FROM transactions LIMIT 5;", conn)
finally:
    # Always release the database handle, even if loading or querying failed;
    # otherwise the kernel keeps the file open until it is restarted.
    conn.close()

print(f"[OK] Loaded {n_rows:,} rows vào {DB_PATH} (table: transactions)")
print(sample)

[OK] Loaded 50,000 rows vào /Users/ngocanh/Documents/Buid/Project 2/finance.db (table: transactions)
   transaction_id  user_id merchant  amount        date
0               1      121   Google  125.71  2024-05-10
1               2      338     eBay   31.82  2024-09-24
2               3       47     Uber  103.07  2024-10-17
3               4      108   Target   90.16  2024-10-01
4               5      159     Uber   63.28  2024-09-07


SyntaxError: invalid syntax (74908919.py, line 1)