In [12]:
%pip install yfinance lxml

Collecting lxml
  Downloading lxml-6.0.0-cp39-cp39-win_amd64.whl.metadata (6.8 kB)
Downloading lxml-6.0.0-cp39-cp39-win_amd64.whl (4.0 MB)
   ---------------------------------------- 0.0/4.0 MB ? eta -:--:--
   ------------------ --------------------- 1.8/4.0 MB 10.1 MB/s eta 0:00:01
   ------------------------------------ --- 3.7/4.0 MB 9.1 MB/s eta 0:00:01
   ---------------------------------------- 4.0/4.0 MB 8.9 MB/s eta 0:00:00
Installing collected packages: lxml
Successfully installed lxml-6.0.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
import yfinance as yf, pandas as pd

tickers = [f"{code}.SI" for code in ["D05","O39","U11","S68"]]  # 自备代码表
rows = []
for t in tickers:
    info = yf.Ticker(t).fast_info  # fast_info 比 .info 更快
    rows.append({"ticker": t, "marketCapUSD": info['marketCap']})

df = pd.DataFrame(rows)           # → marketCap 已是美元值
df.to_csv("G:/My Drive/NUS MSBA SEM2/UOB/SGX Annual Reports/sgx_cap_yf.csv", index=False)
df.to_csv("C:/Users/22601/Downloads/downloads/files/sgx_cap_yf.csv", index=False)


In [13]:
import pandas as pd, requests, io, datetime as dt

# # 1⃣ 直接下载 StockAnalysis 的“Download”CSV（含 Market Cap, Price, Revenue …）
# csv_url = "https://stockanalysis.com/list/singapore-exchange/?download"
# csv = requests.get(csv_url).content
url = "https://stockanalysis.com/list/singapore-exchange/"
html = requests.get(url).text

# 读取所有 <table> 标签，然后选第一个（通常是我们需要的股价市值表）
dfs = pd.read_html(io.StringIO(html), match="Market Cap")
df = dfs[0]  # 必须用 [0] 拿第一个表，否则 pandas 返回的是 list 而非 DataFrame :contentReference[oaicite:1]{index=1}


In [26]:
df.size

1662

In [14]:
df.head()

Unnamed: 0,No.,Symbol,Company Name,Market Cap,Stock Price,% Change,Revenue
0,1,HTCD,Tencent Holdings Limited,769.42B,8.49,0.12%,126.06B
1,2,HSHD,HSBC Holdings plc,279.21B,3.21,0.31%,77.93B
2,3,HXXD,Xiaomi Corporation,242.16B,4.66,1.08%,74.38B
3,4,HYDD,BYD Company Limited,182.47B,2.07,-,152.31B
4,5,HPAD,"Ping An Insurance (Group) Company of China, Ltd.",173.12B,4.31,1.89%,175.11B


In [18]:
df = df[['Symbol', 'Company Name', 'Market Cap']]

# 定义转换函数
def parse_market_cap(val):
    if pd.isna(val):
        return None
    val = val.strip()
    num, suffix = val[:-1], val[-1].upper()
    try:
        num = float(num)
    except ValueError:
        return None
    if suffix == 'B':
        return num * 1e9
    elif suffix == 'M':
        return num * 1e6
    else:
        return num

# 应用转换并生成USD列
df['marketCap'] = df['Market Cap'].apply(parse_market_cap)
SGD2USD = 0.74
df['marketCapUSD'] = df['marketCap'] * SGD2USD

df = df.dropna(subset=['marketCapUSD'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['marketCap'] = df['Market Cap'].apply(parse_market_cap)


In [19]:
df.head()

Unnamed: 0,Symbol,Company Name,Market Cap,marketCap,marketCapUSD
0,HTCD,Tencent Holdings Limited,769.42B,769420000000.0,569370800000.0
1,HSHD,HSBC Holdings plc,279.21B,279210000000.0,206615400000.0
2,HXXD,Xiaomi Corporation,242.16B,242160000000.0,179198400000.0
3,HYDD,BYD Company Limited,182.47B,182470000000.0,135027800000.0
4,HPAD,"Ping An Insurance (Group) Company of China, Ltd.",173.12B,173120000000.0,128108800000.0


In [20]:
# 3⃣ 保存到本地供 Cypher LOAD CSV
fname = f"C:/Users/22601/Downloads/downloads/files/sgx_market_cap_{dt.date.today()}.csv"
df[['Symbol','Company Name','marketCapUSD']].to_csv(fname, index=False)
fname = f"G:/My Drive/NUS MSBA SEM2/UOB/SGX Annual Reports/sgx_market_cap_{dt.date.today()}.csv"
df[['Symbol','Company Name','marketCapUSD']].to_csv(fname, index=False)
print("Saved:", fname)

Saved: G:/My Drive/NUS MSBA SEM2/UOB/SGX Annual Reports/sgx_market_cap_2025-07-22.csv


In [30]:
from neo4j import GraphDatabase
import pandas as pd, datetime as dt

df = pd.read_csv(fname)
URI = "neo4j+s://8f6e6423.databases.neo4j.io"
USER = "neo4j"
PWD = "TUOx-U2EDDDXXNAteOqarP3aEj7XxMcsoilyEtL7NLI"
DRY_RUN = False
driver = GraphDatabase.driver(URI, auth=(USER, PWD))

# ------------------------------------------------------------------
# Load CSV (expect列: Symbol, marketCapUSD)
# ------------------------------------------------------------------
print(f"Loaded {len(df):,} rows from {fname}")
# --- 预处理: 清洗公司名 ---------------------------------------------------
# 去掉首尾空格 / 引号，并统一为大写做匹配

def canon(name: str) -> str:
    return name.strip().strip('"').upper()

df['canonName'] = df['Company Name'].apply(canon)


# -- 检查节点是否存在 -------------------------------------------------------

def node_exists(tx, c_name: str) -> bool:
    return tx.run(
        """
        MATCH (c:Company)
        WHERE toUpper(trim(c.name)) = $n
        RETURN 1 LIMIT 1
        """,
        n=c_name
    ).single() is not None

# -- 更新 marketCap --------------------------------------------------------

def update_market_cap(tx, c_name: str, cap: float, iso_date: str):
    tx.run(
        """
        MATCH (c:Company)
        WHERE toUpper(trim(c.name)) = $n
        SET   c.marketCap     = $cap,
              c.marketCapDate = date($iso)
        """,
        n=c_name, cap=cap, iso=iso_date
    )

# ------------------------------------------------------------------
# Dry‑Run & Commit --------------------------------------------------
# ------------------------------------------------------------------
with driver.session() as sess:
    today_iso = str(dt.date.today())
    todo = []

    # --- Dry‑Run discovery -----------------------------------------
    for _, row in df.iterrows():
        cname = row['canonName']
        cap_usd = float(row['marketCapUSD'])
        if sess.execute_read(node_exists, cname):
            todo.append((cname, cap_usd))

    print(f"Found {len(todo):,} existing companies to update.")

    # --- Preview list ---------------------------------------------
    for n, c in todo[:20]:
        print(f"  {n[:40]:<40} → {c/1e9:,.2f} B USD")
    if len(todo) > 20:
        print("  … (truncated) …")

    # --- Commit ----------------------------------------------------
    if not DRY_RUN:
        for n, c in todo:
            sess.execute_write(update_market_cap, n, c, today_iso)
        print("✅ Update committed.")
    else:
        print("⚠️  DRY_RUN = True → no data written. Set DRY_RUN=False to commit.")

driver.close()
print("Done.")


Loaded 554 rows from G:/My Drive/NUS MSBA SEM2/UOB/SGX Annual Reports/sgx_market_cap_2025-07-22.csv
Found 134 existing companies to update.
  PRUDENTIAL PLC                           → 30.13 B USD
  SINGAPORE TECHNOLOGIES ENGINEERING LTD   → 19.40 B USD
  SINGAPORE AIRLINES LIMITED               → 16.63 B USD
  WILMAR INTERNATIONAL LIMITED             → 14.09 B USD
  CAPITALAND INTEGRATED COMMERCIAL TRUST   → 11.91 B USD
  CAPITALAND INVESTMENT LIMITED            → 10.15 B USD
  CAPITALAND ASCENDAS REIT                 → 9.41 B USD
  NIO INC.                                 → 9.35 B USD
  MAPLETREE INDUSTRIAL TRUST               → 4.28 B USD
  OLAM GROUP LIMITED                       → 2.87 B USD
  SIA ENGINEERING COMPANY LIMITED          → 2.76 B USD
  VENTURE CORPORATION LIMITED              → 2.66 B USD
  KEPPEL INFRASTRUCTURE TRUST              → 1.96 B USD
  FIRST RESOURCES LIMITED                  → 1.79 B USD
  IFAST CORPORATION LTD.                   → 1.58 B USD
  CAPITALAND C