In [1]:
!pip install yfinance
!pip install tqdm 




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# --------------------------------------------------------------------------------
# Google Colab ready: Multi-studio historical market-cap aggregator by YEAR (CSV + XML)
# - Reads studios.txt (format: "Name, TICKER" per line; supports TYO:, NASDAQ:, NYSE:)
# - Uses yfinance to fetch history and sharesOutstanding
# - Computes daily market cap (Close * sharesOutstanding) when possible
# - Attempts to convert to USD using FX tickers like "JPY=X" or "<CUR>USD=X"
# - Aggregates by YEAR using last trading-day of each year (gives year-end market cap)
# - Outputs yearly_values.csv and yearly_values.xml
# --------------------------------------------------------------------------------



import yfinance as yf
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
from tqdm import tqdm
from datetime import datetime
import time
import re
import os

# -----------------------
# Helper functions
# -----------------------
def normalize_ticker(raw_ticker):
    """Normalize user-style tickers to yfinance/Yahoo format.
       Accept examples: 'TYO:4816', 'NASDAQ:NFLX', 'NYSE:DIS', '4816.T', 'NFLX'
    """
    raw = raw_ticker.strip()
    if ':' in raw:
        prefix, code = raw.split(':', 1)
        prefix = prefix.strip().upper()
        code = code.strip()
        if prefix in ('TYO', 'TSE', 'JPX', 'TOKYO'):
            # Yahoo uses .T for Tokyo Exchange
            return f"{code}.T"
        elif prefix in ('NASDAQ', 'NAS'):
            return code
        elif prefix in ('NYSE',):
            return code
        elif prefix in ('OTC',):
            return code  # why not
        else:
            # fallback: just return the code
            return code
    else:
        # already maybe in Yahoo format; return raw
        return raw

def read_studios_file(path="studios.txt"):
    """Reads studios.txt and returns list of (name, raw_ticker) for lines that have tickers."""
    entries = []
    if not os.path.exists(path):
        raise FileNotFoundError(f"{path} not found. Upload your studios.txt to Colab filesystem.")
    with open(path, 'r', encoding='utf-8') as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith('#'):
                continue
            # expect "Name, TICKER"
            if ',' not in line:
                # skip lines without a comma
                continue
            name, ticker = line.split(',', 1)
            name = name.strip()
            ticker = ticker.strip()
            # ignore commented-in-line
            if ticker.startswith('#') or ticker == '':
                continue
            entries.append((name, ticker))
    return entries

def attempt_fx_conversion_series(series_dates, from_currency):
    """Attempt to fetch FX series to convert given currency to USD.
       Returns a pd.Series indexed by date with FX rate (units: 1 USD = rate in that currency) OR
       returns series of USD-per-unit (i.e., multiplier to convert currency -> USD).
       
       Yahoo FX tickers examples:
         USDJPY pair ticker on Yahoo: 'JPY=X' returns USD/JPY? Historically 'JPY=X' = USDJPY (i.e., 1 USD = X JPY)
       We'll fetch <CUR>USD=X (e.g., 'JPYUSD=X') often fails, so strategy:
         - If from_currency == 'JPY': use 'JPY=X' -> this gives USD/JPY (1 USD = X JPY) => to convert JPY -> USD, divide JPY_amount by (USD/JPY)
         - For other currencies try '<CUR>=X' or '<CUR>USD=X' patterns; fallback = None
    """
    cur = from_currency.upper()
    # If already USD, return ones
    if cur in ('USD', 'US$'):
        return pd.Series(1.0, index=series_dates)
    # Known mapping: for JPY use 'JPY=X' (Yahoo returns USD/JPY, value ~110 meaning 1 USD = 110 JPY)
    # To convert JPY_amount to USD: USD = JPY_amount / (USD/JPY rate)
    try_candidates = []
    if cur == 'JPY':
        try_candidates = ['JPY=X']
    else:
        # try EURUSD=X, GBPUSD=X, etc.
        try_candidates = [f"{cur}USD=X", f"{cur}=X"]
    for fx in try_candidates:
        try:
            ticker_fx = yf.Ticker(fx)
            hist_fx = ticker_fx.history(period="max", auto_adjust=False)
            if hist_fx is None or hist_fx.empty:
                continue
            # use Close as FX rate
            fx_close = hist_fx['Close'].rename('FX_Close')
            # reindex to given series_dates with forward/backfill
            fx_close = fx_close.reindex(series_dates.union(fx_close.index)).sort_index().ffill().reindex(series_dates)
            return fx_close
        except Exception as e:
            # print("FX fetch fail for", fx, e)
            continue
    # If we get here, failed to fetch FX
    return None

# -----------------------
# Main processing
# -----------------------

def main():
    # read studios.txt from working dir
    entries = read_studios_file("studios.txt")
    if not entries:
        print("No valid entries found in studios.txt. Please add lines like 'Toei Animation, TYO:4816'")
        return

    print(f"Found {len(entries)} tickers to process.")
    # build mapping of normalized ticker to (name, raw_ticker)
    normalized = []
    for name, raw_t in entries:
        ytick = normalize_ticker(raw_t)
        normalized.append((name, raw_t, ytick))
    # iterate and fetch using yfinance
    per_company_yearly = {}   # dict name -> DataFrame with index=year, columns=['market_cap_USD'/'market_cap_RAW']
    company_currency = {}     # name -> currency string
    failed_companies = []
    all_years = set()

    for (name, raw, ytick) in tqdm(normalized, desc="Tickers"):
        time.sleep(0.5)  # be polite
        try:
            print(f"\nProcessing {name} ({raw}) -> yfinance ticker: {ytick}")
            tk = yf.Ticker(ytick)
            info = {}
            try:
                info = tk.info
            except Exception:
                # sometimes .info is rate-limited; fallback to light approach
                info = {}
            currency = info.get('currency', None)
            shares_out = info.get('sharesOutstanding', None)  # may be None
            # fetch history
            hist = tk.history(period="max", auto_adjust=False)  # keep raw close
            if hist is None or hist.empty:
                print(f"  WARNING: no historical data for {ytick}. Skipping.")
                failed_companies.append((name, ytick, "no-history"))
                continue
            # ensure Date index is timezone-naive date index
            hist = hist.sort_index()
            # fix column names
            if 'Close' not in hist.columns and 'close' in hist.columns:
                hist['Close'] = hist['close']
            # compute daily market cap if shares available
            if shares_out and isinstance(shares_out, (int, float)) and shares_out > 0:
                hist['MarketCap_RAW'] = hist['Close'] * shares_out
                hist['MarketCap_raw_unit'] = currency or 'UNKNOWN'
                used_marketcap = True
            else:
                # fallback: we do not have sharesOutstanding; use Close as proxy (not market cap)
                hist['MarketCap_RAW'] = hist['Close']
                hist['MarketCap_raw_unit'] = currency or 'UNKNOWN'
                used_marketcap = False
                print(f"  NOTE: sharesOutstanding missing for {ytick}. Using Close price as proxy (unit={currency}).")
            # attempt to convert to USD if currency is not USD
            series_dates = hist.index
            usd_series = None
            if currency and currency.upper() != 'USD':
                fx = attempt_fx_conversion_series(series_dates, currency)
                if fx is None:
                    print(f"  WARNING: could not fetch FX pair to convert {currency} -> USD for {name}. Values remain in {currency}.")
                    hist['MarketCap_USD'] = np.nan
                    converted = False
                else:
                    # special handling for JPY: fx (JPY=X) typically gives USD/JPY (1 USD = X JPY)
                    # To convert JPY_amount -> USD: USD = JPY_amount / (USD/JPY rate)
                    if currency.upper() == 'JPY':
                        hist['MarketCap_USD'] = hist['MarketCap_RAW'] / fx.reindex(hist.index).ffill()
                    else:
                        # assume fx series is direct rate currency->USD (e.g., EURUSD = 1.1 meaning 1 EUR = 1.1 USD).
                        # If fx is USD/CUR (unlikely), results may be wrong — user should verify.
                        hist['MarketCap_USD'] = hist['MarketCap_RAW'] * fx.reindex(hist.index).ffill()
                    converted = True
            else:
                # already USD
                hist['MarketCap_USD'] = hist['MarketCap_RAW']
                converted = True

            # get yearly last value (use last trading day of each year)
            hist['Year'] = hist.index.year
            yearly = hist.groupby('Year').agg({
                'MarketCap_RAW': 'last',
                'MarketCap_USD': 'last'
            }).rename(columns={'MarketCap_RAW': 'RawValue', 'MarketCap_USD': 'USDValue'})

            # store
            per_company_yearly[name] = yearly
            company_currency[name] = currency or 'UNKNOWN'
            all_years.update(yearly.index.tolist())
        except Exception as e:
            print(f"  ERROR processing {name} ({ytick}): {e}")
            failed_companies.append((name, ytick, str(e)))
            continue

    if not per_company_yearly:
        print("No company data collected. Exiting.")
        return

    # Build master DataFrame with columns: Year, TotalValue_USD, Company1_USD, Company2_USD, ...
    years = sorted(list(all_years))
    master = pd.DataFrame(index=years)
    master.index.name = 'Year'
    # per-company columns
    for name, yearly in per_company_yearly.items():
        # Ensure index includes all years; reindex and keep NaN where missing
        col_usd = yearly['USDValue'].reindex(years)
        master[f"{name}"] = col_usd

    # compute TotalValue_USD as sum of companies with USD available (skip NaNs)
    master['TotalValue_USD'] = master.sum(axis=1, numeric_only=True)

    # Save CSV
    csv_name = "yearly_values.csv"
    master.reset_index().to_csv(csv_name, index=False, float_format="%.2f")
    print(f"\nSaved CSV: {csv_name}")

    # Save XML
    root = ET.Element("YearlyValues")
    for yr in master.index:
        yel = ET.SubElement(root, "Year", attrib={"value": str(int(yr))})
        total_el = ET.SubElement(yel, "TotalValue_USD")
        total_val = master.at[yr, 'TotalValue_USD']
        total_el.text = "" if pd.isna(total_val) else f"{float(total_val):.2f}"
        # add each company
        for name in per_company_yearly.keys():
            comp_el = ET.SubElement(yel, "Company", attrib={"name": name})
            val = master.at[yr, name]
            if pd.isna(val):
                comp_el.text = ""
                comp_el.set("unit", company_currency.get(name, "UNKNOWN"))
                comp_el.set("converted_to_usd", "false")
            else:
                comp_el.text = f"{float(val):.2f}"
                comp_el.set("unit", "USD")
                comp_el.set("converted_to_usd", "true")
    tree = ET.ElementTree(root)
    xml_name = "yearly_values.xml"
    tree.write(xml_name, encoding="utf-8", xml_declaration=True)
    print(f"Saved XML: {xml_name}")

    # Summary
    print("\nDone. Summary:")
    print(f"  Companies processed: {len(per_company_yearly)}")
    if failed_companies:
        print(f"  Failed companies: {len(failed_companies)} (see printed warnings above)")
    print("  Output files: yearly_values.csv, yearly_values.xml")
    print("\nNotes:")
    print(" - Values converted to USD when possible using Yahoo FX tickers (e.g., 'JPY=X').")
    print(" - If sharesOutstanding was missing for a company, the script used Close price as a proxy (not a true market cap).")
    print(" - Mixed-currency companies may not be included in TotalValue_USD if FX conversion failed.")
    print("\nPlease check the CSV for per-company currency/unit issues before using totals for analysis.")

# Run
if __name__ == "__main__":
    main()


Found 39 tickers to process.


Tickers:   0%|          | 0/39 [00:00<?, ?it/s]


Processing Toei Animation (TYO:4816) -> yfinance ticker: 4816.T


Tickers:   3%|▎         | 1/39 [00:03<02:17,  3.61s/it]


Processing IG Port (Production I.G (Wit Studio, Signal.MD), TYO:3791) -> yfinance ticker: 3791


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: 3791"}}}
$3791: possibly delisted; no timezone found
Tickers:   5%|▌         | 2/39 [00:05<01:44,  2.83s/it]


Processing Bandai Namco Holdings (TYO:7832) -> yfinance ticker: 7832.T


Tickers:   8%|▊         | 3/39 [00:08<01:35,  2.65s/it]


Processing Sony Group (Aniplex (Crunchyroll), TYO:6758) -> yfinance ticker: 6758


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: 6758"}}}
$6758: possibly delisted; no timezone found
Tickers:  10%|█         | 4/39 [00:10<01:27,  2.50s/it]


Processing Kadokawa Corporation (TYO:9468) -> yfinance ticker: 9468.T


Tickers:  13%|█▎        | 5/39 [00:13<01:24,  2.47s/it]


Processing Avex Group (Anime Music / Production) (TYO:7860) -> yfinance ticker: 7860.T


Tickers:  15%|█▌        | 6/39 [00:15<01:18,  2.37s/it]


Processing Bushiroad (Media Mix / Anime IP) (TYO:7815) -> yfinance ticker: 7815.T


Tickers:  18%|█▊        | 7/39 [00:17<01:12,  2.26s/it]


Processing CyberAgent (Owner of Abema (supports anime), TYO:4751) -> yfinance ticker: 4751


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: 4751"}}}
$4751: possibly delisted; no timezone found
Tickers:  21%|██        | 8/39 [00:19<01:08,  2.20s/it]


Processing TV Tokyo Holdings (TYO:9413) -> yfinance ticker: 9413.T


Tickers:  23%|██▎       | 9/39 [00:21<01:05,  2.17s/it]


Processing Fuji Media Holdings (TYO:4676) -> yfinance ticker: 4676.T


Tickers:  26%|██▌       | 10/39 [00:23<01:03,  2.18s/it]


Processing Nippon TV Holdings (Owns Studio Ghibli) (TYO:9404) -> yfinance ticker: 9404.T


Tickers:  28%|██▊       | 11/39 [00:25<01:02,  2.23s/it]


Processing Tokyo Broadcasting System (TBS) (TYO:9401) -> yfinance ticker: 9401.T


Tickers:  31%|███       | 12/39 [00:28<01:01,  2.28s/it]


Processing Asahi Broadcasting Group (TYO:9405) -> yfinance ticker: 9405.T


Tickers:  33%|███▎      | 13/39 [00:30<00:59,  2.28s/it]


Processing WOWOW (Anime channel) (TYO:4839) -> yfinance ticker: 4839.T


Tickers:  36%|███▌      | 14/39 [00:33<00:59,  2.37s/it]


Processing Toho Co. (Ltd., TYO:9602) -> yfinance ticker: 9602


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: 9602"}}}
$9602: possibly delisted; no timezone found
Tickers:  38%|███▊      | 15/39 [00:35<00:54,  2.28s/it]


Processing Shochiku Co. (Ltd., TYO:9601) -> yfinance ticker: 9601


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: 9601"}}}
$9601: possibly delisted; no timezone found
Tickers:  41%|████      | 16/39 [00:37<00:51,  2.22s/it]


Processing Kadokawa Pictures (covered by Kadokawa) (TYO:9468) -> yfinance ticker: 9468.T


Tickers:  44%|████▎     | 17/39 [00:39<00:49,  2.24s/it]


Processing Happinet (anime distributor) (TYO:7552) -> yfinance ticker: 7552.T


Tickers:  46%|████▌     | 18/39 [00:41<00:46,  2.22s/it]


Processing Shueisha (part of Hitotsubashi Group (not public)) -> yfinance ticker: not public)


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: NOT PUBLIC)"}}}
$NOT PUBLIC): possibly delisted; no timezone found
Tickers:  49%|████▊     | 19/39 [00:43<00:44,  2.20s/it]


Processing Square Enix Holdings (TYO:9684) -> yfinance ticker: 9684.T


Tickers:  51%|█████▏    | 20/39 [00:46<00:42,  2.26s/it]


Processing Media Do (digital manga) (TYO:3678) -> yfinance ticker: 3678.T


Tickers:  54%|█████▍    | 21/39 [00:48<00:38,  2.16s/it]


Processing Takara Tomy (TYO:7867) -> yfinance ticker: 7867.T


Tickers:  56%|█████▋    | 22/39 [00:50<00:37,  2.20s/it]


Processing Sanrio (Hello Kitty (anime collabs), TYO:8136) -> yfinance ticker: 8136


$8136: possibly delisted; no timezone found
Tickers:  59%|█████▉    | 23/39 [00:52<00:32,  2.01s/it]


Processing Broccoli Co. (Merch & anime goods) (TYO:2706) -> yfinance ticker: 2706.T


$2706.T: possibly delisted; no timezone found
Tickers:  62%|██████▏   | 24/39 [00:54<00:29,  1.96s/it]


Processing King Records (Starchild) (Owned by Kodansha (not public)) -> yfinance ticker: not public)


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: NOT PUBLIC)"}}}
$NOT PUBLIC): possibly delisted; no timezone found
Tickers:  64%|██████▍   | 25/39 [00:55<00:25,  1.83s/it]


Processing Pony Canyon (TYO:4763) -> yfinance ticker: 4763.T


Tickers:  67%|██████▋   | 26/39 [00:57<00:24,  1.88s/it]


Processing Netflix (Anime Production) (NASDAQ:NFLX) -> yfinance ticker: NFLX


Tickers:  69%|██████▉   | 27/39 [00:59<00:22,  1.85s/it]


Processing Amazon (Anime Licensing) (NASDAQ:AMZN) -> yfinance ticker: AMZN


Tickers:  72%|███████▏  | 28/39 [01:01<00:20,  1.82s/it]


Processing Disney (Anime streaming) (NYSE:DIS) -> yfinance ticker: DIS


Tickers:  74%|███████▍  | 29/39 [01:03<00:20,  2.02s/it]


Processing Rakuten (Rakuten TV) (TYO:4755) -> yfinance ticker: 4755.T


Tickers:  77%|███████▋  | 30/39 [01:05<00:18,  2.05s/it]


Processing Sega Sammy Holdings (TYO:6460) -> yfinance ticker: 6460.T


Tickers:  79%|███████▉  | 31/39 [01:07<00:16,  2.12s/it]


Processing Konami Group (TYO:9766) -> yfinance ticker: 9766.T


Tickers:  82%|████████▏ | 32/39 [01:10<00:14,  2.12s/it]


Processing Capcom (TYO:9697) -> yfinance ticker: 9697.T


Tickers:  85%|████████▍ | 33/39 [01:12<00:12,  2.17s/it]


Processing Koei Tecmo (TYO:3635) -> yfinance ticker: 3635.T


Tickers:  87%|████████▋ | 34/39 [01:14<00:11,  2.23s/it]


Processing NTT Docomo (Anime collaborations) (TYO:9437) -> yfinance ticker: 9437.T


$9437.T: possibly delisted; no timezone found
Tickers:  90%|████████▉ | 35/39 [01:16<00:08,  2.11s/it]


Processing SoftBank Group (TYO:9984) -> yfinance ticker: 9984.T


Tickers:  92%|█████████▏| 36/39 [01:18<00:06,  2.18s/it]


Processing LINE Yahoo Japan (Z Holdings) (TYO:4689) -> yfinance ticker: 4689.T


Tickers:  95%|█████████▍| 37/39 [01:21<00:04,  2.23s/it]


Processing GREE (anime mobile games) (TYO:3632) -> yfinance ticker: 3632.T


Tickers:  97%|█████████▋| 38/39 [01:23<00:02,  2.23s/it]


Processing DeNA (anime mobile games) (TYO:2432) -> yfinance ticker: 2432.T


Tickers: 100%|██████████| 39/39 [01:25<00:00,  2.19s/it]


Saved CSV: yearly_values.csv
Saved XML: yearly_values.xml

Done. Summary:
  Companies processed: 29
  Output files: yearly_values.csv, yearly_values.xml

Notes:
 - Values converted to USD when possible using Yahoo FX tickers (e.g., 'JPY=X').
 - If sharesOutstanding was missing for a company, the script used Close price as a proxy (not a true market cap).
 - Mixed-currency companies may not be included in TotalValue_USD if FX conversion failed.

Please check the CSV for per-company currency/unit issues before using totals for analysis.





In [None]:
#My Anime List scarping script to get all animes by year including their mean and their number of

In [1]:
import requests
import time
import pandas as pd

all_anime = []
page = 1

print("Downloading entire MyAnimeList anime database...\n")

while True:
    url = f"https://api.jikan.moe/v4/anime?page={page}"
    print(f"Fetching page {page} ...")

    response = requests.get(url)

    # Avoid rate limits
    if response.status_code == 429:
        print("Rate limited, waiting 2 seconds...")
        time.sleep(2)
        continue

    data = response.json()

    if "data" not in data or len(data["data"]) == 0:
        print("\n✔ Finished downloading all anime!")
        break

    # Extract relevant info
    for anime in data["data"]:
        title = anime.get("title")
        score = anime.get("score")
        year = anime.get("year")   # None if unknown

        all_anime.append({
            "title": title,
            "score": score,
            "year": year
        })

    page += 1
    time.sleep(1.1)  # Jikan rate limit

# Convert to DataFrame
df = pd.DataFrame(all_anime)

# Clean data
df = df.dropna(subset=["year"])   # remove entries without year

# Group by year
result = df.groupby("year").agg(
    anime_count=("title", "count"),
    mean_score=("score", "mean")
).reset_index()

# Save results
df.to_csv("all_anime_raw.csv", index=False)
result.to_csv("anime_year_stats.csv", index=False)

print("\n✔ Saved:")
print(" - all_anime_raw.csv (full dataset)")
print(" - anime_year_stats.csv (yearly stats)")
print(result)


Downloading entire MyAnimeList anime database...

Fetching page 1 ...
Fetching page 2 ...
Fetching page 3 ...
Fetching page 4 ...
Fetching page 5 ...
Fetching page 6 ...
Fetching page 7 ...
Fetching page 8 ...
Fetching page 9 ...
Fetching page 10 ...
Fetching page 11 ...
Fetching page 12 ...
Fetching page 13 ...
Fetching page 14 ...
Fetching page 15 ...
Fetching page 16 ...
Fetching page 17 ...
Fetching page 18 ...
Fetching page 19 ...
Fetching page 20 ...
Fetching page 21 ...
Fetching page 22 ...
Fetching page 23 ...
Fetching page 24 ...
Fetching page 25 ...
Fetching page 26 ...
Fetching page 27 ...
Fetching page 28 ...
Fetching page 29 ...
Fetching page 30 ...
Fetching page 31 ...
Fetching page 32 ...
Fetching page 33 ...
Fetching page 34 ...
Fetching page 35 ...
Fetching page 36 ...
Fetching page 37 ...
Fetching page 38 ...
Fetching page 39 ...
Fetching page 40 ...
Fetching page 41 ...
Fetching page 42 ...
Fetching page 43 ...
Fetching page 44 ...
Fetching page 45 ...
Fetching page 

In [18]:

!pip install scipy

In [25]:
import pandas as pd
import numpy as np
import math

# -----------------------------
# 1. Load datasets
# -----------------------------
anime = pd.read_csv("anime_year_stats.csv")       # columns: year, anime_count, mean_score
market = pd.read_csv("yearly_values.csv")        # columns: Year, TotalValue_USD, etc.

# Ensure column names match
anime.rename(columns={"year": "Year"}, inplace=True)

# Merge on Year
df = anime.merge(market, on="Year", how="inner")

# -----------------------------
# 2. Compute Pearson correlation manually
# -----------------------------
def pearson_corr(x, y):
    x_mean = np.mean(x)
    y_mean = np.mean(y)
    numerator = np.sum((x - x_mean) * (y - y_mean))
    denominator = np.sqrt(np.sum((x - x_mean)**2) * np.sum((y - y_mean)**2))
    r = numerator / denominator
    return r

# -----------------------------
# 3. Compute two-tailed p-value from r
# -----------------------------
def pearson_p_value(r, n):
    if n < 3:
        return float('nan')
    t_stat = r * math.sqrt((n - 2) / (1 - r**2))
    # two-tailed p-value using normal approximation
    p = 2 * (1 - 0.5 * (1 + math.erf(abs(t_stat) / math.sqrt(2))))
    return p

# -----------------------------
# 4. Compute correlations and p-values
# -----------------------------
n = len(df)

# Mean score vs market value
r_score = pearson_corr(df["mean_score"], df["TotalValue_USD"])
p_score = pearson_p_value(r_score, n)

# Anime count vs market value
r_count = pearson_corr(df["anime_count"], df["TotalValue_USD"])
p_count = pearson_p_value(r_count, n)

# -----------------------------
# 5. Print results
# -----------------------------
print("=== CORRELATION RESULTS ===")
print(f"Mean Score ↔ Market Value:  r = {r_score:.4f}, p = {p_score:.4e}")
print(f"Anime Count ↔ Market Value: r = {r_count:.4f}, p = {p_count:.4e}")

# -----------------------------
# 6. Save results to file (UTF-8)
# -----------------------------
with open("correlation_results.txt", "w", encoding="utf-8") as f:
    f.write("=== CORRELATION RESULTS ===\n\n")
    f.write(f"Mean Score ↔ Market Value\n  r = {r_score:.4f}\n  p = {p_score:.4e}\n\n")
    f.write(f"Anime Count ↔ Market Value\n  r = {r_count:.4f}\n  p = {p_count:.4e}\n")

print("\nSaved results to correlation_results.txt")



=== CORRELATION RESULTS ===
Mean Score ↔ Market Value:  r = 0.3356, p = 5.0327e-03
Anime Count ↔ Market Value: r = 0.7723, p = 0.0000e+00

Saved results to correlation_results.txt
