In [1]:
"""
DNFileVault Downloader for Windows (Python)
------------------------------------------------------------------
This script downloads ALL files from your DNFileVault account:
1. All Purchases
2. All Groups


It is designed for Windows users and includes explanations for each part.


BEFORE YOU RUN THIS:
1. You need Python installed.
2. You need the 'requests' library.
   Open Command Prompt (cmd.exe) or PowerShell and run:
       pip install requests


HOW TO CONFIGURE:
Scroll down to the "CONFIGURATION" section below and enter your
email, password, and where you want files to go.


TROUBLESHOOTING TIMEOUTS:
DNFileVault has anti-scanner protection. If requests look "bot-like" 
(e.g., default python-requests/curl User-Agent), the API will 
intentionally slow down responses (throttling). 
We fix this by setting a custom User-Agent and using longer timeouts.
"""


import os
import re
import sys
import time
from datetime import datetime


# We use the 'requests' library to talk to the internet (API).
try:
    import requests
except ImportError:
    print("ERROR: The 'requests' library is not installed.")
    print("Please open PowerShell or Command Prompt and run:")
    print("    pip install requests")
    print("")
    input("Press Enter to exit...")
    sys.exit(1)




# ==============================================================================
# CONFIGURATION
# ==============================================================================
# Enter your DNFileVault email and password here.
# NOTE: Keep the quotes "" around your text.


EMAIL = "ernest@predictnow.ai"
PASSWORD = "saby@123"


# Where should the files be saved?
# On Windows, you can use paths like r"C:\Downloads\DNFileVault"
# The 'r' before the quote tells Python to treat backslashes `\` normally.
OUTPUT_FOLDER = r"D:\work\Trade Analysis\Options Data"


# Filter settings (Optional)
# Set DAYS_TO_CHECK to a number (e.g., 1) to only download the newest files (top N).
# For example, set to 1 to only download the very latest file.
# Set to None to download EVERYTHING.
DAYS_TO_CHECK = None


# ==============================================================================




# Constants for the API
BASE_URL = "https://api.dnfilevault.com"
# A custom User-Agent identifies your script and prevents the API from throttling you.
USER_AGENT = "DNFileVaultClient/1.0 (+support@deltaneutral.com)"


def sanitize_filename(name):
    """
    Cleans up a filename so Windows is happy.
    Windows doesn't like characters like: < > : " / \ | ? *
    """
    if not name:
        return "unnamed_file"
    # Replace bad characters with an underscore
    clean = re.sub(r'[<>:"/\\\\|?*]', "_", str(name))
    return clean.strip() or "unnamed_file"


def ensure_folder_exists(folder_path):
    """
    Checks if a folder exists. If not, it creates it.
    """
    if not os.path.exists(folder_path):
        try:
            os.makedirs(folder_path)
            print(f"Created folder: {folder_path}")
        except OSError as e:
            print(f"Error creating folder {folder_path}: {e}")


def login_to_api(session):
    """
    Logs in using the EMAIL and PASSWORD variables.
    Returns the 'token' needed for downloading files.
    """
    print(f"Step 1: Logging in as {EMAIL}...")
    
    login_url = f"{BASE_URL}/auth/login"
    payload = {
        "email": EMAIL,
        "password": PASSWORD
    }
    
    try:
        # Send a POST request to the login page (using 60s timeout for stability)
        response = session.post(login_url, json=payload, timeout=60)
        
        # Check if login worked (Status code 200 means OK)
        if response.status_code == 200:
            data = response.json()
            token = data.get("token")
            print("Login successful!")
            return token
        elif response.status_code == 401:
            print("Login failed: Incorrect email or password. Please check your EMAIL and PASSWORD settings.")
        else:
            print(f"Login failed: Server returned error {response.status_code}")
            print(response.text)
            
    except requests.exceptions.ConnectionError:
        print("Login failed: ERROR - No internet connection or DNS failure. (Could not reach api.dnfilevault.com)")
    except requests.exceptions.Timeout:
        print("Login failed: ERROR - The request timed out. Your connection might be slow or the server is busy.")
    except requests.exceptions.RequestException as e:
        print(f"Login failed: ERROR - A network problem occurred: {e}")
        
    return None


def download_file(session, file_info, save_directory):
    """
    Downloads a single file to the save_directory.
    Checks if file already exists to avoid re-downloading.
    """
    filename = file_info.get("uuid_filename")
    display_name = file_info.get("display_name") or filename
    
    # Clean the name for Windows
    safe_name = sanitize_filename(display_name)
    full_save_path = os.path.join(save_directory, safe_name)
    
    # Check if we already have it
    if os.path.exists(full_save_path):
        # File exists - skip instructions
        # print(f"Skipping (already exists): {safe_name}")
        return


    download_url = f"{BASE_URL}/download/{filename}"
    
    print(f"Downloading: {safe_name}")
    
    try:
        # stream=True allows us to download large files without using all RAM.
        # We use a 300s (5 minute) timeout for downloads to handle large ZIP files.
        with session.get(download_url, stream=True, timeout=300) as r:
            if r.status_code == 200:
                # Write to a temporary file first
                temp_path = full_save_path + ".tmp"
                with open(temp_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024*1024): # 1 MB chunks
                        if chunk:
                            f.write(chunk)
                
                # Rename temp file to final name
                if os.path.exists(full_save_path):
                    os.remove(full_save_path)
                os.rename(temp_path, full_save_path)
            else:
                print(f"Failed to download {safe_name}. Status: {r.status_code}")
    except Exception as e:
        print(f"Error downloading {safe_name}: {e}")


def main():
    print("--- DNFileVault Windows Downloader ---")
    print(f"Saving files to: {OUTPUT_FOLDER}")
    
    # 1. Start a web session
    session = requests.Session()
    session.headers.update({"User-Agent": USER_AGENT})
    
    # 2. Login
    token = login_to_api(session)
    if not token:
        # If no token, we can't continue
        print("Stopping script due to login failure.")
        input("Press Enter to exit...")
        return


    # Add the token to all future requests
    session.headers.update({"Authorization": f"Bearer {token}"})


    # Prepare the output folder
    ensure_folder_exists(OUTPUT_FOLDER)


    # 3. Download Purchases
    # --------------------------------------------------------------------------
    print("\n--- Checking Purchases ---")
    try:
        resp = session.get(f"{BASE_URL}/purchases", timeout=60)
        data = resp.json()
        purchases = data.get("purchases", [])
    except Exception as e:
        print(f"Error checking purchases: {e}")
        purchases = []
        
    if not purchases:
        print("No purchases found.")


    for p in purchases:
        # Create a folder for each product
        folder_name = f"{p['id']} - {p.get('product_name', 'Unknown')}"
        safe_folder_name = sanitize_filename(folder_name)
        product_path = os.path.join(OUTPUT_FOLDER, "Purchases", safe_folder_name)
        ensure_folder_exists(product_path)
        
        # Get files for this purchase
        try:
            files_resp = session.get(f"{BASE_URL}/purchases/{p['id']}/files", timeout=60)
            files = files_resp.json().get("files", [])
            
            # Determine which files to download
            files_to_download = files
            if DAYS_TO_CHECK is not None:
                # If we have a limit, take only the first N files
                # (Assumes the API returns them sorted newly created -> older)
                files_to_download = files[:DAYS_TO_CHECK]


            for f in files_to_download:
                download_file(session, f, product_path)
                
        except Exception as e:
            print(f"Error getting files for purchase {p['id']}: {e}")


    # 4. Download Groups
    # --------------------------------------------------------------------------
    print("\n--- Checking Groups ---")
    try:
        resp = session.get(f"{BASE_URL}/groups", timeout=60)
        data = resp.json()
        groups = data.get("groups", [])
    except Exception as e:
        print(f"Error checking groups: {e}")
        groups = []


    if not groups:
        print("No groups found.")


    for g in groups:
        # Create a folder for each group
        folder_name = f"{g['id']} - {g.get('name', 'Unknown')}"
        safe_folder_name = sanitize_filename(folder_name)
        group_path = os.path.join(OUTPUT_FOLDER, "Groups", safe_folder_name)
        ensure_folder_exists(group_path)
        
        # Get files for this group
        try:
            files_resp = session.get(f"{BASE_URL}/groups/{g['id']}/files", timeout=60)
            files = files_resp.json().get("files", [])
            
            # Determine which files to download
            files_to_download = files
            if DAYS_TO_CHECK is not None:
                # If we have a limit, take only the first N files
                files_to_download = files[:DAYS_TO_CHECK]


            for f in files_to_download:
                download_file(session, f, group_path)
                
        except Exception as e:
            print(f"Error getting files for group {g['id']}: {e}")


    print("\n------------------------------------------------------------------")
    print("All done!")
    print(f"Check your files in: {OUTPUT_FOLDER}")
    # Pause so the user can see the message if they double-clicked the script
    input("Press Enter to close this window...")


if __name__ == "__main__":
    main()


--- DNFileVault Windows Downloader ---
Saving files to: D:\work\Trade Analysis\Options Data
Step 1: Logging in as ernest@predictnow.ai...
Login successful!

--- Checking Purchases ---
No purchases found.

--- Checking Groups ---
Created folder: D:\work\Trade Analysis\Options Data\Groups\2 - eodLevel3
Downloading: L3_20260123.zip
Downloading: L3_20260122.zip
Downloading: L3_20260121.zip
Downloading: L3_20260120.zip
Downloading: L3_20260116.zip
Downloading: L3_20260115.zip
Downloading: L3_20260114.zip
Downloading: L3_20260113.zip
Downloading: L3_20260112.zip
Downloading: L3_20260109.zip
Downloading: L3_20260108.zip
Downloading: L3_20260107.zip
Downloading: L3_20260106.zip
Downloading: L3_20260105.zip
Downloading: L3_20260102.zip
Downloading: L3_20251231.zip
Downloading: L3_20251230.zip
Downloading: L3_20251229.zip
Downloading: L3_20251226.zip
Downloading: L3_20251224.zip
Downloading: L3_20251223.zip
Downloading: L3_20251222.zip
Downloading: L3_20251219.zip
Downloading: L3_20251218.zip
Do

In [1]:
import os, re, zipfile
import pandas as pd
import numpy as np

# =========================
# CONFIG
# =========================
BASE_DIR = r"D:\work\Trade Analysis\Options Data\Groups\2 - eodLevel3"
XAUUSD_CSV = r"D:\work\Trade Analysis\XAUUSD.csv"   # <-- change this path on your machine
LOOKBACK_DAYS = 90
CHUNKSIZE = 600_000

# if your xau file uses different column names, set here:
XAU_DATE_COL = "date"
XAU_CLOSE_COL = "close"

OUT_CSV = os.path.join(BASE_DIR, "_stoploss_wall_output", "underlying_match_to_XAUUSD.csv")
os.makedirs(os.path.dirname(OUT_CSV), exist_ok=True)

# =========================
# HELPERS
# =========================
def list_l3_zips(base_dir: str):
    items = []
    for name in os.listdir(base_dir):
        m = re.match(r"^L3[_\-](\d{8})\.zip$", name, re.IGNORECASE)
        if m:
            items.append((m.group(1), os.path.join(base_dir, name)))
    items.sort(key=lambda x: x[0])
    return items

def pick_main_member(zpath: str) -> str:
    with zipfile.ZipFile(zpath, "r") as z:
        members = [m for m in z.infolist() if not m.is_dir()]
        members = [m for m in members if re.search(r"\.(csv|tsv|txt)$", m.filename, re.I)]
        if not members:
            raise FileNotFoundError(f"No csv/tsv/txt inside zip: {zpath}")
        option_like = [m for m in members if re.search(r"option", os.path.basename(m.filename), re.I)]
        pool = option_like if option_like else members
        pool.sort(key=lambda m: m.file_size, reverse=True)
        return pool[0].filename

def sniff_delim(first_line: str) -> str:
    return "\t" if first_line.count("\t") > first_line.count(",") else ","

# =========================
# LOAD XAUUSD DAILY SERIES
# =========================
xau = pd.read_csv(XAUUSD_CSV)
xau[XAU_DATE_COL] = pd.to_datetime(xau[XAU_DATE_COL], errors="coerce")
xau["DataDate"] = xau[XAU_DATE_COL].dt.date
xau["DataDate"] = pd.to_datetime(xau["DataDate"])
xau_daily = xau.groupby("DataDate")[XAU_CLOSE_COL].last().reset_index()
xau_daily = xau_daily.rename(columns={XAU_CLOSE_COL: "XAUUSD_Close"}).sort_values("DataDate")

# =========================
# BUILD DAILY UNDERLYING PRICE FOR EACH SYMBOL FROM L3
# =========================
zips = list_l3_zips(BASE_DIR)[-LOOKBACK_DAYS:]
all_daily = []

for date_str, zpath in zips:
    d = pd.to_datetime(date_str, format="%Y%m%d")
    member = pick_main_member(zpath)

    with zipfile.ZipFile(zpath, "r") as z:
        with z.open(member) as f:
            first_line = f.readline().decode("utf-8", errors="ignore")
        delim = sniff_delim(first_line)

        with z.open(member) as f:
            reader = pd.read_csv(
                f, sep=delim,
                usecols=lambda c: c.strip() in ["UnderlyingSymbol", "UnderlyingPrice"],
                chunksize=CHUNKSIZE, low_memory=True
            )

            # accumulate per-symbol prices for that day
            sym_prices = []
            for chunk in reader:
                # normalize column names
                chunk.columns = [c.strip() for c in chunk.columns]
                if "UnderlyingSymbol" not in chunk.columns or "UnderlyingPrice" not in chunk.columns:
                    continue

                chunk["UnderlyingSymbol"] = chunk["UnderlyingSymbol"].astype(str).str.strip()
                chunk["UnderlyingPrice"] = pd.to_numeric(chunk["UnderlyingPrice"], errors="coerce")
                chunk = chunk.dropna(subset=["UnderlyingPrice"])

                # median per symbol (robust if there are any odd rows)
                day_med = chunk.groupby("UnderlyingSymbol")["UnderlyingPrice"].median().reset_index()
                sym_prices.append(day_med)

            if sym_prices:
                day_df = pd.concat(sym_prices, ignore_index=True)
                # if duplicates across chunks, median again
                day_df = day_df.groupby("UnderlyingSymbol")["UnderlyingPrice"].median().reset_index()
                day_df["DataDate"] = d.date()
                all_daily.append(day_df)

if not all_daily:
    raise SystemExit("No underlying price data extracted from zips. Check file columns/delimiter.")

u = pd.concat(all_daily, ignore_index=True)
u["DataDate"] = pd.to_datetime(u["DataDate"])
u = u.sort_values(["UnderlyingSymbol", "DataDate"])

# =========================
# SCORE EACH SYMBOL VS XAUUSD
# =========================
scores = []
for sym, g in u.groupby("UnderlyingSymbol"):
    m = g.merge(xau_daily, on="DataDate", how="inner")
    if len(m) < 15:
        continue

    # return correlation (most important)
    m["u_ret"] = np.log(m["UnderlyingPrice"] / m["UnderlyingPrice"].shift(1))
    m["x_ret"] = np.log(m["XAUUSD_Close"] / m["XAUUSD_Close"].shift(1))
    corr_ret = m[["u_ret", "x_ret"]].corr().iloc[0, 1]

    # level ratio stability (secondary)
    ratio = (m["XAUUSD_Close"] / m["UnderlyingPrice"]).replace([np.inf, -np.inf], np.nan).dropna()
    ratio_std = ratio.std() if len(ratio) else np.nan
    ratio_med = ratio.median() if len(ratio) else np.nan

    scores.append({
        "UnderlyingSymbol": sym,
        "OverlapDays": len(m),
        "RetCorr_with_XAUUSD": corr_ret,
        "MedianScale_XAUUSD_div_Underlying": ratio_med,
        "ScaleStd": ratio_std,
        "LastUnderlyingPrice": m["UnderlyingPrice"].iloc[-1],
        "LastXAUUSD": m["XAUUSD_Close"].iloc[-1],
    })

scores_df = pd.DataFrame(scores)
scores_df = scores_df.sort_values(["RetCorr_with_XAUUSD", "OverlapDays"], ascending=[False, False])

scores_df.to_csv(OUT_CSV, index=False)
print("Saved ranking to:", OUT_CSV)
print("\nTOP 15 candidates:")
print(scores_df.head(15).to_string(index=False))


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Saved ranking to: D:\work\Trade Analysis\Options Data\Groups\2 - eodLevel3\_stoploss_wall_output\underlying_match_to_XAUUSD.csv

TOP 15 candidates:
UnderlyingSymbol  OverlapDays  RetCorr_with_XAUUSD  MedianScale_XAUUSD_div_Underlying  ScaleStd  LastUnderlyingPrice  LastXAUUSD
             UGL           34             0.952886                          76.368341  2.674075                71.64     4959.03
             GLD           34             0.951281                          10.877538  0.030493               451.89     4959.03
             IAU           34             0.950802                          53.112509  0.149589                92.56     4959.03
            SGOL           34             0.950616                         104.959969  0.292273                46.86     4959.03
            OUNZ           34             0.948353                         103.942970  0.299100                47.31     4959.03
            GBUG           23             0.936928                          97