In [4]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from itertools import permutations
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

# Check if the date is a weekday (Monday = 0, Sunday = 6)
def is_weekday(date):
    return date.weekday() < 5  # 0-4 are weekdays (Monday to Friday)

# Get the most recent weekday if current day is weekend
def get_latest_weekday():
    today = datetime.today()
    if today.weekday() >= 5:  # If it's weekend
        # Calculate days to subtract to get to Friday
        days_to_subtract = today.weekday() - 4
        return today - timedelta(days=days_to_subtract)
    else:
        return today - timedelta(days=1)  # Yesterday if today is a weekday

currencies = [
    'USD', 'EUR', 'JPY', 'GBP', 'CNY', 'AUD', 'CAD', 'CHF', 'HKD', 'SGD',
    'KRW', 'NOK', 'NZD', 'SEK', 'MXN', 'INR', 'RUB', 'ZAR', 'BRL', 'TRY'
]

# All permutations of 2 currencies (e.g., USD/EUR and EUR/USD)
currency_pairs = list(permutations(currencies, 2))

# Convert pairs to symbols (no exclusions)
symbols = [f"{base}{quote}=X" for base, quote in currency_pairs]

print(f"Processing {len(symbols)} currency pairs")

# Get the latest weekday date
latest_weekday = get_latest_weekday()

# Only proceed if it's a weekday
if not is_weekday(latest_weekday):
    print(f"Error: The calculated date ({latest_weekday.strftime('%Y-%m-%d')}) is not a weekday.")
    print("Please check the date logic in the code.")
    exit(1)

# 1-minute data for the latest weekday
interval = '1m'
start_date = latest_weekday.replace(hour=0, minute=0, second=0, microsecond=0)
end_date = start_date + timedelta(days=1)

start_str = start_date.strftime('%Y-%m-%d')
end_str = end_date.strftime('%Y-%m-%d')

print(f"Downloading data for: {start_str} (a {start_date.strftime('%A')})")

# Output directory
os.makedirs("forex_data", exist_ok=True)

# Shared list for invalid pairs
invalid_pairs = []

def fetch_and_save(symbol):
    try:
        print(f"Fetching {symbol}...")
        data = yf.download(
            tickers=symbol,
            start=start_str,
            end=end_str,
            interval=interval,
            progress=False
        )

        if data.empty:
            print(f"❌ No data for {symbol}")
            return symbol  # Return invalid symbol

        filename = f"forex_data/{symbol}_{start_str}.csv"
        data.to_csv(filename)
        print(f"✅ Saved {symbol} to {filename}")
        return None  # Success

    except Exception as e:
        print(f"⚠️ Error fetching {symbol}: {e}")
        return symbol  # Return as invalid

# Run in parallel
max_threads = min(50, len(symbols))  # Avoid hammering the server
with ThreadPoolExecutor(max_workers=max_threads) as executor:
    future_to_symbol = {executor.submit(fetch_and_save, symbol): symbol for symbol in symbols}
    for future in as_completed(future_to_symbol):
        result = future.result()
        if result:
            invalid_pairs.append(result)

# Summary
print(f"\nFinished. {len(invalid_pairs)} pairs had no data.")
if invalid_pairs:
    print("Invalid pairs:", invalid_pairs)
    
# Optional: Save the list of invalid pairs for future reference
if invalid_pairs:
    with open("invalid_forex_pairs.txt", "w") as f:
        for pair in sorted(invalid_pairs):
            f.write(f"{pair}\n")
    print(f"Saved list of invalid pairs to invalid_forex_pairs.txt")

Processing 380 currency pairs
Downloading data for: 2025-04-18 (a Friday)
Fetching USDEUR=X...
Fetching USDJPY=X...
Fetching USDGBP=X...
Fetching USDCNY=X...
Fetching USDAUD=X...
Fetching USDCAD=X...
Fetching USDCHF=X...
Fetching USDHKD=X...
Fetching USDSGD=X...
Fetching USDKRW=X...
Fetching USDNOK=X...
Fetching USDNZD=X...
Fetching USDSEK=X...
Fetching USDMXN=X...
Fetching USDINR=X...
Fetching USDRUB=X...
Fetching USDZAR=X...
Fetching USDBRL=X...
Fetching USDTRY=X...
Fetching EURUSD=X...
Fetching EURJPY=X...
Fetching EURGBP=X...
Fetching EURCNY=X...
Fetching EURAUD=X...
Fetching EURCAD=X...
Fetching EURCHF=X...
Fetching EURHKD=X...
Fetching EURSGD=X...
Fetching EURKRW=X...
Fetching EURNOK=X...
Fetching EURNZD=X...
Fetching EURSEK=X...
Fetching EURMXN=X...
Fetching EURINR=X...
Fetching EURRUB=X...
Fetching EURZAR=X...
Fetching EURBRL=X...
Fetching EURTRY=X...
Fetching JPYUSD=X...
Fetching JPYEUR=X...
Fetching JPYGBP=X...
Fetching JPYCNY=X...
Fetching JPYAUD=X...
Fetching JPYCAD=X...
Fe

In [None]:
import networkx as nx
from pyvis.network import Network
import pandas as pd
import os
from datetime import datetime
from IPython.display import display, HTML

# Directory where forex data is saved
data_dir = "forex_data"

# Create a directed graph
G = nx.DiGraph()

# Dictionary to store the latest timestamp for each currency pair
latest_timestamps = {}

for filename in os.listdir(data_dir):
    if not filename.endswith(".csv"):
        continue
        
    filepath = os.path.join(data_dir, filename)
    
    # Extract currency pair from filename
    symbol = filename.split('_')[0]  # e.g., "USDJPY=X"
    print(f"\n--- Processing {filename} ---")
    
    # Make sure we have a valid currency pair before proceeding
    if len(symbol) >= 6:
        base, quote = symbol[:3], symbol[3:6]
    else:
        print(f"Skipping {filename}: Invalid symbol format")
        continue
        
    try:
        # Read the CSV file - first just read the raw data to inspect it
        print(f"Reading {filepath}")
        
        # Now let's read the actual data
        df = pd.read_csv(filepath)
        
        # Print the columns and first few rows for debugging
        print("\nColumns in the dataframe:")
        print(df.columns.tolist())
        
        print("\nFirst few rows:")
        print(df.head().to_string())

        # Try to identify the value column - could be 'Open', 'Price', etc.
        value_column = None
        possible_columns = ['Open', 'Price', 'Close', 'Value', 'Rate']
        
        for col in possible_columns:
            if col in df.columns:
                value_column = col
                print(f"\nUsing {value_column} as the value column")
                break
        
        # If we still don't have a value column, try to find numerical columns
        if value_column is None:
            for col in df.columns:
                try:
                    # Check if this column can be converted to numeric values
                    test_val = pd.to_numeric(df[col], errors='coerce')
                    if not test_val.isna().all():
                        value_column = col
                        print(f"\nUsing {value_column} as the value column (detected as numeric)")
                        break
                except:
                    continue
        
        # Special handling for specific format seen in your example
        if value_column is None and 'Ticker' in df.iloc[0].values:
            print("\nDetected ticker-style format, rearranging data")
            # Create a new dataframe with the first row as column headers
            new_headers = df.iloc[0]
            df = df.iloc[1:]
            df.columns = new_headers
            print("\nNew columns:")
            print(df.columns.tolist())
            
            # Try again to find the value column
            for col in possible_columns:
                if col in df.columns:
                    value_column = col
                    print(f"\nUsing {value_column} as the value column after rearrangement")
                    break
                    
        # If we still don't have a value column, try to use the second column (common pattern)
        if value_column is None and len(df.columns) > 1:
            value_column = df.columns[1]  # Use second column as a fallback
            print(f"\nFallback: Using {value_column} as the value column")
        
        # If still no value column found, skip this file
        if value_column is None:
            print(f"\nCould not find a suitable value column for {filename}, skipping.")
            continue
            
        # Now process the datetime index
        # Check if we have a column called Datetime or similar
        datetime_col = None
        for col in df.columns:
            if 'date' in col.lower() or 'time' in col.lower():
                datetime_col = col
                break
        
        if datetime_col:
            # Convert the column to datetime and set as index
            print(f"\nUsing {datetime_col} as datetime column")
            df[datetime_col] = pd.to_datetime(df[datetime_col], errors='coerce')
            df.set_index(datetime_col, inplace=True)
        else:
            # Try using the index if it looks like datetime strings
            try:
                df.index = pd.to_datetime(df.index, errors='coerce')
                print("\nConverted index to datetime")
            except:
                print("\nCould not convert index to datetime")
                # If we can't convert the index, create a dummy index
                if not isinstance(df.index, pd.DatetimeIndex):
                    print("\nCreating artificial datetime index")
                    now = datetime.now()
                    df.index = pd.date_range(start=now, periods=len(df), freq='1min')
        
        # Drop any rows where datetime parsing failed
        df = df[~df.index.isna()]
        
        # Sort by datetime to ensure we get the latest data
        df = df.sort_index()
        
        # Convert value column to numeric
        df[value_column] = pd.to_numeric(df[value_column], errors='coerce')
        
        # Get the most recent data point
        if not df.empty:
            latest_row = df.tail(1)
            latest_timestamp = latest_row.index[0]
            
            # Extract the rate at the latest time
            if value_column in latest_row.columns and not latest_row[value_column].isna().all():
                rate = latest_row[value_column].iloc[0]
                if pd.notna(rate):
                    print(f"\nAdding edge: {base} -> {quote} with rate {rate} at {latest_timestamp}")
                    G.add_edge(base, quote, weight=float(rate), title=f"{base}→{quote}: {float(rate):.4f}")
                    
                    # Store the timestamp for reference
                    latest_timestamps[f"{base}-{quote}"] = latest_timestamp
            else:
                print(f"\nNo valid rate found for {symbol} in column {value_column}")
        else:
            print(f"\nNo data found for {symbol}")
            
    except Exception as e:
        print(f"\nError processing {filename}: {e}")
        import traceback
        traceback.print_exc()

# Get the overall latest timestamp for the title
most_recent_time = "Unknown"
if latest_timestamps:
    most_recent_time = max(latest_timestamps.values()).strftime("%Y-%m-%d %H:%M:%S")

# If we have edges in our graph, proceed with visualization
if len(G.edges()) > 0:
    print(f"\nCreating graph with {len(G.nodes())} nodes and {len(G.edges())} edges")
    
    # Create Pyvis network
    net = Network(height="600px", width="100%", notebook=True, directed=True)
    
    # Configure physics for better layout
    net.barnes_hut(gravity=-80000, central_gravity=0.3, spring_length=250, spring_strength=0.001)
    
    # Configure other display options
    net.set_options("""
    const options = {
      "nodes": {
        "font": {"size": 16, "face": "Tahoma", "color": "black"},
        "color": {"border": "#222222", "background": "#6EA7FF"},
        "shape": "circle",
        "size": 30
      },
      "edges": {
        "color": {"inherit": false, "color": "#666666"},
        "arrows": {"to": {"enabled": true, "scaleFactor": 0.5}},
        "smooth": {"enabled": true, "type": "dynamic"},
        "font": {"size": 12, "face": "Arial", "color": "#555555", "align": "middle"}
      },
      "physics": {
        "barnesHut": {
          "gravitationalConstant": -80000,
          "centralGravity": 0.3,
          "springLength": 250,
          "springConstant": 0.001,
          "damping": 0.09
        },
        "minVelocity": 0.75,
        "solver": "barnesHut"
      },
      "interaction": {
        "navigationButtons": true,
        "keyboard": true,
        "hover": true
      }
    }
    """)

    # Add nodes with currency codes
    for node in G.nodes():
        net.add_node(node, label=node, title=node, font={"size": 20, "bold": True})
    
    # Add edges with rate information
    for source, target, data in G.edges(data=True):
        rate = data['weight']
        net.add_edge(source, target, value=1, 
                     title=f"{source} → {target}: {rate:.6f}", 
                     label=f"{rate:.4f}")
    
    # Save and display
    html_file = "forex_network.html"
    net.show(html_file)
    
    # Print summary of the exchange network
    print("\nCurrency Exchange Network Summary:")
    print(f"Number of currencies: {len(G.nodes())}")
    print(f"Number of direct exchange rates: {len(G.edges())}")
    
    # Create a DataFrame of all exchange rates for easy reference
    exchange_rates = []
    for u, v, data in G.edges(data=True):
        exchange_rates.append({
            'Base Currency': u,
            'Quote Currency': v,
            'Exchange Rate': data['weight'],
            'Last Updated': latest_timestamps.get(f"{u}-{v}", "Unknown")
        })
    
    rates_df = pd.DataFrame(exchange_rates)
    display(rates_df)
    
else:
    print("\nNo valid edges found for the graph. Check your data and parsing logic.")


--- Processing CNYNZD=X_2025-04-18.csv ---
Reading forex_data/CNYNZD=X_2025-04-18.csv

Columns in the dataframe:
['Price', 'Close', 'Close.1', 'High', 'High.1', 'Low', 'Low.1', 'Open', 'Open.1', 'Volume', 'Volume.1']

First few rows:
                       Price               Close            Close.1                High             High.1                 Low              Low.1                Open             Open.1    Volume  Volume.1
0                     Ticker            USDEUR=X           USDHKD=X            USDEUR=X           USDHKD=X            USDEUR=X           USDHKD=X            USDEUR=X           USDHKD=X  USDEUR=X  USDHKD=X
1                   Datetime                 NaN                NaN                 NaN                NaN                 NaN                NaN                 NaN                NaN       NaN       NaN
2  2025-04-17 23:00:00+00:00  0.8792999982833862  7.763299942016602  0.8792999982833862  7.763299942016602  0.8792999982833862  7.762899875640869  0.8

Unnamed: 0,Base Currency,Quote Currency,Exchange Rate,Last Updated
0,CNY,NZD,0.87750,1970-01-01 00:00:00.000001351
1,CNY,USD,0.87750,1970-01-01 00:00:00.000001351
2,CNY,CAD,0.87750,1970-01-01 00:00:00.000001351
3,CNY,MXN,0.87750,1970-01-01 00:00:00.000001351
4,CNY,RUB,0.87750,1970-01-01 00:00:00.000001351
...,...,...,...,...
374,CAD,JPY,10.47167,1970-01-01 00:00:00.000001321
375,CAD,ZAR,10.47167,1970-01-01 00:00:00.000001321
376,CAD,NOK,10.47167,1970-01-01 00:00:00.000001321
377,CAD,INR,10.47167,1970-01-01 00:00:00.000001321


In [8]:
import networkx as nx
from pyvis.network import Network
import pandas as pd
import os
from datetime import datetime
from IPython.display import display, HTML

# Directory where forex data is saved
data_dir = "forex_data"

# Create a directed graph
G = nx.DiGraph()

# Dictionary to store the latest timestamp for each currency pair
latest_timestamps = {}

for filename in os.listdir(data_dir):
    if not filename.endswith(".csv"):
        continue
        
    filepath = os.path.join(data_dir, filename)
    
    # Extract currency pair from filename
    symbol = filename.split('_')[0]  # e.g., "USDJPY=X"
    print(f"\n--- Processing {filename} ---")
    
    # Make sure we have a valid currency pair before proceeding
    if len(symbol) >= 6:
        base, quote = symbol[:3], symbol[3:6]
    else:
        print(f"Skipping {filename}: Invalid symbol format")
        continue
        
    try:
        print(f"Reading {filepath}")
        df = pd.read_csv(filepath)
        
        print("\nColumns in the dataframe:")
        print(df.columns.tolist())
        
        print("\nFirst few rows:")
        print(df.head().to_string())

        value_column = None
        possible_columns = ['Open', 'Price', 'Close', 'Value', 'Rate']
        
        for col in possible_columns:
            if col in df.columns:
                value_column = col
                print(f"\nUsing {value_column} as the value column")
                break
        
        if value_column is None:
            for col in df.columns:
                try:
                    test_val = pd.to_numeric(df[col], errors='coerce')
                    if not test_val.isna().all():
                        value_column = col
                        print(f"\nUsing {value_column} as the value column (detected as numeric)")
                        break
                except:
                    continue
        
        if value_column is None and 'Ticker' in df.iloc[0].values:
            print("\nDetected ticker-style format, rearranging data")
            new_headers = df.iloc[0]
            df = df.iloc[1:]
            df.columns = new_headers
            print("\nNew columns:")
            print(df.columns.tolist())
            
            for col in possible_columns:
                if col in df.columns:
                    value_column = col
                    print(f"\nUsing {value_column} as the value column after rearrangement")
                    break
                    
        if value_column is None and len(df.columns) > 1:
            value_column = df.columns[1]
            print(f"\nFallback: Using {value_column} as the value column")
        
        if value_column is None:
            print(f"\nCould not find a suitable value column for {filename}, skipping.")
            continue
            
        datetime_col = None
        for col in df.columns:
            if 'date' in col.lower() or 'time' in col.lower():
                datetime_col = col
                break
        
        if datetime_col:
            print(f"\nUsing {datetime_col} as datetime column")
            df[datetime_col] = pd.to_datetime(df[datetime_col], errors='coerce')
            df.set_index(datetime_col, inplace=True)
        else:
            try:
                df.index = pd.to_datetime(df.index, errors='coerce')
                print("\nConverted index to datetime")
            except:
                print("\nCould not convert index to datetime")
                if not isinstance(df.index, pd.DatetimeIndex):
                    print("\nCreating artificial datetime index")
                    now = datetime.now()
                    df.index = pd.date_range(start=now, periods=len(df), freq='1min')
        
        df = df[~df.index.isna()]
        df = df.sort_index()
        df[value_column] = pd.to_numeric(df[value_column], errors='coerce')
        
        if not df.empty:
            latest_row = df.tail(1)
            latest_timestamp = latest_row.index[0]
            
            if value_column in latest_row.columns and not latest_row[value_column].isna().all():
                rate = latest_row[value_column].iloc[0]
                if pd.notna(rate):
                    print(f"\nAdding edge: {base} -> {quote} with rate {rate} at {latest_timestamp}")
                    G.add_edge(base, quote, weight=float(rate), title=f"{base}→{quote}: {float(rate):.4f}")
                    latest_timestamps[f"{base}-{quote}"] = latest_timestamp
            else:
                print(f"\nNo valid rate found for {symbol} in column {value_column}")
        else:
            print(f"\nNo data found for {symbol}")
            
    except Exception as e:
        print(f"\nError processing {filename}: {e}")
        import traceback
        traceback.print_exc()

most_recent_time = "Unknown"
if latest_timestamps:
    most_recent_time = max(latest_timestamps.values()).strftime("%Y-%m-%d %H:%M:%S")

if len(G.edges()) > 0:
    print(f"\nCreating graph with {len(G.nodes())} nodes and {len(G.edges())} edges")
    
    # Full-screen Pyvis network
    net = Network(height="100vh", width="100%", notebook=True, directed=True)
    
    net.barnes_hut(gravity=-80000, central_gravity=0.3, spring_length=250, spring_strength=0.001)
    
    net.set_options("""
    const options = {
      "nodes": {
        "font": {"size": 16, "face": "Tahoma", "color": "black"},
        "color": {"border": "#222222", "background": "#6EA7FF"},
        "shape": "circle",
        "size": 30
      },
      "edges": {
        "color": {"inherit": false, "color": "#666666"},
        "arrows": {"to": {"enabled": true, "scaleFactor": 0.5}},
        "smooth": {"enabled": true, "type": "dynamic"},
        "font": {"size": 12, "face": "Arial", "color": "#555555", "align": "middle"}
      },
      "physics": {
        "barnesHut": {
          "gravitationalConstant": -80000,
          "centralGravity": 0.3,
          "springLength": 250,
          "springConstant": 0.001,
          "damping": 0.09
        },
        "minVelocity": 0.75,
        "solver": "barnesHut"
      },
      "interaction": {
        "navigationButtons": true,
        "keyboard": true,
        "hover": true
      }
    }
    """)

    for node in G.nodes():
        net.add_node(node, label=node, title=node, font={"size": 20, "bold": True})
    
    for source, target, data in G.edges(data=True):
        rate = data['weight']
        net.add_edge(source, target, value=1, 
                     title=f"{source} → {target}: {rate:.6f}", 
                     label=f"{rate:.4f}")
    
    html_file = "forex_network.html"
    net.show(html_file)
    
    print("\nCurrency Exchange Network Summary:")
    print(f"Number of currencies: {len(G.nodes())}")
    print(f"Number of direct exchange rates: {len(G.edges())}")
    
    exchange_rates = []
    for u, v, data in G.edges(data=True):
        exchange_rates.append({
            'Base Currency': u,
            'Quote Currency': v,
            'Exchange Rate': data['weight'],
            'Last Updated': latest_timestamps.get(f"{u}-{v}", "Unknown")
        })
    
    rates_df = pd.DataFrame(exchange_rates)
    display(rates_df)
    
else:
    print("\nNo valid edges found for the graph. Check your data and parsing logic.")



--- Processing CNYNZD=X_2025-04-18.csv ---
Reading forex_data/CNYNZD=X_2025-04-18.csv

Columns in the dataframe:
['Price', 'Close', 'Close.1', 'High', 'High.1', 'Low', 'Low.1', 'Open', 'Open.1', 'Volume', 'Volume.1']

First few rows:
                       Price               Close            Close.1                High             High.1                 Low              Low.1                Open             Open.1    Volume  Volume.1
0                     Ticker            USDEUR=X           USDHKD=X            USDEUR=X           USDHKD=X            USDEUR=X           USDHKD=X            USDEUR=X           USDHKD=X  USDEUR=X  USDHKD=X
1                   Datetime                 NaN                NaN                 NaN                NaN                 NaN                NaN                 NaN                NaN       NaN       NaN
2  2025-04-17 23:00:00+00:00  0.8792999982833862  7.763299942016602  0.8792999982833862  7.763299942016602  0.8792999982833862  7.762899875640869  0.8

Unnamed: 0,Base Currency,Quote Currency,Exchange Rate,Last Updated
0,CNY,NZD,0.87750,1970-01-01 00:00:00.000001351
1,CNY,USD,0.87750,1970-01-01 00:00:00.000001351
2,CNY,CAD,0.87750,1970-01-01 00:00:00.000001351
3,CNY,MXN,0.87750,1970-01-01 00:00:00.000001351
4,CNY,RUB,0.87750,1970-01-01 00:00:00.000001351
...,...,...,...,...
374,CAD,JPY,10.47167,1970-01-01 00:00:00.000001321
375,CAD,ZAR,10.47167,1970-01-01 00:00:00.000001321
376,CAD,NOK,10.47167,1970-01-01 00:00:00.000001321
377,CAD,INR,10.47167,1970-01-01 00:00:00.000001321
