In [15]:
import pandas as pd
import numpy as np
import glob
import networkx as nx
import os

def load_forex_data(file_pattern="forex_intraday_*.csv"):
    """
    Loads and concatenates all CSVs matching the given pattern.

    Parameters
    ----------
    file_pattern : str
        Glob pattern selecting the CSV files to read. Every file must have a
        ``timestamp`` column, which is parsed as datetimes.

    Returns
    -------
    pandas.DataFrame
        Rows of all matching files stacked in sorted-filename order, with a
        fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no file matches ``file_pattern``.
    """
    matched_files = sorted(glob.glob(file_pattern))
    if not matched_files:
        # pd.concat([]) would fail with an opaque "No objects to concatenate";
        # raise the same exception type but say which pattern found nothing.
        raise ValueError(f"No files match pattern {file_pattern!r}")

    df_list = []
    for file in matched_files:
        print(f"Loading: {file}")
        df_list.append(pd.read_csv(file, parse_dates=["timestamp"]))

    return pd.concat(df_list, ignore_index=True)


def pivot_currency_data(df):
    """
    Reshapes long-format forex rows into a wide table of closing prices.

    Only the ``timestamp``, ``pair`` and ``close`` columns are used. The result
    is indexed by ``timestamp`` and has one column per distinct ``pair`` value;
    combinations that never occur in the input are left as NaN.
    """
    return df.loc[:, ["timestamp", "pair", "close"]].pivot(
        index="timestamp",
        columns="pair",
        values="close",
    )


def convert_to_negative_log(df_rates):
    """
    Maps every exchange rate r to -log(r), element by element.

    With this transform a product of rates along a path becomes a sum of
    weights, which is the form used for the arbitrage detection.
    """
    log_rates = np.log(df_rates)
    return np.negative(log_rates)


def build_graphs(df_log_rates):
    """
    Creates one weighted directed graph for every row of the -log rate table.

    Each column label is split into its first three characters (edge source)
    and the remaining characters (edge target); the cell value becomes the
    edge's ``weight`` attribute. Missing values produce no edge, so a row
    without any valid value yields an empty graph.

    Returns a dict mapping each row label (timestamp) to its graph.
    """
    def _row_to_graph(row):
        # One edge per non-missing pair in this row.
        graph = nx.DiGraph()
        for pair, weight in row.items():
            if pd.isna(weight):
                continue
            graph.add_edge(pair[:3], pair[3:], weight=weight)
        return graph

    return {
        timestamp: _row_to_graph(row)
        for timestamp, row in df_log_rates.iterrows()
    }

# Run the pipeline end to end:
# raw CSV rows -> wide close-price table -> -log weights -> one graph per row.
print("Loading forex data...")
df_all = load_forex_data("forex_intraday_*.csv")

# Wide table: one row per timestamp, one column per currency pair.
print("Pivoting data to currency pair matrix...")
df_pivot = pivot_currency_data(df_all)

# Element-wise -log so that multiplying rates along a path becomes adding weights.
print("Converting to -log(rates)...")
df_log = convert_to_negative_log(df_pivot)

# graphs_by_time maps each timestamp to its directed, weighted graph.
print("Building graph for each timestamp...")
graphs_by_time = build_graphs(df_log)

print(f"Built {len(graphs_by_time)} graphs, one per timestamp.")


Loading forex data...
Loading: forex_intraday_2025-01-01.csv
Pivoting data to currency pair matrix...
Converting to -log(rates)...
Building graph for each timestamp...
Built 179 graphs, one per timestamp.


In [16]:
import numpy as np
import pandas as pd

def normalize_cycle(cycle):
    """
    Returns a canonical tuple for a closed cycle so rotations compare equal.

    The last element (the repeated closing node) is dropped, then the sequence
    is rotated so that its smallest element comes first.
    """
    open_cycle = cycle[:-1]
    pivot = open_cycle.index(min(open_cycle))
    rotated = open_cycle[pivot:] + open_cycle[:pivot]
    return tuple(rotated)

def _trace_predecessor_cycle(predecessor, node, node_count):
    """
    Follows predecessor links back from ``node`` and returns the cycle it ends in.

    Returns the closed cycle as a list in edge direction (first element equals
    last element), or None when the chain runs out before a cycle is found.
    """
    # Step back node_count times so that, if the chain never ends, we are
    # standing on a node that lies on the cycle itself rather than on a tail.
    curr = node
    for _ in range(node_count):
        curr = predecessor.get(curr)
        if curr is None:
            return None

    cycle = []
    visited = set()
    start = curr
    while True:
        if curr is None or curr in visited:
            break
        visited.add(curr)
        cycle.append(curr)
        curr = predecessor.get(curr)
        if curr == start and len(cycle) > 1:
            cycle.append(curr)
            break

    if len(cycle) > 1 and curr == start:
        # Predecessor links point backwards; reverse to get edge direction.
        return cycle[::-1]
    return None


def find_all_negative_cycles(graph):
    """
    Detects negative-weight cycles by running Bellman-Ford from every node.

    For each source, edges are relaxed for up to ``len(nodes) - 1`` passes.
    Every edge that can still be relaxed afterwards is traced back through the
    predecessor links to recover a cycle. Cycles that are rotations of one
    another are reported once (see ``normalize_cycle``).

    Note that, despite the name, this is not an exhaustive enumeration: only
    cycles exposed by the predecessor chains of some source are reported.

    Parameters
    ----------
    graph
        Directed graph whose edges carry a numeric ``"weight"`` attribute.
        Node labels must be strings, because they are joined for display.

    Returns
    -------
    list of dict
        One entry per distinct cycle with keys ``"cycle"`` (nodes joined by
        " → ", closing node repeated), ``"log_sum"`` (sum of edge weights
        along the cycle) and ``"profit_multiplier"`` (``exp(-log_sum)``, i.e.
        the product of rates when weights are ``-log(rate)``).
    """
    # The node and edge sets do not change between sources, so read them once.
    nodes = list(graph.nodes())
    edges = [(u, v, data["weight"]) for u, v, data in graph.edges(data=True)]

    all_cycles = set()
    cycles_info = []

    for source in nodes:
        distance = {node: float('inf') for node in nodes}
        predecessor = {node: None for node in nodes}
        distance[source] = 0

        for _ in range(len(nodes) - 1):
            relaxed = False
            for u, v, weight in edges:
                if distance[u] + weight < distance[v]:
                    distance[v] = distance[u] + weight
                    predecessor[v] = u
                    relaxed = True
            if not relaxed:
                # A pass without any relaxation means all later passes would
                # also do nothing, so stopping here cannot change the result.
                break

        # Any edge that is still relaxable leads back into a negative cycle.
        for u, v, weight in edges:
            if distance[u] + weight < distance[v]:
                cycle = _trace_predecessor_cycle(predecessor, v, len(nodes))
                if cycle is None:
                    continue

                norm = normalize_cycle(cycle)
                if norm in all_cycles:
                    continue
                all_cycles.add(norm)

                profit_log = sum(graph[a][b]["weight"] for a, b in zip(cycle, cycle[1:]))
                profit = np.exp(-profit_log)
                cycles_info.append({
                    "cycle": " → ".join(cycle),
                    "log_sum": profit_log,
                    "profit_multiplier": profit
                })

    return cycles_info

# === Apply to your graphs ===

# Column order of the result table; matches the keys of each cycle record.
ARB_COLUMNS = ["cycle", "log_sum", "profit_multiplier", "timestamp"]

all_results = []

for timestamp, G in graphs_by_time.items():
    cycle_info_list = find_all_negative_cycles(G)
    for cycle_info in cycle_info_list:
        # Tag each detected cycle with the timestamp of the graph it came from.
        cycle_info["timestamp"] = timestamp
        all_results.append(cycle_info)

# Explicit columns keep the frame (and the CSV header) well-formed even when no
# cycle was found; without them the sort below fails with a KeyError.
df_arbs = pd.DataFrame(all_results, columns=ARB_COLUMNS)

# The CSV is written in detection order, before sorting.
df_arbs.to_csv("all_arbitrage_cycles.csv", index=False)

print(f"Found {len(df_arbs)} arbitrage opportunities.")
# Rebind instead of inplace=True; the displayed frame is sorted best-first.
df_arbs = df_arbs.sort_values(by="profit_multiplier", ascending=False)
df_arbs

Found 93 arbitrage opportunities.


Unnamed: 0,cycle,log_sum,profit_multiplier,timestamp
40,JPY → HKD → USD → CAD → SGD → CHF → JPY,-3.679010e-03,1.003686,2025-01-01 21:53:00
41,SGD → CHF → JPY → USD → CAD → SGD,-3.666471e-03,1.003673,2025-01-01 21:53:00
45,EUR → CHF → SGD → GBP → JPY → EUR,-2.918862e-03,1.002923,2025-01-01 21:57:00
39,USD → SGD → CHF → JPY → USD,-1.716102e-03,1.001718,2025-01-01 21:53:00
53,CAD → SGD → USD → CAD,-1.660782e-03,1.001662,2025-01-01 22:06:00
...,...,...,...,...
67,HKD → USD → HKD,-1.287076e-05,1.000013,2025-01-01 22:27:00
19,EUR → GBP → USD → EUR,-8.321665e-06,1.000008,2025-01-01 21:30:00
80,USD → JPY → USD,-6.357259e-06,1.000006,2025-01-01 23:22:00
87,JPY → HKD → USD → JPY,-3.001562e-06,1.000003,2025-01-01 23:49:00
