In [1]:
import pandas as pd
import numpy as np
import glob
import networkx as nx
import os
import matplotlib.pyplot as plt

In [2]:
def load_forex_files_by_day(folder_pattern="./Data/forex_intraday_*.csv"):
    """Read every CSV matching ``folder_pattern`` into a dict keyed by date.

    Files are visited in sorted path order and parsed with the ``timestamp``
    column converted to datetimes. A file that yields an empty frame is
    skipped. The key of each entry is the calendar date of the file's first
    row; if two files share that date, the later one (in sorted order)
    replaces the earlier one.

    Parameters
    ----------
    folder_pattern : str
        Glob pattern selecting the CSV files to load.

    Returns
    -------
    dict
        ``datetime.date`` -> ``pandas.DataFrame`` with the file's contents.
    """
    frames_by_date = {}

    for csv_path in sorted(glob.glob(folder_pattern)):
        frame = pd.read_csv(csv_path, parse_dates=["timestamp"])
        if not frame.empty:
            # The first row decides which day the whole file belongs to.
            first_stamp = frame["timestamp"].iloc[0]
            frames_by_date[pd.to_datetime(first_stamp).date()] = frame

    return frames_by_date

# Load every CSV matching the default pattern once; the result maps each
# file's first-row date to that file's DataFrame.
all_days_data = load_forex_files_by_day()

In [3]:
def build_graphs(df):
    """Build one directed graph per timestamp from close prices.

    The frame is pivoted so that each row is a timestamp and each column a
    pair, then every close is replaced by ``-log(close)``. For each timestamp
    a ``networkx.DiGraph`` is created with one edge per pair that has a value
    at that timestamp: the first three characters of the pair string are the
    source node, the remaining characters the target node, and the
    transformed close is stored as the edge's ``weight``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``timestamp``, ``pair`` and ``close``.
        NOTE(review): the 3-character split presumably expects symbols such
        as ``EURUSD`` -- confirm against the data files.

    Returns
    -------
    dict
        timestamp -> ``networkx.DiGraph``, in the pivoted index order.
    """
    closes = df[["timestamp", "pair", "close"]].pivot(
        index="timestamp", columns="pair", values="close"
    )
    neg_log_rates = -np.log(closes)

    def _graph_for(rates):
        # Pairs without a quote at this timestamp are NaN and get no edge.
        snapshot = nx.DiGraph()
        for symbol, cost in rates.dropna().items():
            snapshot.add_edge(symbol[:3], symbol[3:], weight=cost)
        return snapshot

    return {stamp: _graph_for(rates) for stamp, rates in neg_log_rates.iterrows()}

In [5]:
def normalize_cycle(cycle):
    """Return a rotation-independent key for a closed cycle.

    The last element of ``cycle`` is dropped (callers pass a path whose final
    node repeats the first), and the remaining nodes are rotated so that the
    smallest node comes first. Two traversals of the same cycle that start at
    different nodes therefore map to the same tuple.

    Parameters
    ----------
    cycle : list
        Node sequence including the closing node at the end.

    Returns
    -------
    tuple
        The rotated node sequence without the closing node.

    Raises
    ------
    ValueError
        If nothing is left after dropping the last element.
    """
    open_path = cycle[:-1]
    # index() returns the first occurrence, matching a first-minimum search.
    pivot = open_path.index(min(open_path))
    return tuple(open_path[pivot:]) + tuple(open_path[:pivot])

def _trace_predecessor_cycle(predecessor, node, steps):
    """Follow predecessor links back from ``node`` and return the cycle they close.

    Returns the cycle as a list in edge direction whose first and last entries
    are the same node, or ``None`` when the predecessor chain runs out or only
    a single-node loop is found.
    """
    # Step back once per graph node: if the chain never runs out, we must
    # have revisited a node, so we are now standing on the cycle itself.
    for _ in range(steps):
        node = predecessor.get(node)
        if node is None:
            return None

    start = node
    backward = [start]
    seen = {start}
    node = predecessor.get(start)
    while node is not None and node != start:
        if node in seen:
            # Walked into a loop that does not pass through `start`.
            return None
        seen.add(node)
        backward.append(node)
        node = predecessor.get(node)

    if node is None or len(backward) < 2:
        return None

    # Predecessor links point against edge direction; close and flip the path.
    backward.append(start)
    backward.reverse()
    return backward


def find_all_negative_cycles(graph):
    """Find distinct negative-weight cycles by running Bellman-Ford from every node.

    For each source node the edges are relaxed up to ``len(nodes) - 1`` times.
    Any edge that can still be relaxed afterwards indicates a negative cycle,
    which is recovered from the predecessor links. Cycles that differ only in
    their starting node are reported once (deduplicated via
    ``normalize_cycle``).

    Parameters
    ----------
    graph : networkx.DiGraph
        Directed graph whose edges carry a numeric ``weight`` attribute and
        whose nodes are strings (they are joined into the ``cycle`` label).

    Returns
    -------
    list of dict
        One entry per distinct cycle, in discovery order, with keys
        ``cycle`` (nodes joined by " -> " using the U+2192 arrow, closing
        node repeated), ``log_sum`` (sum of edge weights along the cycle) and
        ``profit_multiplier`` (``exp(-log_sum)``).
    """
    # The graph is not modified here, so snapshot nodes and edges once
    # instead of re-querying them for every source and every pass.
    nodes = list(graph.nodes())
    edges = [(u, v, data["weight"]) for u, v, data in graph.edges(data=True)]

    seen_cycles = set()
    cycles_info = []

    for source in nodes:
        distance = dict.fromkeys(nodes, float("inf"))
        predecessor = dict.fromkeys(nodes)
        distance[source] = 0

        for _ in range(len(nodes) - 1):
            relaxed = False
            for u, v, weight in edges:
                if distance[u] + weight < distance[v]:
                    distance[v] = distance[u] + weight
                    predecessor[v] = u
                    relaxed = True
            if not relaxed:
                # A pass without updates means later passes would be no-ops too.
                break

        for u, v, weight in edges:
            if not distance[u] + weight < distance[v]:
                continue

            cycle = _trace_predecessor_cycle(predecessor, v, len(nodes))
            if cycle is None:
                continue

            key = normalize_cycle(cycle)
            if key in seen_cycles:
                continue
            seen_cycles.add(key)

            log_sum = sum(graph[a][b]["weight"] for a, b in zip(cycle, cycle[1:]))
            cycles_info.append({
                # Written as an escape so the arrow survives any file re-encoding.
                "cycle": " \u2192 ".join(cycle),
                "log_sum": log_sum,
                "profit_multiplier": np.exp(-log_sum),
            })

    return cycles_info

In [6]:
# Detect arbitrage cycles for every loaded day and write one CSV per day
# into `output_folder`.
output_folder = "Arbitrage_Cycles"
os.makedirs(output_folder, exist_ok=True)

# Fixed column layout: without it, a day with no cycles produces a DataFrame
# with no columns and therefore a CSV without a header row.
RESULT_COLUMNS = ["cycle", "log_sum", "profit_multiplier", "timestamp", "date"]

for date, df in all_days_data.items():
    print(f"Processing {date}...")
    graphs_by_time = build_graphs(df)

    # Collect plain dict records and build the DataFrame once per day.
    day_results = []
    for timestamp, G in graphs_by_time.items():
        cycles = find_all_negative_cycles(G)
        for cycle in cycles:
            # Tag each cycle with the snapshot it was found in.
            cycle["timestamp"] = timestamp
            cycle["date"] = date
            day_results.append(cycle)

    df_day = pd.DataFrame(day_results, columns=RESULT_COLUMNS)
    out_file = os.path.join(output_folder, f"arbitrage_{date}.csv")
    df_day.to_csv(out_file, index=False)
    print(f"{len(df_day)} arbitrage cycles saved to {out_file}")

Processing 2025-01-01...
93 arbitrage cycles saved to Arbitrage_Cycles\arbitrage_2025-01-01.csv
Processing 2025-01-02...
1064 arbitrage cycles saved to Arbitrage_Cycles\arbitrage_2025-01-02.csv
Processing 2025-01-03...
366 arbitrage cycles saved to Arbitrage_Cycles\arbitrage_2025-01-03.csv
Processing 2025-01-05...
9 arbitrage cycles saved to Arbitrage_Cycles\arbitrage_2025-01-05.csv
