In [None]:
import pandas as pd
import random
from tqdm.notebook import tqdm

In [None]:
# Load the semicolon-separated data set. header=None means the file has no
# header row, so pandas labels the columns with integers 0, 1, 2, ...
# NOTE(review): the file name suggests 27 attributes, yet the first 28 columns
# are kept -- confirm that this is intended.
df = (
    pd.read_csv("Horse-300-27.csv", sep=";", header=None)
    .iloc[:, :28]
)

In [None]:
def unique_counts(x: pd.Series):
    """Return the number of distinct values in ``x``.

    Missing values (NaN/None) are counted as ONE distinct value. Building a
    Python ``set`` from a float Series does not do that: iteration yields a
    fresh float object per element and NaN never compares equal to NaN, so
    every missing entry would be counted separately.

    Parameters
    ----------
    x : pd.Series
        Values to inspect. Other iterables are accepted and wrapped in a
        Series first.

    Returns
    -------
    int
        Number of distinct values, with all missing values collapsed into one.
    """
    values = x if isinstance(x, pd.Series) else pd.Series(list(x), dtype=object)
    return int(values.nunique(dropna=False))

def isOdValid(df: pd.DataFrame, lhs: list, lhsDirections: list[bool], rhs: list, rhsDirections: list[bool]):
    """Check whether the order dependency ``lhs -> rhs`` holds on ``df``.

    The dependency holds when ordering the rows by ``lhs`` also orders them by
    ``rhs``. Two kinds of violation are checked:

    * swap  -- a pair of rows that ``lhs`` orders one way and ``rhs`` orders
      the other way;
    * split -- rows that tie on every ``lhs`` column but differ on ``rhs``.

    Missing values are treated consistently in both checks: they compare equal
    to each other (pandas sorts them last regardless of direction, and they
    form one group in the split check).

    Parameters
    ----------
    df : pd.DataFrame
        Table to validate against. It is not modified.
    lhs, rhs : sequence of column labels
        Columns of the left- and right-hand side, most significant first.
        Lists and tuples are both accepted.
    lhsDirections, rhsDirections : sequence of bool
        One flag per column; True sorts ascending, False descending.

    Returns
    -------
    bool
        True if neither a swap nor a split exists.
    """
    # A tuple would be interpreted by pandas as ONE (multi-level) label.
    lhs, rhs = list(lhs), list(rhs)
    lhsDirections, rhsDirections = list(lhsDirections), list(rhsDirections)

    # Use positional row ids so that duplicate index labels in the caller's
    # frame cannot hide a reordering.
    rows = df.reset_index(drop=True)

    # no swaps: a stable re-sort by rhs must leave the lhs order untouched
    sorted_by_lhs = rows.sort_values(lhs, ascending=lhsDirections, kind="stable")
    sorted_by_rhs = sorted_by_lhs.sort_values(rhs, ascending=rhsDirections, kind="stable")
    if not sorted_by_lhs.index.equals(sorted_by_rhs.index):
        return False

    # no splits: every distinct lhs combination maps to exactly one rhs
    # combination. Columns shared with lhs are trivially determined.
    dependents = [col for col in rhs if col not in lhs]
    if not dependents:
        return True
    # duplicated() treats missing values as equal, so rows with NaN in lhs
    # take part in the check instead of being dropped.
    lhs_combinations = int((~rows.duplicated(subset=lhs)).sum())
    joint_combinations = int((~rows.duplicated(subset=lhs + dependents)).sum())
    return lhs_combinations == joint_combinations

In [None]:
def number_to_excel_column(n):
    """Convert a 1-based column number to spreadsheet letters.

    1 -> "A", 26 -> "Z", 27 -> "AA", and so on. Numbers below 1 yield an
    empty string.
    """
    letters = []
    remaining = n
    while remaining > 0:
        # Bijective base 26: shift by one so that 26 maps to "Z", not "A@".
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord("A") + offset))
    # Letters were produced least-significant first.
    return "".join(reversed(letters))

In [None]:
def colsToString(cols: list[int], directions: list[bool]):
    """Render an ordered column list as e.g. ``"A↑,C↓"``.

    ``cols`` holds zero-based integer column positions (they are shifted by
    one before being turned into spreadsheet letters); ``directions`` holds
    one flag per column, True for ascending (↑) and False for descending (↓).
    Pairs are formed with ``zip``, so surplus entries in the longer sequence
    are ignored.
    """
    parts = []
    for col, direction in zip(cols, directions):
        arrow = "↑" if direction else "↓"
        parts.append(number_to_excel_column(col + 1) + arrow)
    return ",".join(parts)

def odToString(lhs: list[int], lhsDirections: list[bool], rhs: list[int], rhsDirections: list[bool]):
    """Format an order dependency as ``"[<lhs>] -> [<rhs>]"``.

    Each side is rendered by ``colsToString`` from its column positions and
    the matching direction flags.
    """
    left_side = colsToString(lhs, lhsDirections)
    right_side = colsToString(rhs, rhsDirections)
    return f"[{left_side}] -> [{right_side}]"

In [None]:
# Draw random order-dependency candidates of several sizes, validate each one
# against `df`, and write the valid / invalid ones to text files.
SAMPLES_PER_SIZE = 2000   # candidates drawn for every lhs/rhs size
MAX_INVALID_KEPT = 1000   # at most this many invalid candidates are written per size
SEED = 42

# Dedicated seeded generator: Restart & Run All reproduces the same files
# without touching the global `random` state.
rng = random.Random(SEED)

# Loop-invariant: the pool of column labels to sample from.
columnPool = sorted(df.columns)

for lhsSize in tqdm([1,2,3,5,10]):
    rhsSize = lhsSize
    valids = []
    invalids = []

    for i in tqdm(range(SAMPLES_PER_SIZE),leave=False):
        # Both sides are sampled independently, so they may share columns.
        lhs = rng.sample(columnPool, lhsSize)
        rhs = rng.sample(columnPool, rhsSize)
        lhsDirection = [rng.choice([True, False]) for _ in range(lhsSize)]
        rhsDirection = [rng.choice([True, False]) for _ in range(rhsSize)]
        isValid = isOdValid(df,lhs, lhsDirection, rhs, rhsDirection)
        odString = odToString(lhs, lhsDirection, rhs, rhsDirection)

        if isValid:
            valids.append(odString)
        elif len(invalids) < MAX_INVALID_KEPT:
            invalids.append(odString)

    # The rendered dependencies contain "↑"/"↓"; an explicit UTF-8 encoding
    # avoids UnicodeEncodeError on platforms whose default encoding lacks them.
    with open(f"valid_{lhsSize}_{rhsSize}.txt","w",encoding="utf-8") as f:
        f.write("\n".join(valids))

    with open(f"invalid_{lhsSize}_{rhsSize}.txt","w",encoding="utf-8") as f:
        f.write("\n".join(invalids))
