# In [2]:
import pandas as pd


def split_tsv_file(file_path, split_ratio=(0.33, 0.33, 0.34), output_prefix="split_"):
    """
    Read a TSV file, split it into three consecutive parts, and save each part.

    Rows keep their original order (nothing is shuffled). The parts are
    written as tab-separated files, without the index column, to
    ``{output_prefix}part1.tsv``, ``{output_prefix}part2.tsv`` and
    ``{output_prefix}part3.tsv``.

    Args:
      file_path: Path to the TSV file.
      split_ratio: Three non-negative fractions, one per part.
                    Defaults to (0.33, 0.33, 0.34). Only the first two
                    determine the cut points; the third part always receives
                    every remaining row.
      output_prefix: Prefix for the output file names. Defaults to 'split_'.

    Returns:
      None

    Raises:
      ValueError: If ``split_ratio`` does not contain exactly three values
        or contains a negative value.
    """

    if len(split_ratio) != 3:
        raise ValueError(
            f"split_ratio must contain exactly three values, got {len(split_ratio)}"
        )
    if any(ratio < 0 for ratio in split_ratio):
        raise ValueError(f"split_ratio values must be non-negative, got {split_ratio!r}")

    df = pd.read_csv(file_path, sep="\t")
    n_rows = len(df)

    # Products such as 100 * 0.29 evaluate to 28.999999999999996 in binary
    # floating point; a tiny tolerance keeps int() from dropping a row that
    # the requested ratio should have included.
    tolerance = 1e-9
    first_end = int(n_rows * split_ratio[0] + tolerance)
    second_end = int(n_rows * (split_ratio[0] + split_ratio[1]) + tolerance)

    parts = (
        df.iloc[:first_end],
        df.iloc[first_end:second_end],
        # The last part takes whatever is left so no row is ever lost.
        df.iloc[second_end:],
    )

    for number, part in enumerate(parts, start=1):
        part.to_csv(f"{output_prefix}part{number}.tsv", sep="\t", index=False)


# Example usage: split the validation set into three consecutive TSV files.
# Point file_path at your own TSV file before running.
file_path = "original_validation.tsv"
# The parts are written next to the working directory as
# 'original_validation_part1.tsv', 'original_validation_part2.tsv' and
# 'original_validation_part3.tsv'.
split_tsv_file(file_path=file_path, output_prefix="original_validation_")