In [None]:
import pandas as pd

# ----------------------------
# User-defined Configuration
# ----------------------------
# Base name of the CSV data file to split into a training and a testing set.
# split_csv() appends ".csv" to this name itself, so leave the extension off.
input_file = "ANN_TotalData"    # Do not include ".csv"

# Base name of the training file written by the split.
train_file = "train"    # Do not include ".csv"

# Base name of the testing file written by the split.
test_file = "test"    # Do not include ".csv"

# Fraction of the rows that goes into the training file.
# 0.7 means 70% of the data will be training data (30% left for testing).
split_ratio = 0.7

# (Optional) random seed so that the shuffle is reproducible.
# Set to None to get a different shuffle on every run.
seed = 42


# ----------------------------
# Dataset splitter Function
# ----------------------------


def split_csv(file_path, ratio, output1, output2, random_state=None):
    """
    Shuffle the rows of a CSV file and split them into two CSV files.

    The rows are shuffled, then the first ``floor(n_rows * ratio)`` rows are
    written to the first output file and the remaining rows to the second.
    ".csv" is appended to every path, so pass names without the extension.

    Parameters:
    - file_path: Path to the input CSV file, without the ".csv" extension.
    - ratio: A float between 0 and 1 (inclusive) indicating the fraction of
      rows for the first dataset.
    - output1: Path for the first output CSV file, without ".csv".
    - output2: Path for the second output CSV file, without ".csv".
    - random_state: Optional; an integer seed for reproducible shuffling.

    Raises:
    - ValueError: If ratio is not within [0, 1].
    """
    # Fail fast on an invalid ratio. Without this check a negative ratio makes
    # the slices below count from the end of the frame and a ratio above 1
    # silently puts every row in the first file. NaN is rejected as well,
    # because every comparison with NaN is False.
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be between 0 and 1, got {ratio!r}")

    # Read the CSV file into a DataFrame.
    df = pd.read_csv(f"{file_path}.csv")

    # Shuffle the DataFrame to randomize the rows.
    df_shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Calculate the split index based on the provided ratio. The tiny tolerance
    # absorbs floating-point error before truncating: 100 * 0.29 evaluates to
    # 28.999999999999996, which would otherwise yield 28 rows instead of 29.
    split_index = int(len(df_shuffled) * ratio + 1e-9)

    # Split the DataFrame into two parts.
    df1 = df_shuffled.iloc[:split_index]
    df2 = df_shuffled.iloc[split_index:]

    # Save the two datasets into separate CSV files.
    df1.to_csv(f"{output1}.csv", index=False)
    df2.to_csv(f"{output2}.csv", index=False)
    print(f"Data successfully split: {len(df1)} rows in '{output1}.csv' and {len(df2)} rows in '{output2}.csv'.")


# Run the split using the configuration values defined above.
split_csv(
    file_path=input_file,
    ratio=split_ratio,
    output1=train_file,
    output2=test_file,
    random_state=seed,
)
