In [None]:
import os
import pandas as pd


def create_folders_from_tsv(tsv_file):
    """Split a TSV dataset into a folder-per-label tree of text files.

    The TSV is read with pandas (tab-separated). Every value in the
    ``label`` column other than ``'not_propaganda'`` is collapsed into the
    single label ``'propaganda'``. An output folder named after the TSV
    file (its path with the extension removed) is created, containing one
    subfolder per resulting label. Each row is written to
    ``<output_folder>/<label>/<row index>.txt`` with the contents of its
    ``tagged_in_context`` column.

    Existing folders are reused and existing files with the same name are
    overwritten. A confirmation message is printed on completion.

    Args:
        tsv_file: Path to the tab-separated input file. It must contain
            the columns ``label`` and ``tagged_in_context``.

    Raises:
        KeyError: If a required column is missing from the TSV. This is
            checked before anything is written to disk.
        TypeError: If a ``tagged_in_context`` value is not a string (for
            example a missing cell), since only text can be written.
    """
    # Read TSV file into a pandas DataFrame
    df = pd.read_csv(tsv_file, sep='\t')

    # Fail early, before creating any folders, if the schema is wrong.
    missing = [col for col in ('label', 'tagged_in_context')
               if col not in df.columns]
    if missing:
        raise KeyError(
            f"Missing required column(s) {missing} in '{tsv_file}'"
        )

    # Replace all unique propaganda labels with one general propaganda label
    df['label'] = df['label'].mask(df['label'] != 'not_propaganda', 'propaganda')

    # The output folder is the TSV path without its extension.
    output_folder = os.path.splitext(tsv_file)[0]
    os.makedirs(output_folder, exist_ok=True)

    for label in df['label'].unique():
        # One subfolder per label.
        label_folder = os.path.join(output_folder, str(label))
        os.makedirs(label_folder, exist_ok=True)

        label_rows = df[df['label'] == label]

        # Only the index and the text column are needed, so pair them up
        # directly instead of materialising a Series per row.
        for index, text in zip(label_rows.index,
                               label_rows['tagged_in_context']):
            file_path = os.path.join(label_folder, f"{index}.txt")

            # Explicit UTF-8 so the output does not depend on the
            # platform's default encoding.
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(text)

    print(f"Folder structure created successfully in '{output_folder}'")

In [None]:
# Build the per-label folder trees for the training and validation TSVs.
for split_tsv in (
    "/content/drive/MyDrive/ANLE_Coursework/Data/Task-1/propaganda_train.tsv",
    "/content/drive/MyDrive/ANLE_Coursework/Data/Task-1/propaganda_val.tsv",
):
    create_folders_from_tsv(split_tsv)
