Reforming the data into something potentially easier to leaf through:
separating each context from the csv/xlsx into its own entry so that the contexts can be opened separately, which will hopefully make them easier to read and rate by hand.
(Is this actually useful?)

In [1]:
import pandas as pd
import xlsxwriter
import os
import openpyxl

def split_contexts(input_file, output_excel_file, contexts_directory, subfolder_name):
    """
    Split contexts from an Excel file into separate text files and save a reduced version of the data.

    Each row of the input workbook receives a sequential 1-based ID. The row's
    'Context' cell is written to '<ID>.txt' inside the subfolder, and the
    remaining metadata columns (plus the ID) are saved to a new workbook so the
    two outputs can be cross-referenced by ID.

    Parameters:
    - input_file (str): Path to the input Excel file with contexts.
    - output_excel_file (str): Path to the output Excel file without the context column.
    - contexts_directory (str): Directory where the context text files will be saved.
    - subfolder_name (str): Name of the subfolder within the contexts_directory where the context files will be saved.

    Raises:
    - KeyError: If a metadata column is missing, or if the workbook has rows but no 'Context' column.
    """
    df = pd.read_excel(input_file)

    df['ID'] = range(1, len(df) + 1)

    # Select the metadata first so that a malformed workbook fails before
    # anything is written to disk.
    df_no_context = df[['ID', 'Date', 'Filename', 'Keyword', 'Author', 'Title']]
    if len(df) > 0 and 'Context' not in df.columns:
        raise KeyError('Context')

    # Create every output location up front: the reduced workbook may live in a
    # directory that does not exist yet on a fresh run.
    full_contexts_directory = os.path.join(contexts_directory, subfolder_name)
    os.makedirs(full_contexts_directory, exist_ok=True)
    output_parent = os.path.dirname(output_excel_file)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    df_no_context.to_excel(output_excel_file, index=False)

    context_files_count = len(df)

    if context_files_count > 0:
        # Iterate over the two columns directly rather than with iterrows(),
        # which builds a Series per row and may coerce the integer ID to float.
        for context_id, context_text in zip(df['ID'], df['Context']):
            text_filename = os.path.join(full_contexts_directory, f'{context_id}.txt')

            # A blank cell is read as NaN and a numeric cell as a number;
            # neither can be passed to file.write() as-is.
            text = '' if pd.isna(context_text) else str(context_text)

            with open(text_filename, 'w', encoding='utf-8') as file:
                file.write(text)
    else:
        # If no context files were created, add a file indicating no keywords were found
        no_keyword_file = os.path.join(full_contexts_directory, "no_keywords_found.txt")
        with open(no_keyword_file, 'w', encoding='utf-8') as file:
            file.write("No keywords were found :(")

    print("Processing complete.")

In [2]:
# Root folder that receives the reduced workbooks and the per-source subfolders.
contexts_directory = '../data_to_view/separated_contexts'
# Folder holding the combined 'contexts_<source>.xlsx' workbooks to be split.
combined_excel_directory = '../data_to_view/contexts_all_together/excel_ver'

# One entry per source: (name used in the workbook filenames, subfolder for its text files).
context_sources = [
    ('RSTA', 'royal_society_RSTA'),
    ('RSTB', 'royal_society_RSTB'),
    ('RSTL', 'royal_society_RSTL'),
    ('spectator', 'spectator'),
    ('general_magazine', 'general_magazine'),  # noted by the author as empty
]

# A single loop replaces five copy-pasted calls; the arguments passed to
# split_contexts are the same strings, in the same order, as before.
for source_name, source_subfolder in context_sources:
    split_contexts(
        input_file = f'{combined_excel_directory}/contexts_{source_name}.xlsx',
        output_excel_file = f'{contexts_directory}/{source_name}.xlsx',
        contexts_directory = contexts_directory,
        subfolder_name = source_subfolder
    )

Processing complete.
Processing complete.
Processing complete.
Processing complete.
Processing complete.
