In [None]:
import pandas as pd
from statistics import mean
import csv

# Read the raw table; only the 'Eye_Metric' and 'All_Recordings' columns are used below
df = pd.read_csv('/content/MSA_22_Texts.csv')

# Output rows: main-paragraph rows first (in file order), then one averaged
# row per group of subparagraphs
processed_data = []

# Subparagraph recordings, grouped under the name of the paragraph they belong to
subparagraphs_data = {}

# Walk the two columns in lockstep, one pair per input row
for eye_metric, recording in zip(df['Eye_Metric'], df['All_Recordings']):
    # Names without the '_SUBp' marker are main paragraphs and pass through unchanged
    if '_SUBp' not in eye_metric:
        processed_data.append({'Eye_Metric': eye_metric, 'All_Recordings': recording})
        continue

    # The group name is everything before the last underscore of the metric name
    base_paragraph = eye_metric.rsplit('_', 1)[0]

    # Start the group's list on first sight, then collect the recording
    subparagraphs_data.setdefault(base_paragraph, []).append(recording)

# Collapse each subparagraph group into a single row holding the mean recording
processed_data.extend(
    {'Eye_Metric': base_paragraph, 'All_Recordings': mean(recordings)}
    for base_paragraph, recordings in subparagraphs_data.items()
)

# Build the output table and order it by Eye_Metric
result_df = pd.DataFrame(processed_data).sort_values(by='Eye_Metric')

# Save the result without the index column
result_df.to_csv('processed_file.csv', index=False)

print("Processing complete. The processed data is saved in 'processed_file.csv'.")


Processing complete. The processed data is saved in 'processed_file.csv'.


In [None]:
import pandas as pd

# Load the table saved by the previous cell. That cell writes
# 'processed_file.csv' relative to the working directory, so read it back from
# the same relative location instead of assuming the working directory is '/content'.
df = pd.read_csv('processed_file.csv')

# Define the desired order of topics
topic_order = ['GraAndMor', 'LitAndElo', 'History', 'Biography', 'GeoAndTra', 'HelAndNut', 'Philosophy', 'Politics', 'Sociology', 'Technology', 'Psychology','Business', 'Art']

def get_topic_order(entry):
    """Return the sort rank of the earliest topic in ``topic_order`` found in ``entry``.

    Matching is a case-insensitive substring test, so a topic counts as
    present wherever its name occurs inside ``entry``.

    Parameters
    ----------
    entry : str
        Label to classify. Any non-string value (for example the NaN that
        pandas produces for an empty CSV cell) is treated as having no topic.

    Returns
    -------
    int
        Index into ``topic_order`` of the earliest listed topic contained in
        ``entry``, or ``len(topic_order) + 1`` when no topic matches, so that
        unmatched entries sort after every known topic.
    """
    unmatched_rank = len(topic_order) + 1

    # Non-string values have no .lower(); rank them with the unmatched entries
    # instead of raising AttributeError.
    if not isinstance(entry, str):
        return unmatched_rank

    entry_lower = entry.lower()

    # topic_order is scanned in rank order, so the first hit is the lowest rank
    # and the scan can stop there.
    for rank, topic in enumerate(topic_order):
        if topic.lower() in entry_lower:
            return rank

    return unmatched_rank

# Rank every row by the topic found in its Eye_Metric, sort by that rank and
# then by Eye_Metric, and discard the helper rank column once the order is fixed
df = (
    df.assign(topic_order=df['Eye_Metric'].apply(get_topic_order))
      .sort_values(by=['topic_order', 'Eye_Metric'])
      .drop(columns=['topic_order'])
)

# Write the reordered table without the index column
df.to_csv('Transfer.csv', index=False)
