## Merge the results from the topic and sentiment analysis

In [None]:
# Written by Magnus Olander

import pandas as pd
import json
import os

# Locate the parliamentary data: the "data" directory is expected to sit
# next to the current working directory (i.e. inside its parent directory).
current_directory = os.getcwd()
parent_directory = os.path.abspath(os.path.join(current_directory, os.pardir))
data_directory = os.path.join(parent_directory, "data")

sentiment_file_name = "sentiment_analysis.csv"
topic_file_name = "topic_analysis.json"

# Note: currently, sentiment data is stored in a .csv file and topics in a
# .json file.
path_sentiment_data = os.path.join(data_directory, sentiment_file_name)
path_topic_data = os.path.join(data_directory, topic_file_name)

# Get sentiment data as a list of dicts (one dict per CSV row).
# The round trip through JSON text is deliberate: it turns NaN cells into
# None, so the records can later be written out as valid JSON.
data_sentiment = pd.read_csv(path_sentiment_data)
data_sentiment = json.loads(data_sentiment.to_json(orient='records'))

# Get topic data.
# The encoding is given explicitly so that non-ASCII text is decoded the
# same way on every platform instead of depending on the locale default.
with open(path_topic_data, "r", encoding="utf-8") as file:
    data_topic = json.load(file)

#### Combine the two datasets and handle mismatched entries
That is, an entry that is present in only one of the two data files is removed.

In [None]:
print("Initial length of data_topic:", len(data_topic))
print("Initial length of data_sentiment:", len(data_sentiment))

# Join the two datasets on "id_" instead of on list position. A positional
# comparison cannot tell which of the two lists holds the extra entry, so it
# may drop valid entries and leave the lists misaligned.
# If an id occurs more than once in the topic data, the first entry is used.
topics_by_id = {}
for topic_entry in data_topic:
    topics_by_id.setdefault(topic_entry["id_"], topic_entry)

# Keep only the sentiment entries that have a topic entry with the same id,
# in their original order, and attach the topic words to them.
data_final = []
for sentiment_entry in data_sentiment:
    topic_entry = topics_by_id.get(sentiment_entry["id_"])
    if topic_entry is None:
        # Present in the sentiment data only: drop it.
        continue
    sentiment_entry['topic_combined'] = topic_entry['top_10_words_combined']
    sentiment_entry['topic_question'] = topic_entry['top_10_words_question']
    sentiment_entry['topic_answer'] = topic_entry['top_10_words_answer']
    data_final.append(sentiment_entry)

# Drop the entries that are present in the topic data only, so that both
# source lists contain matched entries exclusively.
matched_ids = {entry["id_"] for entry in data_final}
data_topic = [entry for entry in data_topic if entry["id_"] in matched_ids]
data_sentiment = data_final

# Final data file
data = data_final
print("Final length of data file:", len(data))

#### Save the merged file

In [None]:
# The merged result is stored under this file name
json_file_name = "sentiment_topic_data.json"

# It is written to the same data directory the inputs were read from
json_file_path = os.path.join(data_directory, json_file_name)

# Serialize the merged entries (2-space indentation) and write the text out
with open(json_file_path, 'w') as json_file:
    json_file.write(json.dumps(data, indent=2))