In [5]:
import json
import pandas as pd

# Function to load the data from the JSON lines file
def load_json_data(file_path):
    """Load the records stored in a JSON Lines file.

    Every non-blank line is parsed with ``json.loads``. Loading is
    best-effort: a line that is not valid JSON is skipped rather than
    aborting the whole load, but a single warning reports how many lines
    were dropped so that data loss is never silent.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        list: The parsed values, in file order, one per valid line.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Local import keeps this function self-contained; the module's import
    # block does not provide ``warnings``.
    import warnings

    data = []
    skipped = 0
    # 'utf-8-sig' reads plain UTF-8 and additionally strips a leading BOM.
    # Without it the BOM stays glued to the first line, json.loads rejects
    # that line, and the first record is lost. An explicit encoding also
    # removes the dependency on the platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            if not line.strip():
                continue  # Blank separator lines are not errors
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError:
                skipped += 1  # Malformed line: skip it, but keep count
    if skipped:
        warnings.warn(
            f"{file_path}: skipped {skipped} malformed JSON line(s)",
            stacklevel=2,
        )
    return data

# Antiracist dataset: raw JSONL input and output folder for its CSV
file_path_anti = '../Data/raw/antiracist.jsonl'
processed_anti_path = '../Data/processed/'

# Neutral dataset: raw JSONL input and output folder for its CSV
file_path_neutral = '../Data/raw/neutral.jsonl'
processed_neutral_path = '../Data/processed/'

# Racist dataset: raw JSONL input and output folder for its CSV
file_path_racist = '../Data/raw/racist.jsonl'
processed_racist_path = '../Data/processed/'

# Parse the three JSONL files into lists of records
anti_data, neutral_data, racist_data = (
    load_json_data(source)
    for source in (file_path_anti, file_path_neutral, file_path_racist)
)

# Wrap each list of records in a DataFrame
anti_df, neutral_df, racist_df = (
    pd.DataFrame(records)
    for records in (anti_data, neutral_data, racist_data)
)

# Keep only the first three columns of each frame; everything from the
# fourth column onwards is dropped
anti_df, neutral_df, racist_df = (
    frame.drop(columns=frame.columns[3:])
    for frame in (anti_df, neutral_df, racist_df)
)

# Write each trimmed frame to its CSV file, without the index column
anti_df.to_csv(processed_anti_path + 'antiracist_data.csv', index=False)
neutral_df.to_csv(processed_neutral_path + 'neutral_data.csv', index=False)
racist_df.to_csv(processed_racist_path + 'racist_data.csv', index=False)

print("CSV files created: antiracist_data.csv, neutral_data.csv, and racist_data.csv")


CSV files created: antiracist_data.csv, neutral_data.csv, and racist_data.csv
