In [2]:
import json
import os

# Encoding used to read the raw input; later cells reuse it when writing output.
encoding = 'utf-8'

# Load data from data/raw.json
file_path = '../data/raw.json'

# Start from "nothing loaded" so every failure path below leaves `data` as None.
data = None
try:
    # Open directly rather than checking os.path.exists first: a single code
    # path, and no gap between the existence check and the actual open.
    with open(file_path, 'r', encoding=encoding) as file:
        data = json.load(file)
except FileNotFoundError:
    print(f"File not found: {file_path}")
except UnicodeDecodeError as e:
    print(f"Encoding error: {e}")
except json.JSONDecodeError as e:
    # Malformed JSON is reported in the same style as the other load failures
    # instead of escaping as an unhandled traceback.
    print(f"Invalid JSON in {file_path}: {e}")

# Display the number of top-level entries in the loaded data. Skipped when
# loading failed: len(None) raises TypeError and would bury the message above.
if data is not None:
    print(len(data))

4


In [6]:
# Extract the required fields from data['accounts']:
# name, profile URL, like/follower stats and the number of posts per account.
accounts_list = []
# Only the per-account payloads are used, so iterate over the values and
# ignore the account ids (the mapping keys).
for account_data in data['accounts'].values():
    # `.get(key, default)` only applies the default when the key is missing.
    # `or` also covers a key that is present but null in the JSON, which would
    # otherwise raise AttributeError / TypeError on the lines below.
    stats = account_data.get("stats") or {}
    posts = account_data.get("posts") or []
    accounts_list.append({
        "name": account_data.get("name"),
        "profile_url": account_data.get("profile_url"),
        "stats": {
            "likes": stats.get("likes"),
            "followers": stats.get("followers"),
        },
        "post_count": len(posts)
    })

# Normalize the scrape date for the resulting data structure.
# data['date'] is written in Romanian long form, e.g. "27 aprilie 2025";
# it is converted to "YYYY-MM-DD". When the key is absent the date stays None.
extracted_date = data.get('date')

if extracted_date is not None:
    # Romanian month names in calendar order; position + 1 is the month number.
    month_names = (
        "ianuarie", "februarie", "martie", "aprilie", "mai", "iunie",
        "iulie", "august", "septembrie", "octombrie", "noiembrie", "decembrie",
    )
    month_mapping = {
        name: f"{number:02d}" for number, name in enumerate(month_names, start=1)
    }

    # Expected layout: "<day> <month name> <year>", separated by whitespace.
    day_part, month_part, year_part = extracted_date.split()
    # An unrecognised month name falls back to the placeholder '00'.
    month_number = month_mapping.get(month_part.lower(), '00')
    extracted_date = "-".join((year_part, month_number, day_part.zfill(2)))

# Final payload: the normalized date plus one summary entry per account.
result = {
    "date": extracted_date,
    "accounts": accounts_list
}

# save the extracted data to a new JSON file
output_file_path = '../output/accounts.json'
# open(..., 'w') does not create missing parent directories, so make sure the
# output folder exists; exist_ok=True makes re-running the cell harmless.
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
with open(output_file_path, 'w', encoding=encoding) as output_file:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    json.dump(result, output_file, indent=4, ensure_ascii=False)