In [3]:
import pandas as pd
import json

# Step 1: Load the JSON data
# `data` is iterated below as a sequence of session records.
with open('sessions.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Step 2: Extract one record per callout that has an identified caller
# Column order of the resulting frame. Passing it to the DataFrame constructor
# keeps these columns present even when no callout matches, so later
# column lookups do not fail with a KeyError on an empty frame.
CALLOUT_COLUMNS = ["date", "period", "session", "speaker_id", "caller_id"]

callouts_data = []

for session in data:
    # A missing or null "sections"/"callouts" entry is treated as "nothing to
    # extract" instead of aborting the whole run.
    # NOTE(review): assumes these keys are optional in the export - confirm.
    for section in session.get("sections") or []:
        for callout in section.get("callouts") or []:
            caller_id = callout.get("caller")
            # A "caller" key that is present but null is not an identified
            # caller, so test the value rather than key membership.
            if caller_id is None:
                continue
            callouts_data.append({
                "date": session["date"],
                "period": session["period"],
                "session": session["sessionNumber"],
                "speaker_id": section["speaker"],
                "caller_id": caller_id,
            })

# Step 3: Create the DataFrame (one row per extracted callout)
df_callouts = pd.DataFrame(callouts_data, columns=CALLOUT_COLUMNS)

# Display the first few rows of the DataFrame to verify
print(df_callouts.head())

                  date period  session speaker_id caller_id
0  2023-09-20T00:00:00  XXVII      230      14795     52687
1  2023-09-20T00:00:00  XXVII      230      14795     35514
2  2023-09-20T00:00:00  XXVII      230      14795     35514
3  2023-09-20T00:00:00  XXVII      230       6486     35520
4  2023-09-20T00:00:00  XXVII      230       6486     35520


In [4]:
# Load the persons data (assuming it's in a file named 'persons.json')
with open('persons.json', 'r', encoding='utf-8') as file:
    persons_data = json.load(file)

# Build the lookup table: person id -> {"name": ..., "parties": ...}
# NOTE(review): the lookups below only match when ids in sessions.json and
# persons.json have the same type (e.g. both int) - confirm against the data.
persons_lookup = {
    person["id"]: {
        "name": person["name"],
        "parties": person["parties"]
    } for person in persons_data
}


def _person_name(person_id):
    """Return the name stored for ``person_id`` in ``persons_lookup``.

    Falls back to 'Unknown' when the id is not in the lookup table.
    """
    return persons_lookup.get(person_id, {}).get('name', 'Unknown')


def _person_parties(person_id):
    """Return the parties of ``person_id`` joined with ', '.

    Returns an empty string when the id is not in the lookup table or when the
    stored parties value is empty or null (a null value would otherwise make
    ``str.join`` raise a TypeError).
    """
    parties = persons_lookup.get(person_id, {}).get('parties') or []
    return ', '.join(parties)


# Add caller_name, speaker_name, caller_parties, and speaker_parties.
# `assign` returns a new frame instead of mutating the one built earlier, and
# re-running this cell overwrites the four columns in place.
df_callouts = df_callouts.assign(
    caller_name=df_callouts['caller_id'].map(_person_name),
    speaker_name=df_callouts['speaker_id'].map(_person_name),
    caller_parties=df_callouts['caller_id'].map(_person_parties),
    speaker_parties=df_callouts['speaker_id'].map(_person_parties),
)

# Display the updated DataFrame
print(df_callouts.head())

                  date period  session speaker_id caller_id  \
0  2023-09-20T00:00:00  XXVII      230      14795     52687   
1  2023-09-20T00:00:00  XXVII      230      14795     35514   
2  2023-09-20T00:00:00  XXVII      230      14795     35514   
3  2023-09-20T00:00:00  XXVII      230       6486     35520   
4  2023-09-20T00:00:00  XXVII      230       6486     35520   

             caller_name            speaker_name caller_parties  \
0  Alois Stöger, diplômé         August Wöginger            SPÖ   
1        Wolfgang Zanger         August Wöginger            FPÖ   
2        Wolfgang Zanger         August Wöginger            FPÖ   
3          Herbert Kickl  MMag. Dr. Susanne Raab            FPÖ   
4          Herbert Kickl  MMag. Dr. Susanne Raab            FPÖ   

  speaker_parties  
0             ÖVP  
1             ÖVP  
2             ÖVP  
3             ÖVP  
4             ÖVP  
