In [None]:
import requests

# URL of the JSON data
url = 'https://zpravy.udhpsh.cz/zpravy/vfz2022.json'

# Fetch the data from the URL
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Extract the JSON data
    data = response.json()
    print(data)
else:
    print(f"Failed to retrieve data: {response.status_code}")

In [None]:
import pandas as pd
# Flatten only the 'parties' part of the JSON response
parties_data = data['parties']
# Flatten the JSON response
flattened_data = pd.json_normalize(parties_data,record_path=['files'])
print(flattened_data)

In [None]:
# Extract the required information for each party
party_data = []
for party in data['parties']:
    party_key = party.get('shortName', 'Unknown')  # Use shortName as key if available
    party_ic = party['ic']
    for file in party['files']:
        file_subject = file['subject']
        file_url = file['url']
        party_data.append({
            'Party Key': party_key,
            'IC': party_ic,
            'Subject': file_subject,
            'URL': file_url
        })
# Create a DataFrame from the extracted data
df = pd.DataFrame(party_data)
print(df)

In [None]:
for party in data['parties']:
    ic = party.get('ic', 'N/A')
    json_files_count = sum(1 for file in party.get('files', []) if file.get('format') == 'json')
    print(f"IC: {ic}, JSON Files Count: {json_files_count}")

In [10]:
import requests
import json
import pandas as pd

# List of years
years = [2017, 2018, 2019, 2020, 2021, 2022, 2023]

# Initialize an empty list to store the extracted data
extracted_data = []

# Loop through each year
for year in years:
    # URL of the JSON data
    url = f'https://zpravy.udhpsh.cz/export/vfz{year}-index.json'

    # Send a GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON data
        data = response.json()

        # Extract relevant data
        for party in data['parties']:
            for file in party['files']:
                extracted_data.append({
                    'year': year,
                    'key': data['election']['key'],
                    'ic': party['ic'],
                    'subject': file['subject'],
                    'url': file['url']
                })
    else:
        print(f"Failed to retrieve data for year {year}: {response.status_code}")

# Create DataFrame
df = pd.DataFrame(extracted_data)

# Print the DataFrame
print(len(df))

13040


In [11]:
df_cprijmy = df[df['subject'] == 'cprijmy']
df_cvydaje = df[df['subject'] == 'cvydaje']
df_zamest = df[df['subject'] == 'zamest']
df_polinst = df[df['subject'] == 'polinst']
df_podil = df[df['subject'] == 'podil']
df_penizefo = df[df['subject'] == 'penizefo']
df_bupfo = df[df['subject'] == 'bupfo']
df_penizepo = df[df['subject'] == 'penizepo']
df_buppo = df[df['subject'] == 'buppo']
df_dluhy = df[df['subject'] == 'dluhy']
df_dedictvi = df[df['subject'] == 'dedictvi']
df_clenove = df[df['subject'] == 'clenove']


In [15]:
df_head = df_zamest.head(200)

In [None]:
print(df_head)

In [16]:
def fetch_json(url):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an HTTPError for bad responses
        return response.json()  # Return parsed JSON
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL {url}: {e}")
        return None  # Return None in case of error

# Fetch JSON for each URL and store it in a new column
df_head['json_response'] = df_head['url'].apply(fetch_json)
df_head.to_csv('financovani.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_head['json_response'] = df_head['url'].apply(fetch_json)


In [17]:
# Step 1: Expand the JSON into a tabular format
exploded_df = df_head.explode('json_response')

# Step 2: Normalize the JSON data
expanded_df = pd.json_normalize(exploded_df['json_response'])

# Add meaningful context from the original DataFrame
result_df = exploded_df[['year',  'ic']].reset_index(drop=True)
result_df = pd.concat([result_df, expanded_df], axis=1)

print(result_df)

     year        ic                                              job  number
0    2017  05402450                                   administrativa     2.0
1    2017  04627695                                              NaN     NaN
2    2017  71443339  krajský manažer - průměrný přepočtený stav 2017     8.0
3    2017  71443339         asistent - průměrný přepočtený stav 2017     3.0
4    2017  71443339   PR specialista - průměrný přepočtený stav 2017     1.0
..    ...       ...                                              ...     ...
254  2018  26595940                                              NaN     NaN
255  2018  27026922                                              NaN     NaN
256  2018  06695205                                              NaN     NaN
257  2018  71339485                                              NaN     NaN
258  2018  70869553                                              NaN     NaN

[259 rows x 4 columns]


In [None]:
import requests
from tqdm import tqdm

def fetch_json(url):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an HTTPError for bad responses
        return response.json()  # Return parsed JSON
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL {url}: {e}")
        return None  # Return None in case of error

# Fetch JSON for each URL and store it in a new column with a progress bar
df['json_response'] = df['url'].apply(lambda url: fetch_json(url))

# Using tqdm to display progress
for url in tqdm(df['url']):
    df.loc[df['url'] == url, 'json_response'] = fetch_json(url)

In [None]:
unique_subjects = df['subject'].unique()
print(unique_subjects)

In [None]:

subject_dfs = {subject: df[df['subject'] == subject] for subject in unique_subjects}


# Print the first few rows of each DataFrame to verify
for subject, subject_df in subject_dfs.items():
    print(f"Subject: {subject}")
    print(subject_df.head(), "\n")

In [None]:
for subject, subject_df in subject_dfs.items():
    filename = f"financovani_{subject}.csv"
    subject_df.to_csv(filename, index=False)
    print(f"Saved {filename}")