In [None]:
import os
import re
import time
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
import json

# Root of the government portal; relative hrefs scraped from its pages are appended to it.
BASE_URL = "https://www.odok.gov.cz"
# Per-year listing page of government meetings; "{year}" is filled in with str.format().
LISTING_URL = BASE_URL + "/portal/zvlady/jednani/{year}"
# JSON file the scraped meeting links are written to and later re-read from.
URLS = "links.json"

# Headers passed to every requests.get() call in this notebook.
# They mimic a desktop Chrome browser - presumably because the site answers
# unidentified clients differently (TODO confirm).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/124.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "cs,en-US;q=0.9,en;q=0.8",
    "Referer": "https://www.odok.gov.cz/",
    "Connection": "keep-alive",
}


def sanitize_filename(name):
    """Return `name` with every filename-unsafe character replaced by "_".

    Kept as-is: regex word characters (letters, digits and underscore, including
    non-ASCII letters when `name` is a str), dash, dot and space. Each other
    character is swapped for exactly one underscore.
    """
    unsafe_char = re.compile(r"[^\w\-_\. ]")
    return unsafe_char.sub("_", name)

## Scrape links to government meetings ("jednání vlády")

#### Scrape 2009-2025

In [None]:
def get_meeting_links(year):
    """Return the unique meeting-detail URLs found on the listing page for `year`.

    Args:
        year (int | str): Year substituted into LISTING_URL.

    Returns:
        list[str]: Absolute URLs in page order, each listed once. Empty when the
        listing page cannot be fetched (network error or non-200 status).
    """
    url = LISTING_URL.format(year=year)

    # A timeout keeps one stalled connection from hanging the whole scrape; a network
    # error is reported the same way as a bad status so the other years still run.
    try:
        res = requests.get(url, headers=HEADERS, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data for year {year}: {exc}")
        return []

    if res.status_code != 200:
        print(f"Failed to retrieve data for year {year}: {res.status_code}")
        return []

    soup = BeautifulSoup(res.content, "html.parser")
    links = []
    seen = set()

    # Anchors whose href contains the meeting-detail path
    for a in soup.select("a[href*='/portal/zvlady/jednani-detail/']"):
        # urljoin prefixes relative hrefs and leaves already-absolute ones untouched
        full_url = requests.compat.urljoin(BASE_URL, a["href"])
        # Deduplicate on the resolved URL, which is the form stored in `links`
        if full_url not in seen:
            seen.add(full_url)
            links.append(full_url)

    return links

def scrape_all(start_year=2009, end_year=2025):
    """Collect the meeting links for every year from `start_year` to `end_year` inclusive.

    Returns:
        tuple[dict, int]: ({year: [meeting URLs]}, total number of URLs over all years).
        The dictionary keys are the integer years.
    """
    per_year = {}

    for yr in range(start_year, end_year + 1):
        print(f"\nProcessing year: {yr}")
        found = get_meeting_links(yr)
        per_year[yr] = found
        print(len(found), "meetings found")

    # Total is derived once at the end instead of being accumulated in the loop
    total = sum(len(urls) for urls in per_year.values())
    return per_year, total

In [None]:
# Scrape the listing pages for the default year range (2009-2025) and report the total.
links, num_of_meetings = scrape_all()
print(f"\nTotal meetings found: {num_of_meetings}")

In [8]:

# Persist the scraped links so the download step can run without re-scraping.
# JSON object keys are always strings, so the integer year keys come back as str on load.
with open(URLS, "w", encoding="utf-8") as f:
    json.dump(links, f, ensure_ascii=False, indent=2)

#### Get statistics

In [None]:
def extract_usneseni_from_listing(year):
    """Count the resolutions ("usnesení") listed for each meeting date of `year`.

    For every date anchor on the yearly listing page, the next sibling <div> of the
    anchor's enclosing <div> is searched for a range such as "Usnesení č. 1–14";
    the count is `last - first + 1`. Dates without such a range are skipped.

    Args:
        year (int | str): Year substituted into LISTING_URL.

    Returns:
        dict[str, int]: {"YYYY-MM-DD": number of resolutions}; empty when the page
        does not answer with HTTP 200.
    """
    listing_url = LISTING_URL.format(year=year)

    response = requests.get(listing_url, headers=HEADERS)
    if response.status_code != 200:
        print(f"❌ Failed to load {listing_url}")
        return {}

    page = BeautifulSoup(response.content, "html.parser")
    date_re = re.compile(r"\d{4}-\d{2}-\d{2}")
    range_re = re.compile(r"Usnesení č\.\s*(\d+)[–-](\d+)")
    counts_by_date = {}

    for anchor in page.select("div.mx-2 a"):
        # Only anchors whose text starts with a YYYY-MM-DD date are meetings
        found_date = date_re.match(anchor.get_text(strip=True))
        if found_date is None:
            continue
        meeting_date = found_date.group(0)

        # The resolution range is looked up in the <div> following the anchor's <div>
        enclosing = anchor.find_parent("div")
        if not enclosing:
            continue
        following = enclosing.find_next_sibling("div")
        if not following:
            continue

        found_range = range_re.search(following.get_text(strip=True))
        if found_range is None:
            continue

        first_no, last_no = (int(number) for number in found_range.groups())
        total = last_no - first_no + 1
        counts_by_date[meeting_date] = total
        print(f"🗓 {meeting_date}: {total} usnesení")

    return counts_by_date


def extract_all_usneseni_counts(start_year=2009, end_year=2025):
    """
    Gathers per-meeting resolution (usnesení) counts for every year in the range.

    Each year from `start_year` to `end_year` (inclusive) is passed to
    extract_usneseni_from_listing() and stored under the year converted to a string.

    Returns:
        dict: {"<year>": {"<date>": count_of_resolutions}}
    """
    counts_by_year = {}
    for yr in range(start_year, end_year + 1):
        print(f"\n📅 Year: {yr}")
        year_counts = extract_usneseni_from_listing(yr)
        # String keys keep the in-memory structure identical to its JSON form
        counts_by_year[str(yr)] = year_counts
    return counts_by_year


def save_json(data, filename="usneseni_by_year.json"):
    """
    Writes `data` to `filename` as UTF-8 JSON, indented by two spaces.

    Non-ASCII characters are written literally rather than as \\u escapes.

    Args:
        data (dict): JSON-serialisable structure to store.
        filename (str): Path of the file to create or overwrite.
    """
    with open(filename, "w", encoding="utf-8") as out:
        serialized = json.dumps(data, indent=2, ensure_ascii=False)
        out.write(serialized)
    print(f"\n✅ Saved to {filename}")
    
# Scrape the per-meeting resolution counts for the default year range (2009-2025)
# and write them to the default output file, usneseni_by_year.json.
data = extract_all_usneseni_counts()
save_json(data)


📅 Year: 2009
🗓 2009-12-21: 19 usnesení
🗓 2009-12-14: 21 usnesení
🗓 2009-12-07: 4 usnesení
🗓 2009-12-02: 63 usnesení
🗓 2009-11-30: 34 usnesení
🗓 2009-11-23: 1 usnesení
🗓 2009-11-19: 29 usnesení
🗓 2009-11-16: 1 usnesení
🗓 2009-11-11: 1 usnesení
🗓 2009-11-10: 14 usnesení
🗓 2009-11-09: 17 usnesení
🗓 2009-11-02: 2 usnesení
🗓 2009-10-28: 21 usnesení
🗓 2009-10-26: 31 usnesení
🗓 2009-10-19: 2 usnesení
🗓 2009-10-15: 25 usnesení
🗓 2009-10-12: 24 usnesení
🗓 2009-10-05: 14 usnesení
🗓 2009-09-29: 19 usnesení
🗓 2009-09-21: 37 usnesení
🗓 2009-09-16: 1 usnesení
🗓 2009-09-09: 40 usnesení
🗓 2009-09-03: 26 usnesení
🗓 2009-08-31: 55 usnesení
🗓 2009-08-26: 1 usnesení
🗓 2009-08-21: 43 usnesení
🗓 2009-08-17: 40 usnesení
🗓 2009-07-27: 41 usnesení
🗓 2009-07-20: 1 usnesení
🗓 2009-07-14: 57 usnesení
🗓 2009-07-13: 36 usnesení
🗓 2009-06-29: 1 usnesení
🗓 2009-06-25: 43 usnesení
🗓 2009-06-22: 1 usnesení
🗓 2009-06-17: 34 usnesení
🗓 2009-06-15: 41 usnesení
🗓 2009-06-08: 34 usnesení
🗓 2009-06-01: 35 usnesení
🗓 2009-05

### Download files

In [None]:
def extract_date_from_url(url):
    """
    Pulls the meeting date out of a meeting detail URL.

    Args:
        url (str): URL expected to contain ".../jednani-detail/YYYY-MM-DD".

    Returns:
        str or None: The "YYYY-MM-DD" part, or None when the URL has no such segment.
    """
    found = re.search(r'/jednani-detail/(\d{4}-\d{2}-\d{2})', url)
    return found.group(1) if found else None

def download_meeting_records(year, meeting_url):
    """
    Downloads the meeting record ("Záznam z jednání") linked from one meeting detail page.

    The file is stored as vlada_zaznamy/<year>/<YYYY-MM-DD>.doc. The ".doc" suffix is
    used regardless of the attachment's own extension, which is what the later
    conversion step looks for. Nothing is requested when that file already exists,
    and nothing is written unless the server answers with HTTP 200.

    Args:
        year (int | str): The year of the meeting (used for folder organization).
        meeting_url (str): Full URL to the meeting detail page.

    Raises:
        requests.RequestException: On network errors or timeouts.
    """
    date_str = extract_date_from_url(meeting_url)
    if not date_str:
        print(f"Could not extract date from URL: {meeting_url}")
        return

    folder = f"vlada_zaznamy/{year}"
    filepath = os.path.join(folder, f"{date_str}.doc")

    # The target path depends only on the arguments, so an already downloaded record
    # is detected before any request is sent.
    if os.path.exists(filepath):
        return

    res = requests.get(meeting_url, headers=HEADERS, timeout=30)
    if res.status_code != 200:
        print(f"Failed to load {meeting_url} (status {res.status_code})")
        return
    soup = BeautifulSoup(res.content, "html.parser")

    # <span> elements whose text mentions "Záznam z jednání" (record of the meeting)
    records = soup.find_all("span", string=re.compile("Záznam z jednání"))
    if not records:
        print(f"No Záznam z jednání in: {meeting_url}")
        return

    for record in records:
        parent = record.find_parent()
        if not parent:
            continue

        # The download link is searched for inside the span's parent element
        a_tag = parent.find("a", href=re.compile("/portal/services/download/attachment/"))
        if not a_tag:
            continue

        file_url = BASE_URL + a_tag["href"]
        file_res = requests.get(file_url, headers=HEADERS, timeout=30)
        if file_res.status_code != 200:
            # Never store an error page as if it were the document
            print(f"Failed to download {file_url} (status {file_res.status_code})")
            continue

        os.makedirs(folder, exist_ok=True)
        with open(filepath, "wb") as f:
            f.write(file_res.content)
        print(f"Downloaded: {filepath}")
        time.sleep(0.5)  # be polite
        # Every record of a meeting maps to the same path, so the first success is final
        return

In [None]:
def download_all_records(links_dict):
    """
    Downloads the official record ("Záznam z jednání") of every meeting in `links_dict`.

    A failure for one meeting is printed and does not stop the remaining downloads.

    Args:
        links_dict (dict): Maps a year to its list of meeting URLs,
                           e.g., {"2012": [url1, url2, ...], ...}
    """
    for yr in links_dict:
        meeting_urls = links_dict[yr]
        print(f"\nDownloading year: {yr}")
        print(meeting_urls)
        # One progress bar per year
        for meeting_url in tqdm(meeting_urls, desc=f"Year {yr}"):
            try:
                download_meeting_records(yr, meeting_url)
            except Exception as err:
                print(f"Error in {meeting_url}: {err}")
            time.sleep(0.5)  # pause between meetings, success or not

In [None]:
# Reload the saved links (year keys are strings after the JSON round-trip) and
# download the record of every meeting.
with open(URLS, "r", encoding="utf-8") as f:
    links = json.load(f)
download_all_records(links)

## Scrape 1991-2008

In [37]:
import os
import requests
from bs4 import BeautifulSoup

# Index page of the archive used for 1991-2008; the year is appended after "Open&".
BASE_INDEX_URL = "https://albatros.odok.gov.cz/usneseni/usneseni_webtest.nsf/web/cs?Open&"
# Base URL against which the relative meeting hrefs of an index page are resolved.
BASE_PAGE_URL = "https://albatros.odok.gov.cz/usneseni/usneseni_webtest.nsf/web/"
# Database root that "/0/<document id>" paths are appended to.
BASE_DOC_ROOT = "https://albatros.odok.gov.cz/usneseni/usneseni_webtest.nsf"

def extract_meeting_links(year):
    """Return (link text, absolute URL) pairs for the meetings on the index page of `year`.

    Only anchors whose href starts with "cs?Open&<4 digits>&<2 digits>-<2 digits>" are
    kept. The link text is expected to be the meeting date, e.g. 1991-12-20.
    """
    page = requests.get(BASE_INDEX_URL + str(year), headers=HEADERS)
    parsed = BeautifulSoup(page.content, "html.parser")
    meeting_href = re.compile(r"cs\?Open&\d{4}&\d{2}-\d{2}")

    return [
        (anchor.text.strip(), requests.compat.urljoin(BASE_PAGE_URL, anchor["href"]))
        for anchor in parsed.find_all("a", href=True)
        if meeting_href.match(anchor["href"])
    ]

def download_zaznam_from_detail(date_str, year, detail_url):
    """Download the "Záznam z jednání" document linked from one archive detail page.

    The file is stored as vlada_zaznamy/<year>/<date_str>.doc. Nothing is requested
    when that file already exists.

    Args:
        date_str (str): Scraped link text of the meeting (expected: YYYY-MM-DD); it is
            passed through sanitize_filename() before being used as a file name.
        year (int | str): Year used as the sub-folder name.
        detail_url (str): Absolute URL of the meeting detail page.

    Raises:
        requests.RequestException: If the detail page itself cannot be fetched.
    """
    folder = os.path.join("vlada_zaznamy", str(year))
    # date_str comes from page content, so strip characters that are unsafe in a file name
    filepath = os.path.join(folder, sanitize_filename(f"{date_str}.doc"))

    # The target path is known up front: skip the detail-page request for existing files
    if os.path.exists(filepath):
        return

    res = requests.get(detail_url, headers=HEADERS, timeout=30)
    if res.status_code != 200:
        print(f"❌ Failed to load {detail_url} (status {res.status_code})")
        return
    soup = BeautifulSoup(res.content, "html.parser")

    # First anchor whose text contains the label (case-insensitive substring match)
    link = soup.find("a", string=re.compile(r"Záznam z jednání", re.IGNORECASE))
    if not link:
        print(f"❌ No Záznam z jednání found on {detail_url}")
        return

    # An anchor without an href falls through to the "unexpected format" branch
    href = link.get("href") or ""
    if href.startswith("../0/"):
        doc_id = href[len("../0/"):]
        file_url = f"{BASE_DOC_ROOT}/0/{doc_id}"
    elif href.startswith("/0/"):
        file_url = f"{BASE_DOC_ROOT}{href}"
    else:
        print(f"⚠️ Unexpected href format: {href}")
        return

    os.makedirs(folder, exist_ok=True)
    try:
        file_res = requests.get(file_url, headers=HEADERS, timeout=30)
        if file_res.status_code == 200:
            with open(filepath, "wb") as f:
                f.write(file_res.content)
            print(f"✅ Downloaded: {filepath}")
        else:
            print(f"❌ Failed to download {file_url} (status {file_res.status_code})")
    except Exception as e:
        # Best effort: report the failure and let the caller move on to the next meeting
        print(f"❌ Error downloading {file_url}: {e}")
    time.sleep(0.5)  # be polite
                

def download_all(start_year=1991, end_year=2008):
    """Download the meeting records of every year from `start_year` to `end_year` inclusive.

    Any exception raised while processing a year is printed and the loop moves on
    to the next year after a short pause.
    """
    for yr in range(start_year, end_year + 1):
        print(f"\n📅 Year {yr}")
        try:
            found = extract_meeting_links(yr)
            print(f"Found {len(found)} meetings")
            for meeting_date, meeting_url in found:
                download_zaznam_from_detail(meeting_date, yr, meeting_url)
        except Exception as err:
            print(f"⚠️ Error for year {yr}: {err}")
        time.sleep(0.5)
# Download the records for the whole default range (1991-2008).
download_all()



📅 Year 1991
Found 51 meetings
Found link: ../0/9470577DAD234F34C12571B6006D930B
Found link: ../0/4E24E1AD5668F4DAC12571B6006CC485
✅ Downloaded: vlada_zaznamy\1991\1991-12-12.doc
Found link: ../0/17F0D82CC1783F8EC12571B60070EA36
✅ Downloaded: vlada_zaznamy\1991\1991-12-11.doc
Found link: ../0/6DA127793DC129E2C12571B6006C9B60
✅ Downloaded: vlada_zaznamy\1991\1991-12-04.doc
Found link: ../0/63DE5048F90B7805C12571B6006DDB86
✅ Downloaded: vlada_zaznamy\1991\1991-12-01.doc
Found link: ../0/F7BA7FE08CDF49FEC12571B6006FEC8E
✅ Downloaded: vlada_zaznamy\1991\1991-11-29.doc
Found link: ../0/E7A09789B9F3A601C12571B6006EF487
✅ Downloaded: vlada_zaznamy\1991\1991-11-27.doc
Found link: ../0/5D404A4F90BCDA05C12571B6006CE3F7
✅ Downloaded: vlada_zaznamy\1991\1991-11-20.doc
Found link: ../0/C811E3202598C95BC12571B6006C3C6C
✅ Downloaded: vlada_zaznamy\1991\1991-11-19.doc
Found link: ../0/32ABF3651188A08EC12571B6006D23B1
✅ Downloaded: vlada_zaznamy\1991\1991-11-18.doc
Found link: ../0/3028D1824C50A7F0C125

## Convert format

In [None]:
import os
import win32com.client

def convert_doc_to_docx(root_dir="vlada_zaznamy"):
    """
    Recursively traverses the given directory and converts all `.doc` files to `.docx` format
    using Microsoft Word via COM automation (Windows only).

    A file is skipped when its `.docx` counterpart already exists. A failure on one
    file is printed and the remaining files are still processed. Every opened document
    is closed and the Word instance is shut down even when an error occurs.

    Args:
        root_dir (str): Root directory to scan for `.doc` files (default: "vlada_zaznamy")
    """
    # Launch an instance of Microsoft Word (hidden window)
    word = win32com.client.Dispatch("Word.Application")
    word.Visible = False  # Prevent Word UI from showing

    try:
        for root, _, files in os.walk(root_dir):
            for file in files:
                # endswith(".doc") is already False for "*.docx" names
                if not file.endswith(".doc"):
                    continue

                doc_path = os.path.abspath(os.path.join(root, file))
                docx_path = doc_path + "x"  # .docx
                if os.path.exists(docx_path):
                    continue

                doc = None
                try:
                    doc = word.Documents.Open(doc_path)
                    # Save as .docx format (16 = wdFormatDocumentDefault)
                    doc.SaveAs(docx_path, FileFormat=16)
                    print(f"✅ Converted: {doc_path} → {docx_path}")
                except Exception as e:
                    print(f"❌ Failed to convert {doc_path}: {e}")
                finally:
                    # Close the document on failure too, so it does not stay open in Word
                    if doc is not None:
                        try:
                            doc.Close(0)  # 0 = wdDoNotSaveChanges
                        except Exception as e:
                            print(f"❌ Failed to close {doc_path}: {e}")
    finally:
        # Always shut Word down so no hidden instance is left running
        word.Quit()

## Search for president

In [None]:
from docx import Document
import os

def extract_context(text, keyword, window=3):
    """
    Extracts contextual snippets around a keyword or multi-word phrase in the input text.

    The text is split on whitespace. A single-word keyword matches every word that
    contains it as a substring (case-insensitive). A multi-word phrase matches a run
    of consecutive words whose space-joined form contains the phrase, so
    "nil prezident republiky" is found inside "účastnil prezident republiky".

    Args:
        text (str): The full input text to search through.
        keyword (str): The word or phrase to find within the text.
        window (int): Number of words to include before and after the match.

    Returns:
        List[str]: One snippet per match, in order of occurrence.
    """
    words = text.split()
    tokens = keyword.lower().split()
    # Single-token (or empty) keywords keep the plain per-word substring test
    needle = " ".join(tokens) if len(tokens) > 1 else keyword.lower()
    span = max(1, len(tokens))  # number of consecutive words one match covers
    results = []

    for i in range(len(words) - span + 1):
        candidate = " ".join(words[i:i + span]).lower()
        if needle in candidate:
            start = max(0, i - window)
            end = min(len(words), i + span + window)
            results.append(" ".join(words[start:end]))
    return results

def search_word_in_docx(root_dir="vlada_zaznamy", keyword="prezident", output_file="matches.txt"):
    """
    Searches all `.docx` files in the given directory tree for a keyword and extracts surrounding context.

    For each document with at least one extracted snippet, one line
    "<path> : <snippet> | <snippet> ..." is written to `output_file`. A summary is
    printed at the end; the percentage is reported as "n/a" when no `.docx` file
    was found.

    Args:
        root_dir (str): Root folder to search for `.docx` files.
        keyword (str): Keyword to search for (case-insensitive).
        output_file (str): Output text file where matches will be recorded (overwritten).

    Returns:
        list[str]: Paths of the documents that produced at least one snippet.
    """
    keyword = keyword.lower()
    num_of_files = 0
    matches = []

    with open(output_file, "w", encoding="utf-8") as f:
        for root, _, files in os.walk(root_dir):
            for file in files:
                if not file.endswith(".docx"):
                    continue
                num_of_files += 1
                path = os.path.join(root, file)
                try:
                    # Open the .docx file using python-docx
                    doc = Document(path)
                    # Non-empty body paragraphs only
                    paragraphs = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
                    context_matches = []

                    for para in paragraphs:
                        if keyword in para.lower():
                            context_matches.extend(extract_context(para, keyword))

                    if context_matches:
                        matches.append(path)
                        match_str = " | ".join(context_matches)
                        f.write(f"{path} : {match_str}\n")
                except Exception as e:
                    # Unreadable documents are reported and still counted as scanned
                    print(f"⚠️ Error reading {path}: {e}")

    print(f"\nTotal documents containing '{keyword}': {len(matches)}")
    print(f"Number of files: {num_of_files}")
    # Guard the ratio: an empty or missing root_dir yields zero scanned files
    if num_of_files:
        print(f"Percentage: {len(matches) / num_of_files * 100:.2f}%")
    else:
        print("Percentage: n/a (no .docx files found)")
    print(f"\n✅ Saved contextual matches to: {output_file}")

    return matches

# One-time, Windows-only preprocessing step; uncomment to (re)create the .docx files:
# convert_doc_to_docx()
# Run the search with its defaults (keyword "prezident", output file "matches.txt").
search_word_in_docx()


Total documents containing 'nil prezident republiky': 0
Number of files: 1878
Percentage: 0.00%

✅ Saved contextual matches to: matches_nil prezident republiky.txt


[]

In [None]:
def filter_matches_by_phrase(input_file="matches_prezident.txt", phrase="prezident republiky"):
    """
    Copies the lines of `input_file` that contain `phrase` (case-insensitive) to a new file.

    Args:
        input_file (str): File containing raw keyword context matches, one per line.
        phrase (str): Phrase a line must contain to be kept (e.g., 'prezident republiky').

    Output:
        The kept lines are written to "matches_<phrase>.txt" in the current directory,
        where <phrase> is the phrase as given with spaces replaced by underscores
        (e.g., 'matches_prezident_republiky.txt').
    """
    # The file name uses the phrase as given; matching uses its lower-cased form
    output_file = f"matches_{phrase.replace(' ', '_')}.txt"
    needle = phrase.lower()

    with open(input_file, "r", encoding="utf-8") as src, \
         open(output_file, "w", encoding="utf-8") as dst:
        selected = [row for row in src if needle in row.lower()]
        dst.writelines(selected)

    kept = len(selected)
    print(f"✅ Done. Kept {kept} lines containing '{needle}'.")
    print(f"📝 Saved to: {output_file}")
    
# Keep only the lines that contain "nil prezident republiky" (e.g. the tail of
# "účastnil prezident republiky").
# NOTE(review): search_word_in_docx() writes "matches.txt" by default, so
# "matches_prezident.txt" must already exist from a run with that output name.
filter_matches_by_phrase("matches_prezident.txt", "nil prezident republiky")

✅ Done. Kept 27 lines containing 'nil prezident republiky'.
📝 Saved to: matches_nil_prezident_republiky.txt


In [None]:
import os
from docx import Document

def extract_context(text, focus_keyword, window=3):
    """Extracts context around a keyword or multi-word phrase from a paragraph.

    The text is split on whitespace. A single-word keyword matches every word that
    contains it as a substring (case-insensitive). A multi-word phrase matches a run
    of consecutive words whose space-joined form contains the phrase.

    Args:
        text (str): Paragraph to search.
        focus_keyword (str): Word or phrase to locate.
        window (int): Number of words kept before and after each match.

    Returns:
        list[str]: One snippet per match, in order of occurrence.
    """
    words = text.split()
    tokens = focus_keyword.lower().split()
    # Single-token (or empty) keywords keep the plain per-word substring test
    needle = " ".join(tokens) if len(tokens) > 1 else focus_keyword.lower()
    span = max(1, len(tokens))  # number of consecutive words one match covers
    results = []

    for i in range(len(words) - span + 1):
        candidate = " ".join(words[i:i + span]).lower()
        if needle in candidate:
            start = max(0, i - window)
            end = min(len(words), i + span + window)
            results.append(" ".join(words[start:end]))
    return results

def paragraph_contains(text, keywords):
    """Returns True if the text contains at least one of the keywords (case-insensitive)."""
    haystack = text.lower()
    for candidate in keywords:
        if candidate.lower() in haystack:
            return True
    return False


def process_docx_file(path, match_keywords, focus_keyword, window=3):
    """Processes one .docx file.

    Returns:
        tuple[bool, list[str]]: Whether the document text contains any of the match
        keywords, and the context snippets around `focus_keyword` (collected only when
        it does). (False, []) is returned when the file cannot be read.
    """
    try:
        document = Document(path)
        non_empty = [p.text.strip() for p in document.paragraphs if p.text.strip()]
        # The trigger keywords are tested against the whole document text at once
        has_any = paragraph_contains(" ".join(non_empty), match_keywords)

        snippets = []
        if has_any:
            snippets = [
                snippet
                for paragraph in non_empty
                if focus_keyword.lower() in paragraph.lower()
                for snippet in extract_context(paragraph, focus_keyword, window)
            ]

        return has_any, snippets
    except Exception as err:
        print(f"⚠️ Error reading {path}: {err}")
        return False, []


def search_keywords_in_docx(
    root_dir="vlada_zaznamy",
    match_keywords=["prezident", "účast"],
    focus_keyword="prezident",
    output_file="matches_structured.txt",
    window=3
):
    """
    Searches all `.docx` files in a directory for occurrences of one or more keywords.
    It extracts context around a specific focus keyword if any of the match keywords are found.

    A summary is printed at the end; the percentage is reported as "n/a" when no
    `.docx` file was found.

    Args:
        root_dir (str): Path to the root directory containing `.docx` files.
        match_keywords (list): List of keywords to trigger search (e.g., ['prezident', 'účast']).
            The default list is never modified.
        focus_keyword (str): The word around which contextual windows are extracted (e.g., 'prezident').
        output_file (str): Path to output text file where matches will be recorded (overwritten).
        window (int): Number of words before and after the focus keyword to include in each snippet.

    Returns:
        list[str]: Paths of the documents for which a line was written.
    """
    match_keywords = [kw.lower() for kw in match_keywords]
    focus_keyword = focus_keyword.lower()

    matches = []
    num_of_files = 0

    with open(output_file, "w", encoding="utf-8") as f:
        for root, _, files in os.walk(root_dir):
            for file in files:
                if not file.endswith(".docx"):
                    continue
                num_of_files += 1
                path = os.path.join(root, file)
                contains_any, snippets = process_docx_file(path, match_keywords, focus_keyword, window)
                # Record the document only if a trigger keyword matched AND snippets were found
                if contains_any and snippets:
                    matches.append(path)
                    joined = " | ".join(snippets)
                    f.write(f"{path} : {joined}\n")

    print(f"\nTotal documents containing any of {match_keywords}: {len(matches)}")
    print(f"Number of files scanned: {num_of_files}")
    # Guard the ratio: an empty or missing root_dir yields zero scanned files
    if num_of_files:
        print(f"Percentage: {len(matches) / num_of_files * 100:.2f}%")
    else:
        print("Percentage: n/a (no .docx files found)")
    print(f"✅ Contextual matches (focused on '{focus_keyword}') saved to: {output_file}")

    return matches

# NOTE(review): the active loop runs once with an empty string, so `prezident` only
# ends up in the output file name ("matches_structured_.txt"); it is not used as a
# search term. The per-president variant is kept on the next line for reference.
# for prezident in ["havel", "klaus", "zeman", "pavel"]:
for prezident in [""]:
    search_keywords_in_docx(
        root_dir="vlada_zaznamy",
        match_keywords=["účasti prezidenta republiky", "účastnil prezident republiky", "vystoupil prezident", "přítomen prezident republiky"],
        focus_keyword="prezident",
        output_file=f"matches_structured_{prezident}.txt"
    )


Total documents containing any of ['účasti prezidenta republiky', 'účastnil prezident republiky', 'vystoupil prezident', 'přítomen prezident republiky']: 50
Number of files scanned: 1878
Percentage: 2.66%
✅ Contextual matches (focused on 'prezident') saved to: matches_structured_.txt


### Basic statistics

In [14]:
import re
from datetime import datetime
from collections import defaultdict
import matplotlib.pyplot as plt

# --- Presidential terms as (name, first day, last day); both bounds are inclusive ---
presidents = [
    ("Václav Havel", datetime(1993, 2, 2), datetime(2003, 2, 2)),
    ("Václav Klaus", datetime(2003, 3, 7), datetime(2013, 3, 7)),
    ("Miloš Zeman", datetime(2013, 3, 8), datetime(2023, 3, 8)),
    ("Petr Pavel", datetime(2023, 3, 9), datetime.now()),
]

# --- Counters: matched lines per year, per month and per president ---
year_counts = defaultdict(int)
month_counts = defaultdict(int)
president_counts = defaultdict(int)

# --- First YYYY-MM-DD occurrence in a line is taken as its date ---
date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}")

# --- Read the match file line by line and update the counters ---
with open("matches_final.txt", "r", encoding="utf-8") as file:
    for line in file:
        match = date_pattern.search(line)
        if not match:
            continue  # line carries no date

        date_str = match.group()
        try:
            date = datetime.strptime(date_str, "%Y-%m-%d")
        except ValueError:
            continue  # looks like a date but is not a valid calendar day

        year_counts[date.year] += 1
        month_counts[date.month] += 1
        # Credit the first president whose term covers the date (if any)
        for name, start, end in presidents:
            if start <= date <= end:
                president_counts[name] += 1
                break

# --- Generic plotting function ---
def plot_histogram(labels, values, title, xlabel, ylabel, filename):
    """Draw a bar chart of `values` over `labels` and save it to `filename`.

    The figure is 10x6 inches with sky-blue bars and x tick labels rotated by
    45 degrees. It is closed after saving, so nothing is displayed inline.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(labels, values, color='skyblue')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
    fig.tight_layout()
    fig.savefig(filename)
    plt.close(fig)

# --- 1. Visits per year ---
# zip(*...) cannot be unpacked into two names when there is no data, so an empty
# counter is reported instead of raising ValueError.
if year_counts:
    years, year_vals = zip(*sorted(year_counts.items()))
    plot_histogram(years, year_vals,
                   "Number of Presidential Visits per Year",
                   "Year", "Number of Visits", "visits_per_year.png")
else:
    print("No dated matches found - skipping visits_per_year.png")

# --- 2. Visits per month (ordered) ---
# Months without any match are plotted as zero.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_vals = [month_counts[i] for i in range(1, 13)]
plot_histogram(month_labels, month_vals,
               "Monthly Distribution of Presidential Visits",
               "Month", "Number of Visits", "visits_per_month.png")

# --- 3. Visits per president ---
if president_counts:
    pres_names, pres_vals = zip(*president_counts.items())
    plot_histogram(pres_names, pres_vals,
                   "Number of Meetings Attended by Each President",
                   "President", "Number of Visits", "visits_per_president.png")
else:
    print("No matches within a presidential term - skipping visits_per_president.png")
