In [3]:
import re
import requests
import pandas as pd
import json
from bs4 import BeautifulSoup as bs
import time
import sqlite3
from urllib.parse import quote_plus
import yaml
from datetime import datetime
import os
from KBDownloader import search_swedish_newspapers, fetch_newspaper_data, save_checkpoint, load_checkpoint
from dotenv import load_dotenv

# Load the YAML configuration file. The encoding is explicit so that non-ASCII
# values (the settings include Swedish column names) are read the same way on
# every platform instead of depending on the locale default.
with open('config.yaml', 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file) or {}  # an empty file parses to None

# Load environment variables from .env file
load_dotenv()
kb_key = os.getenv('KB_API_KEY')
if not kb_key:
    # Not fatal here: the key is only handed on to fetch_newspaper_data later.
    print("Warning: KB_API_KEY was not found in the environment or .env file.")

# Assign variables from the YAML configuration.
# Required settings use [] so a missing key fails immediately with a KeyError
# naming the key; optional ones use .get() with a default.
venue_list = config['venue_list']
start_year = config['start_year']
newspaper = config['newspaper']
db_path = config['db_path']
rate_limit = config['rate_limit']
num_composed_blocks = config.get('composed_blocks_context', 1)  # Default to 1 if not specified

# 'years_to_crawl' is read once and validated before use, so a missing or empty
# entry produces the explanatory ValueError rather than a bare KeyError.
years_to_crawl = config.get('years_to_crawl') or []
if not years_to_crawl:
    raise ValueError("No years specified in the configuration file.")
years = years_to_crawl  # alias used by the crawl loop

# Define the newspaper collection IDs
NEWSPAPER_COLLECTION_IDS = {
    'Dagens nyheter': 'https://libris.kb.se/m5z2w4lz3m2zxpk#it',
    'Svenska Dagbladet': 'https://libris.kb.se/2ldhmx8d4mcrlq9#it',
    'Aftonbladet': 'https://libris.kb.se/dwpgqn5q03ft91j#it',
    'Dagligt Allehanda': 'https://libris.kb.se/9tmqzv3m32xfzcz#it',
    'Nya Dagligt Allehanda': 'https://libris.kb.se/2ldqsh7d0gp04wb#it'
}

# Get the correct collection ID for the specified newspaper
collection_id = NEWSPAPER_COLLECTION_IDS.get(newspaper)

if not collection_id:
    raise ValueError(f"Invalid newspaper name: {newspaper}")

# sqlite3.connect() creates the database file when it is missing, but not the
# directory that contains it, so make sure that directory exists first.
db_dir = os.path.dirname(db_path)
if db_dir:
    os.makedirs(db_dir, exist_ok=True)

# Create the newspaper_data table if it doesn't exist. The connection is closed
# in a finally block so it is released even when the statement fails.
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS newspaper_data (
            Date TEXT,
            [Package ID] TEXT,
            Part INTEGER,
            Page INTEGER,
            [ComposedBlock ID] TEXT,
            [ComposedBlock Content] TEXT,
            [Raw API Result] TEXT,
            [Full Prompt] TEXT
        )
    ''')
    conn.commit()
finally:
    conn.close()

# Print out all the settings from the YAML configuration file
print("Configuration Settings:")
for key, value in config.items():
    print(f"{key}: {value}")
print(f"Collection ID: {collection_id}")

# Load the venue list and fail early if the column holding the search terms is
# absent, rather than part-way into the crawl.
df = pd.read_excel(venue_list)
if 'Lokal' not in df.columns:
    raise ValueError(f"Venue list '{venue_list}' has no 'Lokal' column.")

Configuration Settings:
venue_list: /Users/brandonfarnsworth/Library/Mobile Documents/com~apple~CloudDocs/Post-Phd/Published Texts/Method Article Collecting Trace Data using LLM/Venues_and_Search_Terms.xlsx
years: 1908
start_year: 1908
years_to_crawl: [1848, 1858, 1868]
rate_limit: 10
composed_blocks_context: 10
newspaper: Aftonbladet
db_path: Datasets/28.08.24_Dataset.db
prompt_filepath: llm_prompt_for_deployment.txt
JSON_schema_path: JSON_Schema.txt
llm_model: gpt-4o-mini-2024-07-18
max_tokens: 1000
Stockholm_Concert_Database_Path: Datasets/All_Concerts_1908_filtered_until_June_30.xlsx
columns_to_compare: ['normalized_date', 'name', 'venue']
column_mapping: {'konsert_datum': 'date', 'konsert_namn': 'name', 'lokal_namn': 'venue', 'arrangör': 'organiser'}
Collection ID: https://libris.kb.se/dwpgqn5q03ft91j#it


In [4]:
# Load checkpoint if it exists
checkpoint = load_checkpoint()


def _parse_checkpoint(saved):
    """Normalise a loaded checkpoint into a ``(year, half, index)`` tuple.

    NOTE(review): the return shape of ``load_checkpoint`` is not visible in this
    notebook. This accepts a mapping with 'year'/'half'/'index' keys or a
    three-item sequence in that order (mirroring the ``save_checkpoint``
    arguments) -- confirm against KBDownloader. Anything else returns ``None``,
    which makes the crawl start from the beginning.
    """
    if saved is None:
        return None
    if isinstance(saved, dict):
        parts = (saved.get('year'), saved.get('half'), saved.get('index'))
    elif isinstance(saved, (list, tuple)) and len(saved) == 3:
        parts = tuple(saved)
    else:
        return None
    try:
        saved_year, saved_half, saved_index = (int(part) for part in parts)
    except (TypeError, ValueError):
        return None
    if saved_half not in (0, 1) or saved_index < 0:
        return None
    return saved_year, saved_half, saved_index


resume_from = _parse_checkpoint(checkpoint)
if resume_from is not None and resume_from[0] not in years:
    # The checkpoint belongs to a year that is not in the current configuration
    # (e.g. left over from an earlier run), so it cannot mark a position here.
    print(f"Ignoring checkpoint for year {resume_from[0]}: not in configured years {years}")
    resume_from = None

pause_seconds = 0  # pause after each half-year, in seconds
venues = df['Lokal']
failed_queries = []  # (year, half, index, query, reason) for the final summary

# Main loop
for year in years:
    for half in range(2):
        start_index = 0
        if resume_from is not None:
            # Skip every half-year that precedes the checkpointed position.
            if (year, half) != resume_from[:2]:
                continue
            start_index = resume_from[2]
            resume_from = None
            print(f"Resuming from checkpoint: Year {year}, Half {half}, Index {start_index}")

        if half == 0:
            from_date = datetime(year, 1, 1)
            to_date = datetime(year, 6, 30)
        else:
            from_date = datetime(year, 7, 1)
            to_date = datetime(year, 12, 31)

        # Format the date range once per half-year rather than once per query.
        from_date_str = from_date.strftime('%Y-%m-%d')
        to_date_str = to_date.strftime('%Y-%m-%d')

        print(f"Processing data from {from_date} to {to_date}")

        for index in range(start_index, len(venues)):
            query = venues.iloc[index]

            if pd.isna(query):
                # An empty spreadsheet cell is not a search term.
                print(f"Skipping empty venue name at row {index}")
                save_checkpoint(year, half, index + 1)
                continue

            try:
                result = fetch_newspaper_data(
                    query=query,
                    from_date=from_date_str,
                    to_date=to_date_str,
                    newspaper=collection_id,
                    config=config,
                    db_path=db_path,
                    kb_key=kb_key,
                    rate_limit=rate_limit,
                    num_composed_blocks=num_composed_blocks
                )

                if result.get('success'):
                    print(f"Processed query '{query}' successfully.")
                else:
                    print(f"Failed to process query '{query}': {result.get('message')}")
                    failed_queries.append((year, half, index, query, result.get('message')))

                # Save checkpoint after each query, successful or not
                save_checkpoint(year, half, index + 1)

            except Exception as e:
                # Best effort: one failing query must not stop the crawl. The
                # failure is recorded because the next successful query
                # overwrites this checkpoint.
                print(f"Error processing query '{query}': {str(e)}")
                failed_queries.append((year, half, index, query, str(e)))
                save_checkpoint(year, half, index)

        print(f"Waiting. Currently at {from_date} to {to_date}")
        time.sleep(pause_seconds)

# Report everything that needs a re-run in one place.
if failed_queries:
    print(f"{len(failed_queries)} queries failed:")
    for failed_year, failed_half, failed_index, failed_query, reason in failed_queries:
        print(f"  Year {failed_year}, Half {failed_half}, Index {failed_index}: '{failed_query}' ({reason})")

2024-08-29 15:17:41,034 - INFO - Starting fetch_newspaper_data for query: Konsert, dates: 1848-01-01 to 1848-06-30


Checkpoint loaded: Year 1908, Half 1, Index 2
Processing data from 1848-01-01 00:00:00 to 1848-06-30 00:00:00


2024-08-29 15:17:41,370 - INFO - Search results received. Hits: 41
2024-08-29 15:17:41,370 - INFO - Extracted 41 URLs from search results
2024-08-29 15:17:41,371 - INFO - Processing URL: https://data.kb.se/dark-37728/part/1/page/1
2024-08-29 15:17:41,508 - INFO - Extracted XML URL for page 1
2024-08-29 15:17:41,509 - INFO - Extracted 1 XML URLs
2024-08-29 15:17:41,814 - INFO - Fetched XML content for 1 pages
2024-08-29 15:17:42,032 - INFO - Processed URL: https://data.kb.se/dark-37728/part/1/page/1
2024-08-29 15:17:42,032 - INFO - Processing URL: https://data.kb.se/dark-39542/part/1/page/2
2024-08-29 15:17:42,159 - INFO - Extracted XML URL for page 2
2024-08-29 15:17:42,159 - INFO - Extracted 1 XML URLs
2024-08-29 15:17:42,494 - INFO - Fetched XML content for 1 pages
2024-08-29 15:17:42,704 - INFO - Processed URL: https://data.kb.se/dark-39542/part/1/page/2
2024-08-29 15:17:42,704 - INFO - Processing URL: https://data.kb.se/dark-37725/part/1/page/3
2024-08-29 15:17:42,829 - INFO - Extr

Processed query 'Konsert' successfully.
Checkpoint saved: Year 1848, Half 0, Index 1


2024-08-29 15:18:15,147 - INFO - Search results received. Hits: 78
2024-08-29 15:18:15,148 - INFO - Extracted 78 URLs from search results
2024-08-29 15:18:15,148 - INFO - Processing URL: https://data.kb.se/dark-39289/part/1/page/1
2024-08-29 15:18:15,340 - INFO - Extracted XML URL for page 1
2024-08-29 15:18:15,341 - INFO - Extracted 1 XML URLs
2024-08-29 15:18:15,673 - INFO - Fetched XML content for 1 pages
2024-08-29 15:18:15,874 - INFO - Processed URL: https://data.kb.se/dark-39289/part/1/page/1
2024-08-29 15:18:15,874 - INFO - Processing URL: https://data.kb.se/dark-39515/part/1/page/2
2024-08-29 15:18:15,994 - INFO - Extracted XML URL for page 2
2024-08-29 15:18:15,995 - INFO - Extracted 1 XML URLs
2024-08-29 15:18:16,299 - INFO - Fetched XML content for 1 pages
2024-08-29 15:18:16,636 - INFO - Processed URL: https://data.kb.se/dark-39515/part/1/page/2
2024-08-29 15:18:16,636 - INFO - Processing URL: https://data.kb.se/dark-39866/part/1/page/1
2024-08-29 15:18:16,762 - INFO - Extr

Processed query 'La Croix salong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 2


2024-08-29 15:19:16,650 - INFO - Search results received. Hits: 0
2024-08-29 15:19:16,651 - INFO - Extracted 0 URLs from search results
2024-08-29 15:19:16,651 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:19:16,654 - INFO - Starting fetch_newspaper_data for query: Wallmans lokal, dates: 1848-01-01 to 1848-06-30


Processed query 'Norra paviljongen i Trädgårdsföreningens lokal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 3


2024-08-29 15:19:17,003 - INFO - Search results received. Hits: 24
2024-08-29 15:19:17,004 - INFO - Extracted 24 URLs from search results
2024-08-29 15:19:17,005 - INFO - Processing URL: https://data.kb.se/dark-37782/part/1/page/1
2024-08-29 15:19:17,232 - INFO - Extracted XML URL for page 1
2024-08-29 15:19:17,233 - INFO - Extracted 1 XML URLs
2024-08-29 15:19:17,602 - INFO - Fetched XML content for 1 pages
2024-08-29 15:19:17,921 - INFO - Processed URL: https://data.kb.se/dark-37782/part/1/page/1
2024-08-29 15:19:17,921 - INFO - Processing URL: https://data.kb.se/dark-39498/part/1/page/1
2024-08-29 15:19:18,264 - INFO - Extracted XML URL for page 1
2024-08-29 15:19:18,265 - INFO - Extracted 1 XML URLs
2024-08-29 15:19:18,749 - INFO - Fetched XML content for 1 pages
2024-08-29 15:19:18,995 - INFO - Processed URL: https://data.kb.se/dark-39498/part/1/page/1
2024-08-29 15:19:18,996 - INFO - Processing URL: https://data.kb.se/dark-37742/part/1/page/1
2024-08-29 15:19:19,272 - INFO - Extr

Processed query 'Wallmans lokal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 4


2024-08-29 15:19:35,479 - INFO - Search results received. Hits: 0
2024-08-29 15:19:35,480 - INFO - Extracted 0 URLs from search results
2024-08-29 15:19:35,480 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:19:35,483 - INFO - Starting fetch_newspaper_data for query: Kungliga opera, dates: 1848-01-01 to 1848-06-30


Processed query 'Mäster Samuels gränd 11' successfully.
Checkpoint saved: Year 1848, Half 0, Index 5


2024-08-29 15:19:35,725 - INFO - Search results received. Hits: 14
2024-08-29 15:19:35,725 - INFO - Extracted 14 URLs from search results
2024-08-29 15:19:35,726 - INFO - Processing URL: https://data.kb.se/dark-39495/part/1/page/3
2024-08-29 15:19:35,863 - INFO - Extracted XML URL for page 3
2024-08-29 15:19:35,863 - INFO - Extracted 1 XML URLs
2024-08-29 15:19:36,174 - INFO - Fetched XML content for 1 pages
2024-08-29 15:19:36,355 - INFO - Processed URL: https://data.kb.se/dark-39495/part/1/page/3
2024-08-29 15:19:36,355 - INFO - Processing URL: https://data.kb.se/dark-37763/part/1/page/2
2024-08-29 15:19:36,482 - INFO - Extracted XML URL for page 2
2024-08-29 15:19:36,482 - INFO - Extracted 1 XML URLs
2024-08-29 15:19:36,798 - INFO - Fetched XML content for 1 pages
2024-08-29 15:19:37,107 - INFO - Processed URL: https://data.kb.se/dark-37763/part/1/page/2
2024-08-29 15:19:37,107 - INFO - Processing URL: https://data.kb.se/dark-39499/part/1/page/2
2024-08-29 15:19:37,242 - INFO - Extr

Processed query 'Kungliga opera' successfully.
Checkpoint saved: Year 1848, Half 0, Index 6


2024-08-29 15:19:46,049 - INFO - Search results received. Hits: 57
2024-08-29 15:19:46,049 - INFO - Extracted 57 URLs from search results
2024-08-29 15:19:46,049 - INFO - Processing URL: https://data.kb.se/dark-39483/part/1/page/1
2024-08-29 15:19:46,199 - INFO - Extracted XML URL for page 1
2024-08-29 15:19:46,200 - INFO - Extracted 1 XML URLs
2024-08-29 15:19:46,563 - INFO - Fetched XML content for 1 pages
2024-08-29 15:19:46,869 - INFO - Processed URL: https://data.kb.se/dark-39483/part/1/page/1
2024-08-29 15:19:46,869 - INFO - Processing URL: https://data.kb.se/dark-39540/part/1/page/1
2024-08-29 15:19:47,091 - INFO - Extracted XML URL for page 1
2024-08-29 15:19:47,091 - INFO - Extracted 1 XML URLs
2024-08-29 15:19:47,644 - INFO - Fetched XML content for 1 pages
2024-08-29 15:19:47,798 - INFO - No matching content found for query 'Kungliga teater' on page 1
2024-08-29 15:19:47,799 - INFO - Processed URL: https://data.kb.se/dark-39540/part/1/page/1
2024-08-29 15:19:47,799 - INFO - 

Processed query 'Kungliga teater' successfully.
Checkpoint saved: Year 1848, Half 0, Index 7


2024-08-29 15:20:29,576 - INFO - Search results received. Hits: 0
2024-08-29 15:20:29,576 - INFO - Extracted 0 URLs from search results
2024-08-29 15:20:29,577 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:20:29,578 - INFO - Starting fetch_newspaper_data for query: Hotel W6, dates: 1848-01-01 to 1848-06-30


Processed query 'F.d. Kirsteinska huset (vid Clara)' successfully.
Checkpoint saved: Year 1848, Half 0, Index 8


2024-08-29 15:20:29,830 - INFO - Search results received. Hits: 0
2024-08-29 15:20:29,831 - INFO - Extracted 0 URLs from search results
2024-08-29 15:20:29,831 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:20:29,832 - INFO - Starting fetch_newspaper_data for query: Hotel Continental, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:20:30,022 - INFO - Search results received. Hits: 0
2024-08-29 15:20:30,023 - INFO - Extracted 0 URLs from search results
2024-08-29 15:20:30,023 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:20:30,026 - INFO - Starting fetch_newspaper_data for query: La Croix mindre salong, dates: 1848-01-01 to 1848-06-30


Processed query 'Hotel W6' successfully.
Checkpoint saved: Year 1848, Half 0, Index 9
Processed query 'Hotel Continental' successfully.
Checkpoint saved: Year 1848, Half 0, Index 10


2024-08-29 15:20:30,579 - INFO - Search results received. Hits: 76
2024-08-29 15:20:30,580 - INFO - Extracted 76 URLs from search results
2024-08-29 15:20:30,581 - INFO - Processing URL: https://data.kb.se/dark-39300/part/1/page/1
2024-08-29 15:20:30,720 - INFO - Extracted XML URL for page 1
2024-08-29 15:20:30,721 - INFO - Extracted 1 XML URLs
2024-08-29 15:20:31,201 - INFO - Fetched XML content for 1 pages
2024-08-29 15:20:31,714 - INFO - Processed URL: https://data.kb.se/dark-39300/part/1/page/1
2024-08-29 15:20:31,715 - INFO - Processing URL: https://data.kb.se/dark-39259/part/1/page/1
2024-08-29 15:20:31,924 - INFO - Extracted XML URL for page 1
2024-08-29 15:20:31,925 - INFO - Extracted 1 XML URLs
2024-08-29 15:20:32,290 - INFO - Fetched XML content for 1 pages
2024-08-29 15:20:32,567 - INFO - Processed URL: https://data.kb.se/dark-39259/part/1/page/1
2024-08-29 15:20:32,568 - INFO - Processing URL: https://data.kb.se/dark-39273/part/1/page/1
2024-08-29 15:20:32,746 - INFO - Extr

Processed query 'La Croix mindre salong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 11
Processed query 'Södra teatern i Stadshuset' successfully.
Checkpoint saved: Year 1848, Half 0, Index 12


2024-08-29 15:21:28,240 - INFO - Search results received. Hits: 12
2024-08-29 15:21:28,241 - INFO - Extracted 12 URLs from search results
2024-08-29 15:21:28,242 - INFO - Processing URL: https://data.kb.se/dark-37782/part/1/page/1
2024-08-29 15:21:28,397 - INFO - Extracted XML URL for page 1
2024-08-29 15:21:28,397 - INFO - Extracted 1 XML URLs
2024-08-29 15:21:28,722 - INFO - Fetched XML content for 1 pages
2024-08-29 15:21:28,893 - INFO - Processed URL: https://data.kb.se/dark-37782/part/1/page/1
2024-08-29 15:21:28,894 - INFO - Processing URL: https://data.kb.se/dark-37766/part/1/page/1
2024-08-29 15:21:29,031 - INFO - Extracted XML URL for page 1
2024-08-29 15:21:29,032 - INFO - Extracted 1 XML URLs
2024-08-29 15:21:29,348 - INFO - Fetched XML content for 1 pages
2024-08-29 15:21:29,617 - INFO - Processed URL: https://data.kb.se/dark-37766/part/1/page/1
2024-08-29 15:21:29,618 - INFO - Processing URL: https://data.kb.se/dark-39866/part/1/page/1
2024-08-29 15:21:29,748 - INFO - Extr

Processed query 'Stadshus salongen' successfully.
Checkpoint saved: Year 1848, Half 0, Index 13
Processed query 'Stora börssalen' successfully.
Checkpoint saved: Year 1848, Half 0, Index 14


2024-08-29 15:21:36,843 - INFO - Search results received. Hits: 4
2024-08-29 15:21:36,843 - INFO - Extracted 4 URLs from search results
2024-08-29 15:21:36,844 - INFO - Processing URL: https://data.kb.se/dark-39259/part/1/page/1
2024-08-29 15:21:36,970 - INFO - Extracted XML URL for page 1
2024-08-29 15:21:36,970 - INFO - Extracted 1 XML URLs
2024-08-29 15:21:37,252 - INFO - Fetched XML content for 1 pages
2024-08-29 15:21:37,571 - INFO - Processed URL: https://data.kb.se/dark-39259/part/1/page/1
2024-08-29 15:21:37,572 - INFO - Processing URL: https://data.kb.se/dark-41314/part/1/page/3
2024-08-29 15:21:37,708 - INFO - Extracted XML URL for page 3
2024-08-29 15:21:37,709 - INFO - Extracted 1 XML URLs
2024-08-29 15:21:38,076 - INFO - Fetched XML content for 1 pages
2024-08-29 15:21:38,350 - INFO - Processed URL: https://data.kb.se/dark-41314/part/1/page/3
2024-08-29 15:21:38,350 - INFO - Processing URL: https://data.kb.se/dark-41309/part/1/page/3
2024-08-29 15:21:38,480 - INFO - Extrac

Processed query 'St Nicolai kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 15


2024-08-29 15:21:40,344 - INFO - Search results received. Hits: 0
2024-08-29 15:21:40,345 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:40,345 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:21:40,347 - INFO - Starting fetch_newspaper_data for query: Hedvig Eleonora, dates: 1848-01-01 to 1848-06-30


Processed query 'Ladugårdslands kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 16


2024-08-29 15:21:40,671 - INFO - Search results received. Hits: 4
2024-08-29 15:21:40,672 - INFO - Extracted 4 URLs from search results
2024-08-29 15:21:40,672 - INFO - Processing URL: https://data.kb.se/dark-39257/part/1/page/3
2024-08-29 15:21:40,798 - INFO - Extracted XML URL for page 3
2024-08-29 15:21:40,798 - INFO - Extracted 1 XML URLs
2024-08-29 15:21:41,307 - INFO - Fetched XML content for 1 pages
2024-08-29 15:21:41,613 - INFO - Processed URL: https://data.kb.se/dark-39257/part/1/page/3
2024-08-29 15:21:41,613 - INFO - Processing URL: https://data.kb.se/dark-39292/part/1/page/3
2024-08-29 15:21:41,832 - INFO - Extracted XML URL for page 3
2024-08-29 15:21:41,833 - INFO - Extracted 1 XML URLs
2024-08-29 15:21:42,399 - INFO - Fetched XML content for 1 pages
2024-08-29 15:21:42,644 - INFO - Processed URL: https://data.kb.se/dark-39292/part/1/page/3
2024-08-29 15:21:42,644 - INFO - Processing URL: https://data.kb.se/dark-41284/part/1/page/4
2024-08-29 15:21:42,861 - INFO - Extrac

Processed query 'Hedvig Eleonora' successfully.
Checkpoint saved: Year 1848, Half 0, Index 17


2024-08-29 15:21:44,670 - INFO - Search results received. Hits: 0
2024-08-29 15:21:44,670 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:44,671 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:21:44,672 - INFO - Starting fetch_newspaper_data for query: Trädgårdsföreningens lokal, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:21:44,869 - INFO - Search results received. Hits: 0
2024-08-29 15:21:44,870 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:44,870 - INFO - Data processing completed. Total rows saved: 0


Processed query 'Östermalms ka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 18
Processed query 'Trädgårdsföreningens lokal' successfully.


2024-08-29 15:21:44,873 - INFO - Starting fetch_newspaper_data for query: Tyska bryggeriets trädgård (Tullportsgatan 42 på Söder), dates: 1848-01-01 to 1848-06-30


Checkpoint saved: Year 1848, Half 0, Index 19


2024-08-29 15:21:45,198 - INFO - Search results received. Hits: 0
2024-08-29 15:21:45,199 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:45,200 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:21:45,203 - INFO - Starting fetch_newspaper_data for query: Tullportsgatan 42 på Söder, dates: 1848-01-01 to 1848-06-30


Processed query 'Tyska bryggeriets trädgård (Tullportsgatan 42 på Söder)' successfully.
Checkpoint saved: Year 1848, Half 0, Index 20


2024-08-29 15:21:45,408 - INFO - Search results received. Hits: 0
2024-08-29 15:21:45,409 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:45,409 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:21:45,412 - INFO - Starting fetch_newspaper_data for query: Stora mosebacke trädgård, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:21:45,559 - INFO - Search results received. Hits: 0
2024-08-29 15:21:45,560 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:45,560 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:21:45,563 - INFO - Starting fetch_newspaper_data for query: Kungl. Humlegården, dates: 1848-01-01 to 1848-06-30


Processed query 'Tullportsgatan 42 på Söder' successfully.
Checkpoint saved: Year 1848, Half 0, Index 21
Processed query 'Stora mosebacke trädgård' successfully.
Checkpoint saved: Year 1848, Half 0, Index 22


2024-08-29 15:21:45,721 - INFO - Search results received. Hits: 0
2024-08-29 15:21:45,722 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:45,722 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:21:45,725 - INFO - Starting fetch_newspaper_data for query: Tivoli vid Nortullsgatan, dates: 1848-01-01 to 1848-06-30


Processed query 'Kungl. Humlegården' successfully.
Checkpoint saved: Year 1848, Half 0, Index 23


2024-08-29 15:21:45,966 - INFO - Search results received. Hits: 0
2024-08-29 15:21:45,966 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:45,967 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:21:45,969 - INFO - Starting fetch_newspaper_data for query: Novilla på Kungl. Djurgården, dates: 1848-01-01 to 1848-06-30


Processed query 'Tivoli vid Nortullsgatan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 24


2024-08-29 15:21:46,240 - INFO - Search results received. Hits: 0
2024-08-29 15:21:46,242 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:46,242 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:21:46,243 - INFO - Starting fetch_newspaper_data for query: Kungl. Djurgårdsbrunns salong, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:21:46,440 - INFO - Search results received. Hits: 0
2024-08-29 15:21:46,442 - INFO - Extracted 0 URLs from search results
2024-08-29 15:21:46,442 - INFO - Data processing completed. Total rows saved: 0


Processed query 'Novilla på Kungl. Djurgården' successfully.
Checkpoint saved: Year 1848, Half 0, Index 25
Processed query 'Kungl. Djurgårdsbrunns salong' successfully.


2024-08-29 15:21:46,445 - INFO - Starting fetch_newspaper_data for query: Mindre teatern, dates: 1848-01-01 to 1848-06-30


Checkpoint saved: Year 1848, Half 0, Index 26


2024-08-29 15:21:47,352 - INFO - Search results received. Hits: 212
2024-08-29 15:21:47,353 - INFO - Extracted 212 URLs from search results
2024-08-29 15:21:47,354 - INFO - Processing URL: https://data.kb.se/dark-37684/part/1/page/2
2024-08-29 15:21:47,581 - INFO - Extracted XML URL for page 2
2024-08-29 15:21:47,582 - INFO - Extracted 1 XML URLs
2024-08-29 15:21:48,044 - INFO - Fetched XML content for 1 pages
2024-08-29 15:21:48,425 - INFO - Processed URL: https://data.kb.se/dark-37684/part/1/page/2
2024-08-29 15:21:48,425 - INFO - Processing URL: https://data.kb.se/dark-37750/part/1/page/1
2024-08-29 15:21:48,645 - INFO - Extracted XML URL for page 1
2024-08-29 15:21:48,646 - INFO - Extracted 1 XML URLs
2024-08-29 15:21:49,121 - INFO - Fetched XML content for 1 pages
2024-08-29 15:21:49,370 - INFO - Processed URL: https://data.kb.se/dark-37750/part/1/page/1
2024-08-29 15:21:49,370 - INFO - Processing URL: https://data.kb.se/dark-37761/part/1/page/1
2024-08-29 15:21:49,596 - INFO - Ex

Processed query 'Mindre teatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 27


2024-08-29 15:24:23,480 - INFO - Search results received. Hits: 0
2024-08-29 15:24:23,480 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:23,481 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:23,482 - INFO - Starting fetch_newspaper_data for query: Nedre Börssalen, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:24:23,678 - INFO - Search results received. Hits: 0
2024-08-29 15:24:23,679 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:23,679 - INFO - Data processing completed. Total rows saved: 0


Processed query 'Strömsborg' successfully.
Checkpoint saved: Year 1848, Half 0, Index 28
Processed query 'Nedre Börssalen' successfully.


2024-08-29 15:24:23,682 - INFO - Starting fetch_newspaper_data for query: Mosebacke, dates: 1848-01-01 to 1848-06-30


Checkpoint saved: Year 1848, Half 0, Index 29


2024-08-29 15:24:23,892 - INFO - Search results received. Hits: 3
2024-08-29 15:24:23,892 - INFO - Extracted 3 URLs from search results
2024-08-29 15:24:23,892 - INFO - Processing URL: https://data.kb.se/dark-41306/part/1/page/1
2024-08-29 15:24:24,015 - INFO - Extracted XML URL for page 1
2024-08-29 15:24:24,015 - INFO - Extracted 1 XML URLs
2024-08-29 15:24:24,438 - INFO - Fetched XML content for 1 pages
2024-08-29 15:24:24,668 - INFO - Processed URL: https://data.kb.se/dark-41306/part/1/page/1
2024-08-29 15:24:24,668 - INFO - Processing URL: https://data.kb.se/dark-41302/part/1/page/1
2024-08-29 15:24:24,922 - INFO - Extracted XML URL for page 1
2024-08-29 15:24:24,923 - INFO - Extracted 1 XML URLs
2024-08-29 15:24:25,298 - INFO - Fetched XML content for 1 pages
2024-08-29 15:24:25,529 - INFO - Processed URL: https://data.kb.se/dark-41302/part/1/page/1
2024-08-29 15:24:25,529 - INFO - Processing URL: https://data.kb.se/dark-41284/part/1/page/1
2024-08-29 15:24:25,652 - INFO - Extrac

Processed query 'Mosebacke' successfully.
Checkpoint saved: Year 1848, Half 0, Index 30


2024-08-29 15:24:26,638 - INFO - Search results received. Hits: 37
2024-08-29 15:24:26,638 - INFO - Extracted 37 URLs from search results
2024-08-29 15:24:26,638 - INFO - Processing URL: https://data.kb.se/dark-102254/part/1/page/1
2024-08-29 15:24:26,857 - INFO - Extracted XML URL for page 1
2024-08-29 15:24:26,858 - INFO - Extracted 1 XML URLs
2024-08-29 15:24:27,215 - INFO - Fetched XML content for 1 pages
2024-08-29 15:24:27,514 - INFO - Processed URL: https://data.kb.se/dark-102254/part/1/page/1
2024-08-29 15:24:27,514 - INFO - Processing URL: https://data.kb.se/dark-39563/part/1/page/1
2024-08-29 15:24:27,660 - INFO - Extracted XML URL for page 1
2024-08-29 15:24:27,660 - INFO - Extracted 1 XML URLs
2024-08-29 15:24:28,019 - INFO - Fetched XML content for 1 pages
2024-08-29 15:24:28,243 - INFO - Processed URL: https://data.kb.se/dark-39563/part/1/page/1
2024-08-29 15:24:28,243 - INFO - Processing URL: https://data.kb.se/dark-39514/part/1/page/1
2024-08-29 15:24:28,457 - INFO - Ex

Processed query 'Storkyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 31


2024-08-29 15:24:51,484 - INFO - Search results received. Hits: 0
2024-08-29 15:24:51,485 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:51,486 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:51,488 - INFO - Starting fetch_newspaper_data for query: Södra teatern, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:24:51,597 - INFO - Search results received. Hits: 0
2024-08-29 15:24:51,598 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:51,598 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:51,601 - INFO - Starting fetch_newspaper_data for query: Värdshuset Johannshov, dates: 1848-01-01 to 1848-06-30


Processed query 'Pavillion du bazar på Norrbron' successfully.
Checkpoint saved: Year 1848, Half 0, Index 32
Processed query 'Södra teatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 33


2024-08-29 15:24:51,788 - INFO - Search results received. Hits: 0
2024-08-29 15:24:51,789 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:51,789 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:51,790 - INFO - Starting fetch_newspaper_data for query: Värdshuset Claës på Hörnet, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:24:51,969 - INFO - Search results received. Hits: 0
2024-08-29 15:24:51,971 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:51,972 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:51,974 - INFO - Starting fetch_newspaper_data for query: Vinterträdgården i Novilla, dates: 1848-01-01 to 1848-06-30


Processed query 'Värdshuset Johannshov' successfully.
Checkpoint saved: Year 1848, Half 0, Index 34
Processed query 'Värdshuset Claës på Hörnet' successfully.
Checkpoint saved: Year 1848, Half 0, Index 35


2024-08-29 15:24:52,147 - INFO - Search results received. Hits: 0
2024-08-29 15:24:52,148 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:52,148 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:52,150 - INFO - Starting fetch_newspaper_data for query: Stadssmedsgatan nr 9, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:24:52,326 - INFO - Search results received. Hits: 0
2024-08-29 15:24:52,327 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:52,327 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:52,328 - INFO - Starting fetch_newspaper_data for query: Beridarbansgatan nr 18, dates: 1848-01-01 to 1848-06-30


Processed query 'Vinterträdgården i Novilla' successfully.
Checkpoint saved: Year 1848, Half 0, Index 36
Processed query 'Stadssmedsgatan nr 9' successfully.
Checkpoint saved: Year 1848, Half 0, Index 37


2024-08-29 15:24:52,543 - INFO - Search results received. Hits: 1
2024-08-29 15:24:52,544 - INFO - Extracted 1 URLs from search results
2024-08-29 15:24:52,545 - INFO - Processing URL: https://data.kb.se/dark-39530/part/1/page/4
2024-08-29 15:24:52,684 - INFO - Extracted XML URL for page 4
2024-08-29 15:24:52,685 - INFO - Extracted 1 XML URLs
2024-08-29 15:24:53,003 - INFO - Fetched XML content for 1 pages
2024-08-29 15:24:53,165 - INFO - Processed URL: https://data.kb.se/dark-39530/part/1/page/4
2024-08-29 15:24:53,306 - INFO - Inserted final batch of 6 rows. Total rows inserted: 6
2024-08-29 15:24:53,307 - INFO - Data processing completed. Total rows saved: 6
2024-08-29 15:24:53,307 - INFO - Starting fetch_newspaper_data for query: Davidssons södra paviljong, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:24:53,480 - INFO - Search results received. Hits: 0
2024-08-29 15:24:53,480 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:53,481 - INFO - Data processing completed. 

Processed query 'Beridarbansgatan nr 18' successfully.
Checkpoint saved: Year 1848, Half 0, Index 38
Processed query 'Davidssons södra paviljong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 39


2024-08-29 15:24:53,723 - INFO - Search results received. Hits: 6
2024-08-29 15:24:53,723 - INFO - Extracted 6 URLs from search results
2024-08-29 15:24:53,724 - INFO - Processing URL: https://data.kb.se/dark-37784/part/1/page/1
2024-08-29 15:24:53,862 - INFO - Extracted XML URL for page 1
2024-08-29 15:24:53,862 - INFO - Extracted 1 XML URLs
2024-08-29 15:24:54,134 - INFO - Fetched XML content for 1 pages
2024-08-29 15:24:54,457 - INFO - Processed URL: https://data.kb.se/dark-37784/part/1/page/1
2024-08-29 15:24:54,458 - INFO - Processing URL: https://data.kb.se/dark-39594/part/1/page/3
2024-08-29 15:24:54,584 - INFO - Extracted XML URL for page 3
2024-08-29 15:24:54,584 - INFO - Extracted 1 XML URLs
2024-08-29 15:24:54,905 - INFO - Fetched XML content for 1 pages
2024-08-29 15:24:55,143 - INFO - Processed URL: https://data.kb.se/dark-39594/part/1/page/3
2024-08-29 15:24:55,144 - INFO - Processing URL: https://data.kb.se/dark-37761/part/1/page/1
2024-08-29 15:24:55,269 - INFO - Extrac

Processed query 'Kungliga musikaliska akademiens lokal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 40


2024-08-29 15:24:57,971 - INFO - Search results received. Hits: 0
2024-08-29 15:24:57,971 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:57,971 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:57,972 - INFO - Starting fetch_newspaper_data for query: Lilla Catharineberg, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:24:58,112 - INFO - Search results received. Hits: 0
2024-08-29 15:24:58,112 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:58,113 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:58,114 - INFO - Starting fetch_newspaper_data for query: Vid prins Gustafs byst vid Haga, dates: 1848-01-01 to 1848-06-30


Processed query 'Konsertsalongen på Kungliga Djurgården' successfully.
Checkpoint saved: Year 1848, Half 0, Index 41
Processed query 'Lilla Catharineberg' successfully.
Checkpoint saved: Year 1848, Half 0, Index 42


2024-08-29 15:24:58,350 - INFO - Search results received. Hits: 0
2024-08-29 15:24:58,351 - INFO - Extracted 0 URLs from search results
2024-08-29 15:24:58,351 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:24:58,352 - INFO - Starting fetch_newspaper_data for query: Waxholms kyrka, dates: 1848-01-01 to 1848-06-30


Processed query 'Vid prins Gustafs byst vid Haga' successfully.
Checkpoint saved: Year 1848, Half 0, Index 43


2024-08-29 15:24:58,587 - INFO - Search results received. Hits: 17
2024-08-29 15:24:58,588 - INFO - Extracted 17 URLs from search results
2024-08-29 15:24:58,588 - INFO - Processing URL: https://data.kb.se/dark-66947/part/1/page/4
2024-08-29 15:24:58,726 - INFO - Extracted XML URL for page 4
2024-08-29 15:24:58,727 - INFO - Extracted 1 XML URLs
2024-08-29 15:24:59,023 - INFO - Fetched XML content for 1 pages
2024-08-29 15:24:59,172 - INFO - Processed URL: https://data.kb.se/dark-66947/part/1/page/4
2024-08-29 15:24:59,173 - INFO - Processing URL: https://data.kb.se/dark-37729/part/1/page/4
2024-08-29 15:24:59,297 - INFO - Extracted XML URL for page 4
2024-08-29 15:24:59,297 - INFO - Extracted 1 XML URLs
2024-08-29 15:24:59,590 - INFO - Fetched XML content for 1 pages
2024-08-29 15:24:59,869 - INFO - Processed URL: https://data.kb.se/dark-37729/part/1/page/4
2024-08-29 15:24:59,870 - INFO - Processing URL: https://data.kb.se/dark-37767/part/1/page/3
2024-08-29 15:24:59,991 - INFO - Extr

Processed query 'Waxholms kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 44
Processed query 'Södertelje kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 45


2024-08-29 15:25:09,880 - INFO - Search results received. Hits: 2
2024-08-29 15:25:09,880 - INFO - Extracted 2 URLs from search results
2024-08-29 15:25:09,880 - INFO - Processing URL: https://data.kb.se/dark-37725/part/1/page/1
2024-08-29 15:25:10,013 - INFO - Extracted XML URL for page 1
2024-08-29 15:25:10,013 - INFO - Extracted 1 XML URLs
2024-08-29 15:25:10,260 - INFO - Fetched XML content for 1 pages
2024-08-29 15:25:10,466 - INFO - Processed URL: https://data.kb.se/dark-37725/part/1/page/1
2024-08-29 15:25:10,467 - INFO - Processing URL: https://data.kb.se/dark-41272/part/1/page/1
2024-08-29 15:25:10,589 - INFO - Extracted XML URL for page 1
2024-08-29 15:25:10,589 - INFO - Extracted 1 XML URLs
2024-08-29 15:25:10,857 - INFO - Fetched XML content for 1 pages
2024-08-29 15:25:10,983 - INFO - No matching content found for query 'Kungshatt' on page 1
2024-08-29 15:25:10,984 - INFO - Processed URL: https://data.kb.se/dark-41272/part/1/page/1
2024-08-29 15:25:11,029 - INFO - Inserted

Processed query 'Kungshatt' successfully.
Checkpoint saved: Year 1848, Half 0, Index 46
Processed query 'Djurgården' successfully.
Checkpoint saved: Year 1848, Half 0, Index 47


2024-08-29 15:25:11,382 - INFO - Search results received. Hits: 0
2024-08-29 15:25:11,382 - INFO - Extracted 0 URLs from search results
2024-08-29 15:25:11,383 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:25:11,384 - INFO - Starting fetch_newspaper_data for query: Hasselbacken, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:25:11,543 - INFO - Search results received. Hits: 0
2024-08-29 15:25:11,543 - INFO - Extracted 0 URLs from search results
2024-08-29 15:25:11,543 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:25:11,545 - INFO - Starting fetch_newspaper_data for query: Djurgårdsteatern, dates: 1848-01-01 to 1848-06-30


Processed query 'Vaxholm (nya salongen)' successfully.
Checkpoint saved: Year 1848, Half 0, Index 48
Processed query 'Hasselbacken' successfully.
Checkpoint saved: Year 1848, Half 0, Index 49


2024-08-29 15:25:11,728 - INFO - Search results received. Hits: 0
2024-08-29 15:25:11,728 - INFO - Extracted 0 URLs from search results
2024-08-29 15:25:11,728 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:25:11,729 - INFO - Starting fetch_newspaper_data for query: Katolska kapellet, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:25:11,877 - INFO - Search results received. Hits: 3
2024-08-29 15:25:11,877 - INFO - Extracted 3 URLs from search results
2024-08-29 15:25:11,877 - INFO - Processing URL: https://data.kb.se/dark-39263/part/1/page/1


Processed query 'Djurgårdsteatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 50


2024-08-29 15:25:12,027 - INFO - Extracted XML URL for page 1
2024-08-29 15:25:12,028 - INFO - Extracted 1 XML URLs
2024-08-29 15:25:12,282 - INFO - Fetched XML content for 1 pages
2024-08-29 15:25:12,505 - INFO - Processed URL: https://data.kb.se/dark-39263/part/1/page/1
2024-08-29 15:25:12,506 - INFO - Processing URL: https://data.kb.se/dark-39567/part/1/page/1
2024-08-29 15:25:12,630 - INFO - Extracted XML URL for page 1
2024-08-29 15:25:12,630 - INFO - Extracted 1 XML URLs
2024-08-29 15:25:12,889 - INFO - Fetched XML content for 1 pages
2024-08-29 15:25:13,123 - INFO - Processed URL: https://data.kb.se/dark-39567/part/1/page/1
2024-08-29 15:25:13,124 - INFO - Processing URL: https://data.kb.se/dark-37731/part/1/page/1
2024-08-29 15:25:13,252 - INFO - Extracted XML URL for page 1
2024-08-29 15:25:13,252 - INFO - Extracted 1 XML URLs
2024-08-29 15:25:13,511 - INFO - Fetched XML content for 1 pages
2024-08-29 15:25:13,664 - INFO - Processed URL: https://data.kb.se/dark-37731/part/1/pa

Processed query 'Katolska kapellet' successfully.
Checkpoint saved: Year 1848, Half 0, Index 51


2024-08-29 15:25:14,385 - INFO - Search results received. Hits: 147
2024-08-29 15:25:14,386 - INFO - Extracted 147 URLs from search results
2024-08-29 15:25:14,386 - INFO - Processing URL: https://data.kb.se/dark-41259/part/1/page/4
2024-08-29 15:25:14,508 - INFO - Extracted XML URL for page 4
2024-08-29 15:25:14,508 - INFO - Extracted 1 XML URLs
2024-08-29 15:25:14,787 - INFO - Fetched XML content for 1 pages
2024-08-29 15:25:15,095 - INFO - Processed URL: https://data.kb.se/dark-41259/part/1/page/4
2024-08-29 15:25:15,095 - INFO - Processing URL: https://data.kb.se/dark-39259/part/1/page/4
2024-08-29 15:25:15,221 - INFO - Extracted XML URL for page 4
2024-08-29 15:25:15,222 - INFO - Extracted 1 XML URLs
2024-08-29 15:25:15,516 - INFO - Fetched XML content for 1 pages
2024-08-29 15:25:15,730 - INFO - Processed URL: https://data.kb.se/dark-39259/part/1/page/4
2024-08-29 15:25:15,731 - INFO - Processing URL: https://data.kb.se/dark-39489/part/1/page/4
2024-08-29 15:25:15,858 - INFO - Ex

Processed query 'Drottninggatan 80' successfully.
Checkpoint saved: Year 1848, Half 0, Index 52


2024-08-29 15:26:59,801 - INFO - Search results received. Hits: 1
2024-08-29 15:26:59,801 - INFO - Extracted 1 URLs from search results
2024-08-29 15:26:59,801 - INFO - Processing URL: https://data.kb.se/dark-66947/part/1/page/1
2024-08-29 15:27:00,093 - INFO - Extracted XML URL for page 1
2024-08-29 15:27:00,093 - INFO - Extracted 1 XML URLs
2024-08-29 15:27:00,466 - INFO - Fetched XML content for 1 pages
2024-08-29 15:27:00,822 - INFO - Processed URL: https://data.kb.se/dark-66947/part/1/page/1
2024-08-29 15:27:00,866 - INFO - Inserted final batch of 12 rows. Total rows inserted: 12
2024-08-29 15:27:00,866 - INFO - Data processing completed. Total rows saved: 12
2024-08-29 15:27:00,867 - INFO - Starting fetch_newspaper_data for query: Hagströms konditori/schweizeri, Beridaregatan 18, dates: 1848-01-01 to 1848-06-30


Processed query 'Robert Kahns lokal Drottninggatan 5' successfully.
Checkpoint saved: Year 1848, Half 0, Index 53


2024-08-29 15:27:01,323 - INFO - Search results received. Hits: 0
2024-08-29 15:27:01,323 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:01,324 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:01,327 - INFO - Starting fetch_newspaper_data for query: Berzeli park, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:27:01,487 - INFO - Search results received. Hits: 0
2024-08-29 15:27:01,488 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:01,488 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:01,489 - INFO - Starting fetch_newspaper_data for query: Bährs lokal (Vid Riddarhustorget), dates: 1848-01-01 to 1848-06-30


Processed query 'Hagströms konditori/schweizeri, Beridaregatan 18' successfully.
Checkpoint saved: Year 1848, Half 0, Index 54
Processed query 'Berzeli park' successfully.
Checkpoint saved: Year 1848, Half 0, Index 55


2024-08-29 15:27:01,646 - INFO - Search results received. Hits: 0
2024-08-29 15:27:01,646 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:01,647 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:01,647 - INFO - Starting fetch_newspaper_data for query: Lilla Ingermarshof, dates: 1848-01-01 to 1848-06-30


Processed query 'Bährs lokal (Vid Riddarhustorget)' successfully.
Checkpoint saved: Year 1848, Half 0, Index 56


2024-08-29 15:27:01,857 - INFO - Search results received. Hits: 0
2024-08-29 15:27:01,881 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:01,881 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:01,882 - INFO - Starting fetch_newspaper_data for query: Tivoli å Kongl Djurgården, dates: 1848-01-01 to 1848-06-30


Processed query 'Lilla Ingermarshof' successfully.
Checkpoint saved: Year 1848, Half 0, Index 57


2024-08-29 15:27:02,160 - INFO - Search results received. Hits: 0
2024-08-29 15:27:02,161 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:02,161 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:02,162 - INFO - Starting fetch_newspaper_data for query: Kungl. Humlegårdens Rotunda, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:27:02,350 - INFO - Search results received. Hits: 0
2024-08-29 15:27:02,351 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:02,352 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:02,354 - INFO - Starting fetch_newspaper_data for query: Ladugårdslandsteaterns trädgård, dates: 1848-01-01 to 1848-06-30


Processed query 'Tivoli å Kongl Djurgården' successfully.
Checkpoint saved: Year 1848, Half 0, Index 58
Processed query 'Kungl. Humlegårdens Rotunda' successfully.
Checkpoint saved: Year 1848, Half 0, Index 59


2024-08-29 15:27:02,537 - INFO - Search results received. Hits: 0
2024-08-29 15:27:02,538 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:02,539 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:02,542 - INFO - Starting fetch_newspaper_data for query: Blå porten på kungl. Djurgården, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:27:02,715 - INFO - Search results received. Hits: 0
2024-08-29 15:27:02,716 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:02,717 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:02,719 - INFO - Starting fetch_newspaper_data for query: Schweizeriet på Mosebacke, dates: 1848-01-01 to 1848-06-30


Processed query 'Ladugårdslandsteaterns trädgård' successfully.
Checkpoint saved: Year 1848, Half 0, Index 60
Processed query 'Blå porten på kungl. Djurgården' successfully.
Checkpoint saved: Year 1848, Half 0, Index 61


2024-08-29 15:27:03,030 - INFO - Search results received. Hits: 0
2024-08-29 15:27:03,030 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:03,031 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:03,032 - INFO - Starting fetch_newspaper_data for query: Värdshuset Lübeck, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:27:03,177 - INFO - Search results received. Hits: 0
2024-08-29 15:27:03,178 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:03,179 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:03,180 - INFO - Starting fetch_newspaper_data for query: Davidssons norra paviljong, dates: 1848-01-01 to 1848-06-30


Processed query 'Schweizeriet på Mosebacke' successfully.
Checkpoint saved: Year 1848, Half 0, Index 62
Processed query 'Värdshuset Lübeck' successfully.
Checkpoint saved: Year 1848, Half 0, Index 63


2024-08-29 15:27:03,380 - INFO - Search results received. Hits: 2
2024-08-29 15:27:03,381 - INFO - Extracted 2 URLs from search results
2024-08-29 15:27:03,382 - INFO - Processing URL: https://data.kb.se/dark-41277/part/1/page/2
2024-08-29 15:27:03,518 - INFO - Extracted XML URL for page 2
2024-08-29 15:27:03,519 - INFO - Extracted 1 XML URLs
2024-08-29 15:27:03,901 - INFO - Fetched XML content for 1 pages
2024-08-29 15:27:04,189 - INFO - Processed URL: https://data.kb.se/dark-41277/part/1/page/2
2024-08-29 15:27:04,190 - INFO - Processing URL: https://data.kb.se/dark-39533/part/1/page/1
2024-08-29 15:27:04,322 - INFO - Extracted XML URL for page 1
2024-08-29 15:27:04,323 - INFO - Extracted 1 XML URLs
2024-08-29 15:27:04,597 - INFO - Fetched XML content for 1 pages
2024-08-29 15:27:04,849 - INFO - Processed URL: https://data.kb.se/dark-39533/part/1/page/1
2024-08-29 15:27:04,899 - INFO - Inserted final batch of 7 rows. Total rows inserted: 7
2024-08-29 15:27:04,899 - INFO - Data proces

Processed query 'Davidssons norra paviljong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 64
Processed query 'Källaren Nya Norrmalm vid Hötorget' successfully.
Checkpoint saved: Year 1848, Half 0, Index 65


2024-08-29 15:27:05,232 - INFO - Search results received. Hits: 0
2024-08-29 15:27:05,240 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:05,251 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:05,254 - INFO - Starting fetch_newspaper_data for query: Vinterträdgården, Valhalla, Mäster Samuelsgata 51, dates: 1848-01-01 to 1848-06-30


Processed query 'Café chantant' successfully.
Checkpoint saved: Year 1848, Half 0, Index 66


2024-08-29 15:27:05,520 - INFO - Search results received. Hits: 0
2024-08-29 15:27:05,521 - INFO - Extracted 0 URLs from search results
2024-08-29 15:27:05,522 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:27:05,524 - INFO - Starting fetch_newspaper_data for query: Valhalla, Mäster Samuelsgata 51, dates: 1848-01-01 to 1848-06-30


Processed query 'Vinterträdgården, Valhalla, Mäster Samuelsgata 51' successfully.
Checkpoint saved: Year 1848, Half 0, Index 67


2024-08-29 15:28:20,531 - INFO - Starting fetch_newspaper_data for query: Kungsholms kyrka, dates: 1848-01-01 to 1848-06-30


Error processing query 'Valhalla, Mäster Samuelsgata 51': HTTPSConnectionPool(host='data.kb.se', port=443): Max retries exceeded with url: /search?to=1848-06-30&from=1848-01-01&isPartOf.%40id=https%3A%2F%2Flibris.kb.se%2Fdwpgqn5q03ft91j%23it&q=Valhalla%252C%2BM%25C3%25A4ster%2BSamuelsgata%2B51&searchGranularity=part&limit=100000 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x163c13850>, 'Connection to data.kb.se timed out. (connect timeout=None)'))
Checkpoint saved: Year 1848, Half 0, Index 67


2024-08-29 15:28:20,866 - INFO - Search results received. Hits: 45
2024-08-29 15:28:20,867 - INFO - Extracted 45 URLs from search results
2024-08-29 15:28:20,867 - INFO - Processing URL: https://data.kb.se/dark-39563/part/1/page/1
2024-08-29 15:28:21,020 - INFO - Extracted XML URL for page 1
2024-08-29 15:28:21,020 - INFO - Extracted 1 XML URLs
2024-08-29 15:28:21,313 - INFO - Fetched XML content for 1 pages
2024-08-29 15:28:21,646 - INFO - Processed URL: https://data.kb.se/dark-39563/part/1/page/1
2024-08-29 15:28:21,646 - INFO - Processing URL: https://data.kb.se/dark-39537/part/1/page/1
2024-08-29 15:28:21,771 - INFO - Extracted XML URL for page 1
2024-08-29 15:28:21,771 - INFO - Extracted 1 XML URLs
2024-08-29 15:28:22,069 - INFO - Fetched XML content for 1 pages
2024-08-29 15:28:22,220 - INFO - Processed URL: https://data.kb.se/dark-39537/part/1/page/1
2024-08-29 15:28:22,221 - INFO - Processing URL: https://data.kb.se/dark-41298/part/1/page/3
2024-08-29 15:28:22,346 - INFO - Extr

Processed query 'Kungsholms kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 69


2024-08-29 15:28:50,264 - INFO - Search results received. Hits: 178
2024-08-29 15:28:50,265 - INFO - Extracted 178 URLs from search results
2024-08-29 15:28:50,266 - INFO - Processing URL: https://data.kb.se/dark-39512/part/1/page/4
2024-08-29 15:28:50,431 - INFO - Extracted XML URL for page 4
2024-08-29 15:28:50,431 - INFO - Extracted 1 XML URLs
2024-08-29 15:28:50,754 - INFO - Fetched XML content for 1 pages
2024-08-29 15:28:50,924 - INFO - Processed URL: https://data.kb.se/dark-39512/part/1/page/4
2024-08-29 15:28:50,924 - INFO - Processing URL: https://data.kb.se/dark-39492/part/1/page/4
2024-08-29 15:28:51,047 - INFO - Extracted XML URL for page 4
2024-08-29 15:28:51,047 - INFO - Extracted 1 XML URLs
2024-08-29 15:28:51,330 - INFO - Fetched XML content for 1 pages
2024-08-29 15:28:51,635 - INFO - Processed URL: https://data.kb.se/dark-39492/part/1/page/4
2024-08-29 15:28:51,636 - INFO - Processing URL: https://data.kb.se/dark-41298/part/1/page/4
2024-08-29 15:28:51,763 - INFO - Ex

Processed query 'Regeringsgatan 50' successfully.
Checkpoint saved: Year 1848, Half 0, Index 70


2024-08-29 15:31:00,184 - INFO - Search results received. Hits: 97
2024-08-29 15:31:00,185 - INFO - Extracted 97 URLs from search results
2024-08-29 15:31:00,186 - INFO - Processing URL: https://data.kb.se/dark-39545/part/1/page/1
2024-08-29 15:31:00,325 - INFO - Extracted XML URL for page 1
2024-08-29 15:31:00,325 - INFO - Extracted 1 XML URLs
2024-08-29 15:31:00,593 - INFO - Fetched XML content for 1 pages
2024-08-29 15:31:00,947 - INFO - Processed URL: https://data.kb.se/dark-39545/part/1/page/1
2024-08-29 15:31:00,947 - INFO - Processing URL: https://data.kb.se/dark-39520/part/1/page/1
2024-08-29 15:31:01,088 - INFO - Extracted XML URL for page 1
2024-08-29 15:31:01,089 - INFO - Extracted 1 XML URLs
2024-08-29 15:31:01,366 - INFO - Fetched XML content for 1 pages
2024-08-29 15:31:01,585 - INFO - Processed URL: https://data.kb.se/dark-39520/part/1/page/1
2024-08-29 15:31:01,585 - INFO - Processing URL: https://data.kb.se/dark-37762/part/1/page/4
2024-08-29 15:31:01,716 - INFO - Extr

Processed query 'Adolf Fredriks kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 71
Processed query 'Nya teatern (Hammers salong)' successfully.
Checkpoint saved: Year 1848, Half 0, Index 72


2024-08-29 15:32:10,082 - INFO - Search results received. Hits: 14
2024-08-29 15:32:10,083 - INFO - Extracted 14 URLs from search results
2024-08-29 15:32:10,083 - INFO - Processing URL: https://data.kb.se/dark-37762/part/1/page/4
2024-08-29 15:32:10,315 - INFO - Extracted XML URL for page 4
2024-08-29 15:32:10,316 - INFO - Extracted 1 XML URLs
2024-08-29 15:32:10,608 - INFO - Fetched XML content for 1 pages
2024-08-29 15:32:10,786 - INFO - Processed URL: https://data.kb.se/dark-37762/part/1/page/4
2024-08-29 15:32:10,786 - INFO - Processing URL: https://data.kb.se/dark-102245/part/1/page/1
2024-08-29 15:32:10,913 - INFO - Extracted XML URL for page 1
2024-08-29 15:32:10,913 - INFO - Extracted 1 XML URLs
2024-08-29 15:32:11,183 - INFO - Fetched XML content for 1 pages
2024-08-29 15:32:11,487 - INFO - Processed URL: https://data.kb.se/dark-102245/part/1/page/1
2024-08-29 15:32:11,487 - INFO - Processing URL: https://data.kb.se/dark-41316/part/1/page/2
2024-08-29 15:32:11,637 - INFO - Ex

Processed query 'Jakobs kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 73


2024-08-29 15:32:19,894 - INFO - Search results received. Hits: 14
2024-08-29 15:32:19,894 - INFO - Extracted 14 URLs from search results
2024-08-29 15:32:19,895 - INFO - Processing URL: https://data.kb.se/dark-39520/part/1/page/1
2024-08-29 15:32:20,026 - INFO - Extracted XML URL for page 1
2024-08-29 15:32:20,026 - INFO - Extracted 1 XML URLs
2024-08-29 15:32:20,301 - INFO - Fetched XML content for 1 pages
2024-08-29 15:32:20,565 - INFO - Processed URL: https://data.kb.se/dark-39520/part/1/page/1
2024-08-29 15:32:20,565 - INFO - Processing URL: https://data.kb.se/dark-39537/part/1/page/1
2024-08-29 15:32:20,696 - INFO - Extracted XML URL for page 1
2024-08-29 15:32:20,696 - INFO - Extracted 1 XML URLs
2024-08-29 15:32:21,005 - INFO - Fetched XML content for 1 pages
2024-08-29 15:32:21,189 - INFO - Processed URL: https://data.kb.se/dark-39537/part/1/page/1
2024-08-29 15:32:21,190 - INFO - Processing URL: https://data.kb.se/dark-39268/part/1/page/1
2024-08-29 15:32:21,314 - INFO - Extr

Processed query 'Berns salong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 74


2024-08-29 15:32:29,809 - INFO - Search results received. Hits: 151
2024-08-29 15:32:29,810 - INFO - Extracted 151 URLs from search results
2024-08-29 15:32:29,811 - INFO - Processing URL: https://data.kb.se/dark-39514/part/1/page/1
2024-08-29 15:32:29,946 - INFO - Extracted XML URL for page 1
2024-08-29 15:32:29,947 - INFO - Extracted 1 XML URLs
2024-08-29 15:32:30,219 - INFO - Fetched XML content for 1 pages
2024-08-29 15:32:30,518 - INFO - Processed URL: https://data.kb.se/dark-39514/part/1/page/1
2024-08-29 15:32:30,518 - INFO - Processing URL: https://data.kb.se/dark-39520/part/1/page/1
2024-08-29 15:32:30,648 - INFO - Extracted XML URL for page 1
2024-08-29 15:32:30,649 - INFO - Extracted 1 XML URLs
2024-08-29 15:32:30,913 - INFO - Fetched XML content for 1 pages
2024-08-29 15:32:31,112 - INFO - Processed URL: https://data.kb.se/dark-39520/part/1/page/1
2024-08-29 15:32:31,112 - INFO - Processing URL: https://data.kb.se/dark-37785/part/1/page/4
2024-08-29 15:32:31,233 - INFO - Ex

Processed query 'Tyska kyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 75


2024-08-29 15:34:17,672 - INFO - Search results received. Hits: 0
2024-08-29 15:34:17,673 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:17,673 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:17,674 - INFO - Starting fetch_newspaper_data for query: Slöjdskolans stora Hörsal, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:34:17,854 - INFO - Search results received. Hits: 0
2024-08-29 15:34:17,854 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:17,855 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:17,856 - INFO - Starting fetch_newspaper_data for query: Valhalla, Mäster Samuelsgata 51, dates: 1848-01-01 to 1848-06-30


Processed query 'Manegen å K. Djurgården' successfully.
Checkpoint saved: Year 1848, Half 0, Index 76
Processed query 'Slöjdskolans stora Hörsal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 77


2024-08-29 15:34:18,164 - INFO - Search results received. Hits: 0
2024-08-29 15:34:18,165 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:18,165 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:18,167 - INFO - Starting fetch_newspaper_data for query: Strömparterren, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:34:18,297 - INFO - Search results received. Hits: 0
2024-08-29 15:34:18,298 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:18,298 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:18,300 - INFO - Starting fetch_newspaper_data for query: Odeón-theatern, Regeringsgatan nr 28, dates: 1848-01-01 to 1848-06-30


Processed query 'Valhalla, Mäster Samuelsgata 51' successfully.
Checkpoint saved: Year 1848, Half 0, Index 78
Processed query 'Strömparterren' successfully.
Checkpoint saved: Year 1848, Half 0, Index 79


2024-08-29 15:34:18,612 - INFO - Search results received. Hits: 0
2024-08-29 15:34:18,613 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:18,613 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:18,615 - INFO - Starting fetch_newspaper_data for query: Katarina elementärskolas lokal, dates: 1848-01-01 to 1848-06-30


Processed query 'Odeón-theatern, Regeringsgatan nr 28' successfully.
Checkpoint saved: Year 1848, Half 0, Index 80


2024-08-29 15:34:18,870 - INFO - Search results received. Hits: 0
2024-08-29 15:34:18,870 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:18,871 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:18,873 - INFO - Starting fetch_newspaper_data for query: Piperska Muren, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:34:19,058 - INFO - Search results received. Hits: 0
2024-08-29 15:34:19,059 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:19,059 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:19,060 - INFO - Starting fetch_newspaper_data for query: Frimurarebarnhuset vid Christineberg, dates: 1848-01-01 to 1848-06-30


Processed query 'Katarina elementärskolas lokal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 81
Processed query 'Piperska Muren' successfully.
Checkpoint saved: Year 1848, Half 0, Index 82


2024-08-29 15:34:19,335 - INFO - Search results received. Hits: 0
2024-08-29 15:34:19,336 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:19,337 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:19,338 - INFO - Starting fetch_newspaper_data for query: Dalarö kyrka, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:34:19,522 - INFO - Search results received. Hits: 0
2024-08-29 15:34:19,523 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:19,524 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:19,526 - INFO - Starting fetch_newspaper_data for query: Tomteboda, strax bortom Carlberg, dates: 1848-01-01 to 1848-06-30


Processed query 'Frimurarebarnhuset vid Christineberg' successfully.
Checkpoint saved: Year 1848, Half 0, Index 83
Processed query 'Dalarö kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 84


2024-08-29 15:34:19,861 - INFO - Search results received. Hits: 0
2024-08-29 15:34:19,862 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:19,862 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:19,865 - INFO - Starting fetch_newspaper_data for query: Vetenskapsakademiens hörsal, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:34:20,044 - INFO - Search results received. Hits: 0
2024-08-29 15:34:20,045 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:20,046 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:20,047 - INFO - Starting fetch_newspaper_data for query: Södra salongen, dates: 1848-01-01 to 1848-06-30


Processed query 'Tomteboda, strax bortom Carlberg' successfully.
Checkpoint saved: Year 1848, Half 0, Index 85
Processed query 'Vetenskapsakademiens hörsal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 86


2024-08-29 15:34:20,169 - INFO - Search results received. Hits: 0
2024-08-29 15:34:20,170 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:20,170 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:20,171 - INFO - Starting fetch_newspaper_data for query: Alhambra Variteté-teatern, dates: 1848-01-01 to 1848-06-30


Processed query 'Södra salongen' successfully.
Checkpoint saved: Year 1848, Half 0, Index 87


2024-08-29 15:34:20,374 - INFO - Search results received. Hits: 0
2024-08-29 15:34:20,375 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:20,376 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:20,378 - INFO - Starting fetch_newspaper_data for query: Katarina kyrka, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:34:20,570 - INFO - Search results received. Hits: 0
2024-08-29 15:34:20,571 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:20,572 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:20,574 - INFO - Starting fetch_newspaper_data for query: Orangeriet på Ulriksdals slott, dates: 1848-01-01 to 1848-06-30


Processed query 'Alhambra Variteté-teatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 88
Processed query 'Katarina kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 89


2024-08-29 15:34:20,898 - INFO - Search results received. Hits: 0
2024-08-29 15:34:20,898 - INFO - Extracted 0 URLs from search results
2024-08-29 15:34:20,899 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:34:20,901 - INFO - Starting fetch_newspaper_data for query: Maria Kyrka, dates: 1848-01-01 to 1848-06-30


Processed query 'Orangeriet på Ulriksdals slott' successfully.
Checkpoint saved: Year 1848, Half 0, Index 90


2024-08-29 15:34:21,489 - INFO - Search results received. Hits: 103
2024-08-29 15:34:21,490 - INFO - Extracted 103 URLs from search results
2024-08-29 15:34:21,490 - INFO - Processing URL: https://data.kb.se/dark-39563/part/1/page/1
2024-08-29 15:34:21,719 - INFO - Extracted XML URL for page 1
2024-08-29 15:34:21,721 - INFO - Extracted 1 XML URLs
2024-08-29 15:34:22,194 - INFO - Fetched XML content for 1 pages
2024-08-29 15:34:22,459 - INFO - Processed URL: https://data.kb.se/dark-39563/part/1/page/1
2024-08-29 15:34:22,459 - INFO - Processing URL: https://data.kb.se/dark-39289/part/1/page/4
2024-08-29 15:34:23,748 - INFO - Extracted XML URL for page 4
2024-08-29 15:34:23,749 - INFO - Extracted 1 XML URLs
2024-08-29 15:34:24,105 - INFO - Fetched XML content for 1 pages
2024-08-29 15:34:24,955 - INFO - Processed URL: https://data.kb.se/dark-39289/part/1/page/4
2024-08-29 15:34:24,955 - INFO - Processing URL: https://data.kb.se/dark-37760/part/1/page/4
2024-08-29 15:34:25,319 - INFO - Ex

Processed query 'Maria Kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 91
Processed query 'Hotell Phoenix' successfully.
Checkpoint saved: Year 1848, Half 0, Index 92


2024-08-29 15:35:37,979 - INFO - Search results received. Hits: 31
2024-08-29 15:35:37,980 - INFO - Extracted 31 URLs from search results
2024-08-29 15:35:37,981 - INFO - Processing URL: https://data.kb.se/dark-37761/part/1/page/1
2024-08-29 15:35:38,119 - INFO - Extracted XML URL for page 1
2024-08-29 15:35:38,120 - INFO - Extracted 1 XML URLs
2024-08-29 15:35:38,387 - INFO - Fetched XML content for 1 pages
2024-08-29 15:35:38,672 - INFO - Processed URL: https://data.kb.se/dark-37761/part/1/page/1
2024-08-29 15:35:38,673 - INFO - Processing URL: https://data.kb.se/dark-39518/part/1/page/3
2024-08-29 15:35:38,817 - INFO - Extracted XML URL for page 3
2024-08-29 15:35:38,817 - INFO - Extracted 1 XML URLs
2024-08-29 15:35:39,099 - INFO - Fetched XML content for 1 pages
2024-08-29 15:35:39,292 - INFO - Processed URL: https://data.kb.se/dark-39518/part/1/page/3
2024-08-29 15:35:39,292 - INFO - Processing URL: https://data.kb.se/dark-39260/part/1/page/2
2024-08-29 15:35:39,431 - INFO - Extr

Processed query 'Dramatiska teatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 93
Processed query 'Blanchs café' successfully.
Checkpoint saved: Year 1848, Half 0, Index 94


2024-08-29 15:36:03,049 - INFO - Search results received. Hits: 0
2024-08-29 15:36:03,049 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:03,050 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:03,051 - INFO - Starting fetch_newspaper_data for query: St Paulskapellet vid Adolf Fredriks torg, dates: 1848-01-01 to 1848-06-30


Processed query 'Bethelkapellet, Malmskillnadsgatan 48' successfully.
Checkpoint saved: Year 1848, Half 0, Index 95


2024-08-29 15:36:03,287 - INFO - Search results received. Hits: 0
2024-08-29 15:36:03,288 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:03,288 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:03,289 - INFO - Starting fetch_newspaper_data for query: Katolska kyrkan, dates: 1848-01-01 to 1848-06-30


Processed query 'St Paulskapellet vid Adolf Fredriks torg' successfully.
Checkpoint saved: Year 1848, Half 0, Index 96


2024-08-29 15:36:03,540 - INFO - Search results received. Hits: 26
2024-08-29 15:36:03,541 - INFO - Extracted 26 URLs from search results
2024-08-29 15:36:03,541 - INFO - Processing URL: https://data.kb.se/dark-37767/part/1/page/3
2024-08-29 15:36:03,676 - INFO - Extracted XML URL for page 3
2024-08-29 15:36:03,677 - INFO - Extracted 1 XML URLs
2024-08-29 15:36:03,974 - INFO - Fetched XML content for 1 pages
2024-08-29 15:36:04,431 - INFO - Processed URL: https://data.kb.se/dark-37767/part/1/page/3
2024-08-29 15:36:04,431 - INFO - Processing URL: https://data.kb.se/dark-37736/part/1/page/3
2024-08-29 15:36:04,562 - INFO - Extracted XML URL for page 3
2024-08-29 15:36:04,563 - INFO - Extracted 1 XML URLs
2024-08-29 15:36:04,845 - INFO - Fetched XML content for 1 pages
2024-08-29 15:36:05,052 - INFO - Processed URL: https://data.kb.se/dark-37736/part/1/page/3
2024-08-29 15:36:05,052 - INFO - Processing URL: https://data.kb.se/dark-37747/part/1/page/2
2024-08-29 15:36:05,187 - INFO - Extr

Processed query 'Katolska kyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 97
Processed query 'Immanuelskyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 98


2024-08-29 15:36:21,670 - INFO - Search results received. Hits: 13
2024-08-29 15:36:21,672 - INFO - Extracted 13 URLs from search results
2024-08-29 15:36:21,672 - INFO - Processing URL: https://data.kb.se/dark-39520/part/1/page/1
2024-08-29 15:36:21,827 - INFO - Extracted XML URL for page 1
2024-08-29 15:36:21,829 - INFO - Extracted 1 XML URLs
2024-08-29 15:36:22,105 - INFO - Fetched XML content for 1 pages
2024-08-29 15:36:22,290 - INFO - Processed URL: https://data.kb.se/dark-39520/part/1/page/1
2024-08-29 15:36:22,290 - INFO - Processing URL: https://data.kb.se/dark-39537/part/1/page/1
2024-08-29 15:36:22,428 - INFO - Extracted XML URL for page 1
2024-08-29 15:36:22,428 - INFO - Extracted 1 XML URLs
2024-08-29 15:36:22,719 - INFO - Fetched XML content for 1 pages
2024-08-29 15:36:23,032 - INFO - Processed URL: https://data.kb.se/dark-39537/part/1/page/1
2024-08-29 15:36:23,033 - INFO - Processing URL: https://data.kb.se/dark-102252/part/1/page/4
2024-08-29 15:36:23,152 - INFO - Ext

Processed query 'Berns nya salong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 99
Processed query 'Hôtel W6 stora salong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 100


2024-08-29 15:36:30,806 - INFO - Search results received. Hits: 0
2024-08-29 15:36:30,807 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:30,807 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:30,809 - INFO - Starting fetch_newspaper_data for query: Sveasalen, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:36:30,964 - INFO - Search results received. Hits: 0
2024-08-29 15:36:30,965 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:30,965 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:30,967 - INFO - Starting fetch_newspaper_data for query: Lutherska Missionshuset, Högbergsgatan 27, dates: 1848-01-01 to 1848-06-30


Processed query 'Vasateatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 101
Processed query 'Sveasalen' successfully.
Checkpoint saved: Year 1848, Half 0, Index 102


2024-08-29 15:36:31,191 - INFO - Search results received. Hits: 0
2024-08-29 15:36:31,191 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:31,192 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:31,194 - INFO - Starting fetch_newspaper_data for query: Östermalms kyrka, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:36:31,332 - INFO - Search results received. Hits: 0
2024-08-29 15:36:31,333 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:31,334 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:31,336 - INFO - Starting fetch_newspaper_data for query: Florakyrkan, dates: 1848-01-01 to 1848-06-30


Processed query 'Lutherska Missionshuset, Högbergsgatan 27' successfully.
Checkpoint saved: Year 1848, Half 0, Index 103
Processed query 'Östermalms kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 104


2024-08-29 15:36:31,538 - INFO - Search results received. Hits: 0
2024-08-29 15:36:31,539 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:31,540 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:31,542 - INFO - Starting fetch_newspaper_data for query: Sällskapet E. W:s stora sal, dates: 1848-01-01 to 1848-06-30


Processed query 'Florakyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 105


2024-08-29 15:36:31,803 - INFO - Search results received. Hits: 0
2024-08-29 15:36:31,803 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:31,804 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:31,806 - INFO - Starting fetch_newspaper_data for query: E. W:s Teater, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:36:31,998 - INFO - Search results received. Hits: 0
2024-08-29 15:36:32,000 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:32,000 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:32,003 - INFO - Starting fetch_newspaper_data for query: Stockholms börs, dates: 1848-01-01 to 1848-06-30


Processed query 'Sällskapet E. W:s stora sal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 106
Processed query 'E. W:s Teater' successfully.
Checkpoint saved: Year 1848, Half 0, Index 107


2024-08-29 15:36:32,122 - INFO - Search results received. Hits: 0
2024-08-29 15:36:32,122 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:32,123 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:32,125 - INFO - Starting fetch_newspaper_data for query: Typografiska föreningens lokal, dates: 1848-01-01 to 1848-06-30


Processed query 'Stockholms börs' successfully.
Checkpoint saved: Year 1848, Half 0, Index 108


2024-08-29 15:36:32,349 - INFO - Search results received. Hits: 0
2024-08-29 15:36:32,349 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:32,350 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:32,352 - INFO - Starting fetch_newspaper_data for query: Sällskapet W.6 stora sal, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:36:32,533 - INFO - Search results received. Hits: 0
2024-08-29 15:36:32,534 - INFO - Extracted 0 URLs from search results
2024-08-29 15:36:32,534 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:36:32,536 - INFO - Starting fetch_newspaper_data for query: Stockholms Tivoli, dates: 1848-01-01 to 1848-06-30


Processed query 'Typografiska föreningens lokal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 109
Processed query 'Sällskapet W.6 stora sal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 110


2024-08-29 15:36:32,808 - INFO - Search results received. Hits: 8
2024-08-29 15:36:32,809 - INFO - Extracted 8 URLs from search results
2024-08-29 15:36:32,810 - INFO - Processing URL: https://data.kb.se/dark-102250/part/1/page/2
2024-08-29 15:36:32,951 - INFO - Extracted XML URL for page 2
2024-08-29 15:36:32,951 - INFO - Extracted 1 XML URLs
2024-08-29 15:36:33,210 - INFO - Fetched XML content for 1 pages
2024-08-29 15:36:33,522 - INFO - Processed URL: https://data.kb.se/dark-102250/part/1/page/2
2024-08-29 15:36:33,522 - INFO - Processing URL: https://data.kb.se/dark-39260/part/1/page/4
2024-08-29 15:36:33,644 - INFO - Extracted XML URL for page 4
2024-08-29 15:36:33,645 - INFO - Extracted 1 XML URLs
2024-08-29 15:36:33,915 - INFO - Fetched XML content for 1 pages
2024-08-29 15:36:34,061 - INFO - Processed URL: https://data.kb.se/dark-39260/part/1/page/4
2024-08-29 15:36:34,061 - INFO - Processing URL: https://data.kb.se/dark-39300/part/1/page/4
2024-08-29 15:36:34,184 - INFO - Extr

Processed query 'Stockholms Tivoli' successfully.
Checkpoint saved: Year 1848, Half 0, Index 111


2024-08-29 15:36:38,828 - INFO - Search results received. Hits: 124
2024-08-29 15:36:38,829 - INFO - Extracted 124 URLs from search results
2024-08-29 15:36:38,830 - INFO - Processing URL: https://data.kb.se/dark-39563/part/1/page/1
2024-08-29 15:36:38,971 - INFO - Extracted XML URL for page 1
2024-08-29 15:36:38,972 - INFO - Extracted 1 XML URLs
2024-08-29 15:36:39,305 - INFO - Fetched XML content for 1 pages
2024-08-29 15:36:39,544 - INFO - Processed URL: https://data.kb.se/dark-39563/part/1/page/1
2024-08-29 15:36:39,545 - INFO - Processing URL: https://data.kb.se/dark-39278/part/1/page/1
2024-08-29 15:36:39,666 - INFO - Extracted XML URL for page 1
2024-08-29 15:36:39,667 - INFO - Extracted 1 XML URLs
2024-08-29 15:36:39,932 - INFO - Fetched XML content for 1 pages
2024-08-29 15:36:40,256 - INFO - Processed URL: https://data.kb.se/dark-39278/part/1/page/1
2024-08-29 15:36:40,257 - INFO - Processing URL: https://data.kb.se/dark-39512/part/1/page/4
2024-08-29 15:36:40,380 - INFO - Ex

Processed query 'Clara kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 112


2024-08-29 15:38:06,637 - INFO - Search results received. Hits: 0
2024-08-29 15:38:06,637 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:06,637 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:06,639 - INFO - Starting fetch_newspaper_data for query: Ebenezerkapellet, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:06,810 - INFO - Search results received. Hits: 0
2024-08-29 15:38:06,810 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:06,810 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:06,812 - INFO - Starting fetch_newspaper_data for query: Salemkapellet, Folkungagatan 14, dates: 1848-01-01 to 1848-06-30


Processed query 'Vaxholms socitetssal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 113
Processed query 'Ebenezerkapellet' successfully.
Checkpoint saved: Year 1848, Half 0, Index 114


2024-08-29 15:38:07,124 - INFO - Search results received. Hits: 0
2024-08-29 15:38:07,125 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:07,126 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:07,129 - INFO - Starting fetch_newspaper_data for query: Riddarholmskyrkan, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:07,291 - INFO - Search results received. Hits: 3
2024-08-29 15:38:07,292 - INFO - Extracted 3 URLs from search results
2024-08-29 15:38:07,293 - INFO - Processing URL: https://data.kb.se/dark-41288/part/1/page/3


Processed query 'Salemkapellet, Folkungagatan 14' successfully.
Checkpoint saved: Year 1848, Half 0, Index 115


2024-08-29 15:38:07,477 - INFO - Extracted XML URL for page 3
2024-08-29 15:38:07,477 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:07,821 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:08,105 - INFO - Processed URL: https://data.kb.se/dark-41288/part/1/page/3
2024-08-29 15:38:08,105 - INFO - Processing URL: https://data.kb.se/dark-37726/part/1/page/3
2024-08-29 15:38:08,238 - INFO - Extracted XML URL for page 3
2024-08-29 15:38:08,239 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:08,552 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:08,780 - INFO - Processed URL: https://data.kb.se/dark-37726/part/1/page/3
2024-08-29 15:38:08,780 - INFO - Processing URL: https://data.kb.se/dark-66947/part/1/page/2
2024-08-29 15:38:08,902 - INFO - Extracted XML URL for page 2
2024-08-29 15:38:08,902 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:09,156 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:09,384 - INFO - No matching content found for query 'Riddarholmskyrkan

Processed query 'Riddarholmskyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 116
Processed query 'Praktiska skolans stora hörsal i Brunkebergs hotell' successfully.
Checkpoint saved: Year 1848, Half 0, Index 117


2024-08-29 15:38:09,852 - INFO - Search results received. Hits: 0
2024-08-29 15:38:09,854 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:09,854 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:09,857 - INFO - Starting fetch_newspaper_data for query: Fru Thoréns Kafé, Gamla Kungsholmsgatan 36, dates: 1848-01-01 to 1848-06-30


Processed query 'Bergsunds Arbetarepaviljong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 118


2024-08-29 15:38:10,097 - INFO - Search results received. Hits: 0
2024-08-29 15:38:10,097 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:10,098 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:10,100 - INFO - Starting fetch_newspaper_data for query: Godtemplarsalen, Kronobergsgatan 11, dates: 1848-01-01 to 1848-06-30


Processed query 'Fru Thoréns Kafé, Gamla Kungsholmsgatan 36' successfully.
Checkpoint saved: Year 1848, Half 0, Index 119


2024-08-29 15:38:10,368 - INFO - Search results received. Hits: 0
2024-08-29 15:38:10,369 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:10,369 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:10,370 - INFO - Starting fetch_newspaper_data for query: Folkteatern, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:10,567 - INFO - Search results received. Hits: 0
2024-08-29 15:38:10,568 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:10,568 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:10,570 - INFO - Starting fetch_newspaper_data for query: Bijou-teatern, dates: 1848-01-01 to 1848-06-30


Processed query 'Godtemplarsalen, Kronobergsgatan 11' successfully.
Checkpoint saved: Year 1848, Half 0, Index 120
Processed query 'Folkteatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 121


2024-08-29 15:38:10,726 - INFO - Search results received. Hits: 0
2024-08-29 15:38:10,727 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:10,728 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:10,730 - INFO - Starting fetch_newspaper_data for query: Östermalmsteatern, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:10,854 - INFO - Search results received. Hits: 0
2024-08-29 15:38:10,855 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:10,855 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:10,857 - INFO - Starting fetch_newspaper_data for query: Ladugårdsteatern, dates: 1848-01-01 to 1848-06-30


Processed query 'Bijou-teatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 122
Processed query 'Östermalmsteatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 123


2024-08-29 15:38:11,014 - INFO - Search results received. Hits: 0
2024-08-29 15:38:11,015 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:11,015 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:11,017 - INFO - Starting fetch_newspaper_data for query: Arbetarepaviljongen i Bergsund, dates: 1848-01-01 to 1848-06-30


Processed query 'Ladugårdsteatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 124


2024-08-29 15:38:11,238 - INFO - Search results received. Hits: 0
2024-08-29 15:38:11,239 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:11,240 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:11,242 - INFO - Starting fetch_newspaper_data for query: Lilla börssalen, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:11,419 - INFO - Search results received. Hits: 0
2024-08-29 15:38:11,420 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:11,420 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:11,422 - INFO - Starting fetch_newspaper_data for query: Murarfackföreningens Nya lokal, Tunnelgatan 12, 2tr, dates: 1848-01-01 to 1848-06-30


Processed query 'Arbetarepaviljongen i Bergsund' successfully.
Checkpoint saved: Year 1848, Half 0, Index 125
Processed query 'Lilla börssalen' successfully.
Checkpoint saved: Year 1848, Half 0, Index 126


2024-08-29 15:38:11,707 - INFO - Search results received. Hits: 0
2024-08-29 15:38:11,708 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:11,708 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:11,711 - INFO - Starting fetch_newspaper_data for query: Kungssalen Brunkebergs Hotell, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:11,908 - INFO - Search results received. Hits: 0
2024-08-29 15:38:11,909 - INFO - Extracted 0 URLs from search results


Processed query 'Murarfackföreningens Nya lokal, Tunnelgatan 12, 2tr' successfully.
Checkpoint saved: Year 1848, Half 0, Index 127


2024-08-29 15:38:11,910 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:11,913 - INFO - Starting fetch_newspaper_data for query: Urvädersgränd 11, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:12,052 - INFO - Search results received. Hits: 0
2024-08-29 15:38:12,053 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:12,054 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:12,056 - INFO - Starting fetch_newspaper_data for query: Brunkebergstorg 2, [Dybeckska huset], dates: 1848-01-01 to 1848-06-30


Processed query 'Kungssalen Brunkebergs Hotell' successfully.
Checkpoint saved: Year 1848, Half 0, Index 128
Processed query 'Urvädersgränd 11' successfully.
Checkpoint saved: Year 1848, Half 0, Index 129


2024-08-29 15:38:12,235 - INFO - Search results received. Hits: 0
2024-08-29 15:38:12,236 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:12,237 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:12,240 - INFO - Starting fetch_newspaper_data for query: Frimurarelogens restaurant, dates: 1848-01-01 to 1848-06-30


Processed query 'Brunkebergstorg 2, [Dybeckska huset]' successfully.
Checkpoint saved: Year 1848, Half 0, Index 130


2024-08-29 15:38:12,463 - INFO - Search results received. Hits: 0
2024-08-29 15:38:12,464 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:12,464 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:12,468 - INFO - Starting fetch_newspaper_data for query: Brännkyrkagatan 42, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:12,613 - INFO - Search results received. Hits: 0
2024-08-29 15:38:12,614 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:12,614 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:12,616 - INFO - Starting fetch_newspaper_data for query: Folies Bergères, dates: 1848-01-01 to 1848-06-30


Processed query 'Frimurarelogens restaurant' successfully.
Checkpoint saved: Year 1848, Half 0, Index 131
Processed query 'Brännkyrkagatan 42' successfully.
Checkpoint saved: Year 1848, Half 0, Index 132


2024-08-29 15:38:12,779 - INFO - Search results received. Hits: 0
2024-08-29 15:38:12,780 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:12,780 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:12,782 - INFO - Starting fetch_newspaper_data for query: E. W:s stora salong, dates: 1848-01-01 to 1848-06-30


Processed query 'Folies Bergères' successfully.
Checkpoint saved: Year 1848, Half 0, Index 133


2024-08-29 15:38:12,983 - INFO - Search results received. Hits: 0
2024-08-29 15:38:12,983 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:12,984 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:12,987 - INFO - Starting fetch_newspaper_data for query: Tunnelgatan 12, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:13,140 - INFO - Search results received. Hits: 0
2024-08-29 15:38:13,140 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:13,141 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:13,143 - INFO - Starting fetch_newspaper_data for query: Kronobergsgatan 11, dates: 1848-01-01 to 1848-06-30


Processed query 'E. W:s stora salong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 134
Processed query 'Tunnelgatan 12' successfully.
Checkpoint saved: Year 1848, Half 0, Index 135


2024-08-29 15:38:13,304 - INFO - Search results received. Hits: 0
2024-08-29 15:38:13,305 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:13,305 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:13,307 - INFO - Starting fetch_newspaper_data for query: G.T. logens lokal, Mäster Samuelsgatan 51a, dates: 1848-01-01 to 1848-06-30


Processed query 'Kronobergsgatan 11' successfully.
Checkpoint saved: Year 1848, Half 0, Index 136


2024-08-29 15:38:13,569 - INFO - Search results received. Hits: 0
2024-08-29 15:38:13,570 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:13,570 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:13,572 - INFO - Starting fetch_newspaper_data for query: Arbetare-institutet, Klara Norra Kyrkogata 8, dates: 1848-01-01 to 1848-06-30


Processed query 'G.T. logens lokal, Mäster Samuelsgatan 51a' successfully.
Checkpoint saved: Year 1848, Half 0, Index 137


2024-08-29 15:38:13,930 - INFO - Search results received. Hits: 0
2024-08-29 15:38:13,931 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:13,932 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:13,934 - INFO - Starting fetch_newspaper_data for query: Skansen, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:14,098 - INFO - Search results received. Hits: 6
2024-08-29 15:38:14,099 - INFO - Extracted 6 URLs from search results
2024-08-29 15:38:14,099 - INFO - Processing URL: https://data.kb.se/dark-39521/part/1/page/2


Processed query 'Arbetare-institutet, Klara Norra Kyrkogata 8' successfully.
Checkpoint saved: Year 1848, Half 0, Index 138


2024-08-29 15:38:14,402 - INFO - Extracted XML URL for page 2
2024-08-29 15:38:14,403 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:14,804 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:14,956 - INFO - No matching content found for query 'Skansen' on page 2
2024-08-29 15:38:14,957 - INFO - Processed URL: https://data.kb.se/dark-39521/part/1/page/2
2024-08-29 15:38:14,957 - INFO - Processing URL: https://data.kb.se/dark-39545/part/1/page/2
2024-08-29 15:38:15,077 - INFO - Extracted XML URL for page 2
2024-08-29 15:38:15,078 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:15,522 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:15,809 - INFO - No matching content found for query 'Skansen' on page 2
2024-08-29 15:38:15,810 - INFO - Processed URL: https://data.kb.se/dark-39545/part/1/page/2
2024-08-29 15:38:15,810 - INFO - Processing URL: https://data.kb.se/dark-41289/part/1/page/2
2024-08-29 15:38:16,016 - INFO - Extracted XML URL for page 2
2024-08-29 15:38:16,017 - IN

Processed query 'Skansen' successfully.
Checkpoint saved: Year 1848, Half 0, Index 139


2024-08-29 15:38:20,212 - INFO - Search results received. Hits: 0
2024-08-29 15:38:20,213 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:20,213 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:20,215 - INFO - Starting fetch_newspaper_data for query: KFUM:s stora sal, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:20,397 - INFO - Search results received. Hits: 0
2024-08-29 15:38:20,397 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:20,398 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:20,400 - INFO - Starting fetch_newspaper_data for query: Trefaldighetskyrkan, dates: 1848-01-01 to 1848-06-30


Processed query 'Kristliga föreningen af unga mäns lokal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 140
Processed query 'KFUM:s stora sal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 141


2024-08-29 15:38:20,697 - INFO - Search results received. Hits: 0
2024-08-29 15:38:20,699 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:20,699 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:20,702 - INFO - Starting fetch_newspaper_data for query: Kristallsalongen, Stockholms tivoli, dates: 1848-01-01 to 1848-06-30


Processed query 'Trefaldighetskyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 142


2024-08-29 15:38:20,908 - INFO - Search results received. Hits: 0
2024-08-29 15:38:20,909 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:20,909 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:20,911 - INFO - Starting fetch_newspaper_data for query: G.T:s lokal, Apelbergsgatan 48, dates: 1848-01-01 to 1848-06-30


Processed query 'Kristallsalongen, Stockholms tivoli' successfully.
Checkpoint saved: Year 1848, Half 0, Index 143


2024-08-29 15:38:21,225 - INFO - Search results received. Hits: 0
2024-08-29 15:38:21,227 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:21,227 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:21,230 - INFO - Starting fetch_newspaper_data for query: Hushållsskolan, Jakobsbergsgatan 15, 1tr, dates: 1848-01-01 to 1848-06-30


Processed query 'G.T:s lokal, Apelbergsgatan 48' successfully.
Checkpoint saved: Year 1848, Half 0, Index 144


2024-08-29 15:38:21,542 - INFO - Search results received. Hits: 0
2024-08-29 15:38:21,542 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:21,543 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:21,544 - INFO - Starting fetch_newspaper_data for query: Sveateatern, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:21,745 - INFO - Search results received. Hits: 0


Processed query 'Hushållsskolan, Jakobsbergsgatan 15, 1tr' successfully.
Checkpoint saved: Year 1848, Half 0, Index 145


2024-08-29 15:38:21,746 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:21,747 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:21,749 - INFO - Starting fetch_newspaper_data for query: Lantbruksakademiens lokal, Mäster-Samuelsgatan 43, 2tr, dates: 1848-01-01 to 1848-06-30


Processed query 'Sveateatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 146


2024-08-29 15:38:23,115 - INFO - Search results received. Hits: 0
2024-08-29 15:38:23,115 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:23,115 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:23,116 - INFO - Starting fetch_newspaper_data for query: Lokalen, Gamla Kungsholmsbrogatan 26 A, 1tr, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:23,314 - INFO - Search results received. Hits: 0
2024-08-29 15:38:23,315 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:23,315 - INFO - Data processing completed. Total rows saved: 0


Processed query 'Lantbruksakademiens lokal, Mäster-Samuelsgatan 43, 2tr' successfully.
Checkpoint saved: Year 1848, Half 0, Index 147
Processed query 'Lokalen, Gamla Kungsholmsbrogatan 26 A, 1tr' successfully.


2024-08-29 15:38:23,317 - INFO - Starting fetch_newspaper_data for query: Grand Restaurant Nationals stora salong, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:23,508 - INFO - Search results received. Hits: 0
2024-08-29 15:38:23,508 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:23,509 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:23,510 - INFO - Starting fetch_newspaper_data for query: Kungliga musikaliska akademiens lilla salong, dates: 1848-01-01 to 1848-06-30


Checkpoint saved: Year 1848, Half 0, Index 148
Processed query 'Grand Restaurant Nationals stora salong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 149


2024-08-29 15:38:23,854 - INFO - Search results received. Hits: 4
2024-08-29 15:38:23,855 - INFO - Extracted 4 URLs from search results
2024-08-29 15:38:23,856 - INFO - Processing URL: https://data.kb.se/dark-37784/part/1/page/1
2024-08-29 15:38:23,990 - INFO - Extracted XML URL for page 1
2024-08-29 15:38:23,991 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:24,447 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:24,749 - INFO - Processed URL: https://data.kb.se/dark-37784/part/1/page/1
2024-08-29 15:38:24,750 - INFO - Processing URL: https://data.kb.se/dark-37761/part/1/page/1
2024-08-29 15:38:24,949 - INFO - Extracted XML URL for page 1
2024-08-29 15:38:24,950 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:25,315 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:25,507 - INFO - Processed URL: https://data.kb.se/dark-37761/part/1/page/1
2024-08-29 15:38:25,507 - INFO - Processing URL: https://data.kb.se/dark-37728/part/1/page/1
2024-08-29 15:38:25,632 - INFO - Extrac

Processed query 'Kungliga musikaliska akademiens lilla salong' successfully.
Checkpoint saved: Year 1848, Half 0, Index 150


2024-08-29 15:38:27,506 - INFO - Search results received. Hits: 0
2024-08-29 15:38:27,506 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:27,507 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:27,508 - INFO - Starting fetch_newspaper_data for query: Hotel Continentals festsal, dates: 1848-01-01 to 1848-06-30


Processed query 'Arbetareföreningens stora sal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 151


2024-08-29 15:38:27,717 - INFO - Search results received. Hits: 0
2024-08-29 15:38:27,717 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:27,718 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:27,720 - INFO - Starting fetch_newspaper_data for query: Gustav Vasa-kyrkan, dates: 1848-01-01 to 1848-06-30


Processed query 'Hotel Continentals festsal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 152


2024-08-29 15:38:28,031 - INFO - Search results received. Hits: 0
2024-08-29 15:38:28,031 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:28,031 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:28,033 - INFO - Starting fetch_newspaper_data for query: Borgarskolan, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:28,171 - INFO - Search results received. Hits: 0
2024-08-29 15:38:28,172 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:28,172 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:28,174 - INFO - Starting fetch_newspaper_data for query: Bellios Taverna, dates: 1848-01-01 to 1848-06-30


Processed query 'Gustav Vasa-kyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 153
Processed query 'Borgarskolan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 154


2024-08-29 15:38:28,358 - INFO - Search results received. Hits: 0
2024-08-29 15:38:28,358 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:28,358 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:28,359 - INFO - Starting fetch_newspaper_data for query: Brummerska skolan, Johannesgatan 18, dates: 1848-01-01 to 1848-06-30


Processed query 'Bellios Taverna' successfully.
Checkpoint saved: Year 1848, Half 0, Index 155


2024-08-29 15:38:28,575 - INFO - Search results received. Hits: 0
2024-08-29 15:38:28,576 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:28,577 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:28,580 - INFO - Starting fetch_newspaper_data for query: Södermalms högre allmänna läroverks aula, dates: 1848-01-01 to 1848-06-30


Processed query 'Brummerska skolan, Johannesgatan 18' successfully.
Checkpoint saved: Year 1848, Half 0, Index 156


2024-08-29 15:38:28,781 - INFO - Search results received. Hits: 0
2024-08-29 15:38:28,781 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:28,782 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:28,783 - INFO - Starting fetch_newspaper_data for query: Kungsholmens realskolas högtidssal, dates: 1848-01-01 to 1848-06-30


Processed query 'Södermalms högre allmänna läroverks aula' successfully.
Checkpoint saved: Year 1848, Half 0, Index 157


2024-08-29 15:38:29,091 - INFO - Search results received. Hits: 0
2024-08-29 15:38:29,092 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:29,092 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:29,094 - INFO - Starting fetch_newspaper_data for query: Högre allmnna läroverket på Norrmalm, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:29,278 - INFO - Search results received. Hits: 0
2024-08-29 15:38:29,278 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:29,279 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:29,281 - INFO - Starting fetch_newspaper_data for query: Sofiakyrkan, dates: 1848-01-01 to 1848-06-30


Processed query 'Kungsholmens realskolas högtidssal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 158
Processed query 'Högre allmnna läroverket på Norrmalm' successfully.
Checkpoint saved: Year 1848, Half 0, Index 159


2024-08-29 15:38:29,524 - INFO - Search results received. Hits: 0
2024-08-29 15:38:29,525 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:29,525 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:29,527 - INFO - Starting fetch_newspaper_data for query: KFUK:s stora hörsal, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:29,653 - INFO - Search results received. Hits: 0
2024-08-29 15:38:29,654 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:29,654 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:29,656 - INFO - Starting fetch_newspaper_data for query: Grand hotel, festsalen, dates: 1848-01-01 to 1848-06-30


Processed query 'Sofiakyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 160
Processed query 'KFUK:s stora hörsal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 161


2024-08-29 15:38:29,842 - INFO - Search results received. Hits: 0
2024-08-29 15:38:29,843 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:29,843 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:29,845 - INFO - Starting fetch_newspaper_data for query: Oscarskyrkan, dates: 1848-01-01 to 1848-06-30


Processed query 'Grand hotel, festsalen' successfully.
Checkpoint saved: Year 1848, Half 0, Index 162


2024-08-29 15:38:30,085 - INFO - Search results received. Hits: 0
2024-08-29 15:38:30,085 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:30,086 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:30,087 - INFO - Starting fetch_newspaper_data for query: St Peters kyrka, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:30,276 - INFO - Search results received. Hits: 5
2024-08-29 15:38:30,277 - INFO - Extracted 5 URLs from search results
2024-08-29 15:38:30,278 - INFO - Processing URL: https://data.kb.se/dark-39259/part/1/page/4


Processed query 'Oscarskyrkan' successfully.
Checkpoint saved: Year 1848, Half 0, Index 163


2024-08-29 15:38:30,497 - INFO - Extracted XML URL for page 4
2024-08-29 15:38:30,497 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:30,856 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:31,001 - INFO - Processed URL: https://data.kb.se/dark-39259/part/1/page/4
2024-08-29 15:38:31,001 - INFO - Processing URL: https://data.kb.se/dark-39515/part/1/page/1
2024-08-29 15:38:31,121 - INFO - Extracted XML URL for page 1
2024-08-29 15:38:31,121 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:31,401 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:31,715 - INFO - Processed URL: https://data.kb.se/dark-39515/part/1/page/1
2024-08-29 15:38:31,716 - INFO - Processing URL: https://data.kb.se/dark-41279/part/1/page/2
2024-08-29 15:38:31,837 - INFO - Extracted XML URL for page 2
2024-08-29 15:38:31,838 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:32,212 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:32,456 - INFO - Processed URL: https://data.kb.se/dark-41279/part/1/pa

Processed query 'St Peters kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 164
Processed query 'Kungsholmens läroverks högtidssal' successfully.
Checkpoint saved: Year 1848, Half 0, Index 165


2024-08-29 15:38:34,563 - INFO - Search results received. Hits: 0
2024-08-29 15:38:34,564 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:34,564 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:34,565 - INFO - Starting fetch_newspaper_data for query: Höloftet på Skansen, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:34,704 - INFO - Search results received. Hits: 0
2024-08-29 15:38:34,704 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:34,705 - INFO - Data processing completed. Total rows saved: 0
2024-08-29 15:38:34,706 - INFO - Starting fetch_newspaper_data for query: Cirkus, dates: 1848-01-01 to 1848-06-30


Processed query 'St Matteus kyrka' successfully.
Checkpoint saved: Year 1848, Half 0, Index 166
Processed query 'Höloftet på Skansen' successfully.
Checkpoint saved: Year 1848, Half 0, Index 167


2024-08-29 15:38:34,818 - INFO - Search results received. Hits: 1
2024-08-29 15:38:34,818 - INFO - Extracted 1 URLs from search results
2024-08-29 15:38:34,819 - INFO - Processing URL: https://data.kb.se/dark-39866/part/1/page/3
2024-08-29 15:38:34,945 - INFO - Extracted XML URL for page 3
2024-08-29 15:38:34,945 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:35,250 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:35,428 - INFO - Processed URL: https://data.kb.se/dark-39866/part/1/page/3
2024-08-29 15:38:35,503 - INFO - Inserted final batch of 1 rows. Total rows inserted: 1
2024-08-29 15:38:35,504 - INFO - Data processing completed. Total rows saved: 1
2024-08-29 15:38:35,506 - INFO - Starting fetch_newspaper_data for query: Oscarsteatern, dates: 1848-01-01 to 1848-06-30
2024-08-29 15:38:35,702 - INFO - Search results received. Hits: 0
2024-08-29 15:38:35,702 - INFO - Extracted 0 URLs from search results
2024-08-29 15:38:35,703 - INFO - Data processing completed. Total rows sa

Processed query 'Cirkus' successfully.
Checkpoint saved: Year 1848, Half 0, Index 168
Processed query 'Oscarsteatern' successfully.
Checkpoint saved: Year 1848, Half 0, Index 169


2024-08-29 15:38:36,617 - INFO - Search results received. Hits: 250
2024-08-29 15:38:36,617 - INFO - Extracted 250 URLs from search results
2024-08-29 15:38:36,618 - INFO - Processing URL: https://data.kb.se/dark-39515/part/1/page/2
2024-08-29 15:38:36,744 - INFO - Extracted XML URL for page 2
2024-08-29 15:38:36,744 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:37,029 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:37,354 - INFO - Processed URL: https://data.kb.se/dark-39515/part/1/page/2
2024-08-29 15:38:37,355 - INFO - Processing URL: https://data.kb.se/dark-102244/part/1/page/3
2024-08-29 15:38:37,485 - INFO - Extracted XML URL for page 3
2024-08-29 15:38:37,485 - INFO - Extracted 1 XML URLs
2024-08-29 15:38:37,810 - INFO - Fetched XML content for 1 pages
2024-08-29 15:38:38,176 - INFO - Processed URL: https://data.kb.se/dark-102244/part/1/page/3
2024-08-29 15:38:38,176 - INFO - Processing URL: https://data.kb.se/dark-39493/part/1/page/1
2024-08-29 15:38:38,309 - INFO - 

# Load SQL Database into a DataFrame

In [None]:
import sqlite3
import pandas as pd

# SQL query to select all columns except 'Raw API Result'
query = """
SELECT Date, [Package ID], Part, Page, [ComposedBlock Content], [Full Prompt]
FROM newspaper_data
"""

# Keep the connection open only for the duration of the read. The finally
# clause guarantees the handle is released even when the query itself fails
# (e.g. the table or one of the selected columns does not exist yet).
conn = sqlite3.connect(db_path)
try:
    # Read the query results into a pandas DataFrame
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Display the first few rows of the DataFrame
df.head(10)

# Optional: If you want to save this to a CSV file for further analysis:
# df.to_csv('newspaper_data_summary.csv', index=False)

# STEP 2: Generate Prompts
**Note:** This step is required before moving on to the next notebook.

In [None]:
# Load the system message template from the prompt file.
# The encoding is pinned so the text is decoded the same way on every platform
# instead of depending on the locale's default encoding.
# NOTE(review): assumes the prompt file is saved as UTF-8 — confirm.
with open(config['prompt_filepath'], 'r', encoding='utf-8') as file:
    system_message_content = file.read().strip()

# Load the JSON schema from the file (read explicitly as UTF-8 as well)
with open(config['JSON_schema_path'], 'r', encoding='utf-8') as file:
    json_schema = json.load(file)

def generate_full_prompt(row):
    """Build the JSON-encoded chat-completion request for one DataFrame row.

    Relies on three module-level names set up before this function is called:
    ``system_message_content`` (the prompt template), ``json_schema`` (the
    parsed response schema) and ``config`` (for 'llm_model' and 'max_tokens').

    Args:
        row: A row as passed by ``df.apply(..., axis=1)``. The fields 'Date',
            'Package ID', 'Part', 'Page' and 'ComposedBlock Content' are read,
            together with ``row.name`` (the row's index label).

    Returns:
        str: The request serialized with ``json.dumps``. It contains a
        ``custom_id`` of the form ``<package id>-<part>-<page>-<row number>``
        and a body holding the model, the system/user messages, the token
        limit and the strict JSON-schema response format.
    """
    date = row['Date']
    # Substitute the row's date for the {Newspaper_Date} placeholder in the template.
    system_message = {"role": "system", "content": system_message_content.replace('{Newspaper_Date}', date)}
    user_content = str(row['ComposedBlock Content'])
    user_message = {"role": "user", "content": user_content}
    package_id = row['Package ID']
    part = row['Part']
    page = row['Page']

    # row.name is the row's index label; +1 turns it into a 1-based sequence number.
    # NOTE(review): assumes df still has its default integer index — confirm.
    row_index = row.name + 1

    custom_id = f"{package_id}-{part}-{page}-{row_index}"

    # The schema is the same for every row, so the module-level json_schema
    # is reused here rather than re-reading and re-parsing the file per row.

    # Prepare the full prompt JSON
    full_prompt = {
        "custom_id": custom_id,
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": config['llm_model'],
            "messages": [system_message, user_message],
            "max_tokens": config['max_tokens'],
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "response_data",
                    "strict": True,
                    "schema": json_schema
                }
            }
        }
    }

    return json.dumps(full_prompt)

# Build one request string per row (row-wise apply), then store the results
# in the 'Full Prompt' column of the DataFrame.
full_prompts = df.apply(generate_full_prompt, axis=1)
df['Full Prompt'] = full_prompts

# Preview the first rows, now including the generated prompts
df.head()

# Commit prompts to database

In [None]:
import sqlite3
from sqlalchemy import create_engine

# Establish a connection to the database
engine = create_engine(f'sqlite:///{db_path}', echo=False)

try:
    # Write the DataFrame (now carrying the 'Full Prompt' column) back to the table.
    # NOTE(review): if_exists='replace' drops 'newspaper_data' and recreates it
    # from df alone, so any column that was not loaded into df (the load query
    # leaves out 'Raw API Result') is lost here — confirm this is intended.
    df.to_sql('newspaper_data', engine, if_exists='replace', index=False)
finally:
    # When given an Engine, to_sql handles its own connection and commit, so
    # no separate sqlite3 commit is needed; a commit on a freshly opened
    # connection has nothing pending. Dispose of the engine so its pooled
    # connection releases the database file.
    engine.dispose()

print("Updated data committed to the 'newspaper_data' table. Rows: {}".format(len(df)))