In [8]:
import re
import requests
import pandas as pd
import json
from bs4 import BeautifulSoup as bs
import time
import sqlite3
from urllib.parse import quote_plus
import yaml
from datetime import datetime
import os
from KBDownloader import search_swedish_newspapers, fetch_newspaper_data, save_checkpoint, load_checkpoint

# Get today's date as a YYYY-MM-DD string
today_date = datetime.today().strftime('%Y-%m-%d')

# Load the YAML configuration file.
# The encoding is stated explicitly: the configuration contains non-ASCII
# (Swedish) text, and the platform default encoding is not UTF-8 everywhere.
with open('config.yaml', 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# Fail early with one actionable message if any setting read below is absent
# (an empty YAML file makes safe_load return None, hence the `or {}`).
REQUIRED_CONFIG_KEYS = ('venue_list', 'start_year', 'years_to_crawl', 'newspaper', 'db_path')
missing_config_keys = [name for name in REQUIRED_CONFIG_KEYS if name not in (config or {})]
if missing_config_keys:
    raise KeyError(f"config.yaml is missing required setting(s): {', '.join(missing_config_keys)}")

# Assign variables from the YAML configuration
venue_list = config['venue_list']          # path of the venue spreadsheet (read with pd.read_excel)
start_year = config['start_year']          # first year to crawl
years_to_crawl = config['years_to_crawl']  # number of consecutive years to crawl
newspaper = config['newspaper']            # newspaper title, resolved to a collection ID
db_path = config['db_path']                # path of the SQLite database file

# Supported newspaper titles mapped to their collection IDs (libris.kb.se URIs)
NEWSPAPER_COLLECTION_IDS = {
    'Dagens nyheter': 'https://libris.kb.se/m5z2w4lz3m2zxpk#it',
    'Svenska Dagbladet': 'https://libris.kb.se/2ldhmx8d4mcrlq9#it',
    'Aftonbladet': 'https://libris.kb.se/dwpgqn5q03ft91j#it',
    'Dagligt Allehanda': 'https://libris.kb.se/9tmqzv3m32xfzcz#it',
}

# Resolve the configured newspaper title; an unknown title (or an empty ID)
# is rejected before any crawling starts.
collection_id = NEWSPAPER_COLLECTION_IDS.get(newspaper, None)
if collection_id is None or collection_id == '':
    raise ValueError(f"Invalid newspaper name: {newspaper}")

# Ensure the database file exists.
# sqlite3.connect() creates a missing database file but not its parent
# directory, so the directory is created first; otherwise a fresh checkout
# without that folder fails with "unable to open database file".
if not os.path.exists(db_path):
    db_dir = os.path.dirname(db_path)
    if db_dir:  # empty when db_path is a bare file name in the working directory
        os.makedirs(db_dir, exist_ok=True)
    conn = sqlite3.connect(db_path)
    conn.close()

# Restore the crawl position from a saved checkpoint when there is one.
# The checkpoint is read as a mapping with 'year', 'half' and 'index' entries;
# without a checkpoint the configured start_year is kept and the crawl starts
# at the first half-year and the first venue row.
checkpoint = load_checkpoint()
if checkpoint:
    start_year = checkpoint['year']
    start_half = checkpoint['half']
    start_index = checkpoint['index']
else:
    start_half = 0
    start_index = 0

# Report every setting from the YAML configuration plus the resolved
# collection ID, one "name: value" entry per line.
settings_report = ["Configuration Settings:"]
settings_report.extend(f"{name}: {setting}" for name, setting in config.items())
settings_report.append(f"Collection ID: {collection_id}")
print("\n".join(settings_report))

# Read the venue spreadsheet into a DataFrame
df = pd.read_excel(venue_list)

Configuration Settings:
venue_list: Datasets/Venues_plus_Konsert.xlsx
start_year: 1908
years_to_crawl: 1
newspaper: Dagligt Allehanda
prompt_filepath: llm_prompt_for_deployment.txt
db_path: Datasets/26.07_Deployment_Version_1.0.db
llm_model: gpt-4o-mini
max_tokens: 1000
Stockholm_Concert_Database_Path: Datasets/All_Concerts_1908_filtered.xlsx
columns_to_compare: ['normalized_date', 'name', 'venue']
column_mapping: {'konsert_datum': 'date', 'konsert_namn': 'name', 'lokal_namn': 'venue', 'arrangör': 'organiser'}
Collection ID: https://libris.kb.se/9tmqzv3m32xfzcz#it


In [9]:
# Resume from the last saved checkpoint, if one exists. The checkpoint holds
# the year, the half-year (0 = Jan-Jun, otherwise Jul-Dec) and the venue row
# index at which to continue.
checkpoint = load_checkpoint()

# The crawl window is always derived from the configuration. Computing the end
# year from a start_year that was overwritten by the checkpoint would shift
# the window forward on every resume and crawl years that were never requested.
config_start_year = config['start_year']
end_year = config_start_year + years_to_crawl

start_year = checkpoint['year'] if checkpoint else config_start_year
start_half = checkpoint['half'] if checkpoint else 0
start_index = checkpoint['index'] if checkpoint else 0

if not config_start_year <= start_year < end_year:
    # Most likely a checkpoint left over from a run with different settings.
    print(f"Note: resume year {start_year} is outside the configured range "
          f"{config_start_year}-{end_year - 1}.")

# Queries that raised or reported failure. The loop keeps going after a
# failure and later checkpoints overwrite the failed position, so without this
# record a failed query would be lost silently.
failed_queries = []

# Main loop: one pass over all venues per half-year of every year in the window
for year in range(start_year, end_year):
    for half in range(start_half, 2):
        if half == 0:
            from_date = datetime(year, 1, 1)
            to_date = datetime(year, 6, 30)
        else:
            from_date = datetime(year, 7, 1)
            to_date = datetime(year, 12, 31)

        print(f"Processing data from {from_date} to {to_date}")

        for index in range(start_index, len(df)):
            row = df.iloc[index]
            query = row['Lokal']  # the venue name is used as the search term

            try:
                result = fetch_newspaper_data(
                    query=query,
                    from_date=from_date.strftime('%Y-%m-%d'),
                    to_date=to_date.strftime('%Y-%m-%d'),
                    newspaper=collection_id,
                    config=config,
                    db_path=db_path
                )

                if result.get('success'):
                    print(f"Processed query '{query}' successfully.")
                else:
                    print(f"Failed to process query '{query}': {result.get('message')}")
                    failed_queries.append((year, half, index, query, result.get('message')))

                # Save checkpoint after each query, successful or not
                save_checkpoint(year, half, index + 1)

            except Exception as e:
                print(f"Error processing query '{query}': {str(e)}")
                # Point the checkpoint at the failed row so that an interruption
                # right now would retry it on the next run.
                save_checkpoint(year, half, index)
                failed_queries.append((year, half, index, query, str(e)))
                # Consider raising specific exceptions here instead of a broad Exception

        print(f"Waiting. Currently at {from_date} to {to_date}")
        time.sleep(3)  # in seconds

        # Reset start_index for the next half, but not start_half or start_year
        start_index = 0

    # Reset start_half for the next year, but not start_year
    start_half = 0

print("All queries processed for all years")

# Summarise everything that did not complete so it can be re-run by hand.
if failed_queries:
    print(f"{len(failed_queries)} query/queries did not complete and were not retried:")
    for failed_year, failed_half, failed_index, failed_query, reason in failed_queries:
        print(f"  Year {failed_year}, Half {failed_half}, Index {failed_index}: "
              f"'{failed_query}' ({reason})")

2024-07-25 14:21:06,346 - INFO - Starting fetch_newspaper_data for query: Konsert, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:06,475 - INFO - Search results received. Hits: 0
2024-07-25 14:21:06,475 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:06,476 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:06,477 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:06,478 - INFO - Starting fetch_newspaper_data for query: La Croix salong, dates: 1908-01-01 to 1908-06-30


Processing data from 1908-01-01 00:00:00 to 1908-06-30 00:00:00
Processed query 'Konsert' successfully.
Checkpoint saved: Year 1908, Half 0, Index 1


2024-07-25 14:21:06,624 - INFO - Search results received. Hits: 0
2024-07-25 14:21:06,624 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:06,625 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:06,625 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:06,626 - INFO - Starting fetch_newspaper_data for query: Norra paviljongen i Trädgårdsföreningens lokal, dates: 1908-01-01 to 1908-06-30


Processed query 'La Croix salong' successfully.
Checkpoint saved: Year 1908, Half 0, Index 2


2024-07-25 14:21:06,832 - INFO - Search results received. Hits: 0
2024-07-25 14:21:06,832 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:06,834 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:06,834 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:06,836 - INFO - Starting fetch_newspaper_data for query: Wallmans lokal (Mäster Samuels gränd 11), dates: 1908-01-01 to 1908-06-30


Processed query 'Norra paviljongen i Trädgårdsföreningens lokal' successfully.
Checkpoint saved: Year 1908, Half 0, Index 3


2024-07-25 14:21:07,112 - INFO - Search results received. Hits: 0
2024-07-25 14:21:07,112 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:07,113 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:07,114 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:07,115 - INFO - Starting fetch_newspaper_data for query: Kungliga opera, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:07,259 - INFO - Search results received. Hits: 0
2024-07-25 14:21:07,259 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:07,260 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:07,260 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:07,261 - INFO - Starting fetch_newspaper_data for query: Kungliga teater, dates: 1908-01-01 to 1908-06-30


Processed query 'Wallmans lokal (Mäster Samuels gränd 11)' successfully.
Checkpoint saved: Year 1908, Half 0, Index 4
Processed query 'Kungliga opera' successfully.
Checkpoint saved: Year 1908, Half 0, Index 5


2024-07-25 14:21:07,350 - INFO - Search results received. Hits: 0
2024-07-25 14:21:07,350 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:07,351 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:07,352 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:07,353 - INFO - Starting fetch_newspaper_data for query: F.d. Kirsteinska huset (vid Clara), dates: 1908-01-01 to 1908-06-30


Processed query 'Kungliga teater' successfully.
Checkpoint saved: Year 1908, Half 0, Index 6


2024-07-25 14:21:07,576 - INFO - Search results received. Hits: 0
2024-07-25 14:21:07,576 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:07,577 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:07,578 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:07,579 - INFO - Starting fetch_newspaper_data for query: Hotel W6 resp. Hotel Continental, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:07,708 - INFO - Search results received. Hits: 0
2024-07-25 14:21:07,709 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:07,710 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:07,710 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:07,711 - INFO - Starting fetch_newspaper_data for query: La Croix mindre salong, dates: 1908-01-01 to 1908-06-30


Processed query 'F.d. Kirsteinska huset (vid Clara)' successfully.
Checkpoint saved: Year 1908, Half 0, Index 7
Processed query 'Hotel W6 resp. Hotel Continental' successfully.
Checkpoint saved: Year 1908, Half 0, Index 8


2024-07-25 14:21:07,843 - INFO - Search results received. Hits: 0
2024-07-25 14:21:07,843 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:07,844 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:07,845 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:07,846 - INFO - Starting fetch_newspaper_data for query: Södra teatern i Stadshuset, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:07,980 - INFO - Search results received. Hits: 0
2024-07-25 14:21:07,981 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:07,982 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:07,982 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:07,983 - INFO - Starting fetch_newspaper_data for query: Stadshus salongen, dates: 1908-01-01 to 1908-06-30


Processed query 'La Croix mindre salong' successfully.
Checkpoint saved: Year 1908, Half 0, Index 9
Processed query 'Södra teatern i Stadshuset' successfully.
Checkpoint saved: Year 1908, Half 0, Index 10


2024-07-25 14:21:08,138 - INFO - Search results received. Hits: 0
2024-07-25 14:21:08,139 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:08,140 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:08,141 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:08,142 - INFO - Starting fetch_newspaper_data for query: Stora börssalen, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:08,270 - INFO - Search results received. Hits: 0
2024-07-25 14:21:08,270 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:08,272 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:08,273 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:08,274 - INFO - Starting fetch_newspaper_data for query: St Nicolai kyrka, dates: 1908-01-01 to 1908-06-30


Processed query 'Stadshus salongen' successfully.
Checkpoint saved: Year 1908, Half 0, Index 11
Processed query 'Stora börssalen' successfully.
Checkpoint saved: Year 1908, Half 0, Index 12


2024-07-25 14:21:08,503 - INFO - Search results received. Hits: 0
2024-07-25 14:21:08,504 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:08,506 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:08,507 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:08,509 - INFO - Starting fetch_newspaper_data for query: Ladugårdslands kyrka, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:08,686 - INFO - Search results received. Hits: 0
2024-07-25 14:21:08,687 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:08,688 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:08,689 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:08,691 - INFO - Starting fetch_newspaper_data for query: Hedvig Eleonora, dates: 1908-01-01 to 1908-06-30


Processed query 'St Nicolai kyrka' successfully.
Checkpoint saved: Year 1908, Half 0, Index 13
Processed query 'Ladugårdslands kyrka' successfully.
Checkpoint saved: Year 1908, Half 0, Index 14


2024-07-25 14:21:08,868 - INFO - Search results received. Hits: 0
2024-07-25 14:21:08,868 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:08,869 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:08,869 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:08,870 - INFO - Starting fetch_newspaper_data for query: Östermalms ka, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:08,990 - INFO - Search results received. Hits: 0
2024-07-25 14:21:08,990 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:08,991 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:08,992 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:08,993 - INFO - Starting fetch_newspaper_data for query: Trädgårdsföreningens lokal, dates: 1908-01-01 to 1908-06-30


Processed query 'Hedvig Eleonora' successfully.
Checkpoint saved: Year 1908, Half 0, Index 15
Processed query 'Östermalms ka' successfully.
Checkpoint saved: Year 1908, Half 0, Index 16


2024-07-25 14:21:09,126 - INFO - Search results received. Hits: 0
2024-07-25 14:21:09,127 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:09,129 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:09,129 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:09,130 - INFO - Starting fetch_newspaper_data for query: Tyska bryggeriets trädgård (Tullportsgatan 42 på Söder), dates: 1908-01-01 to 1908-06-30


Processed query 'Trädgårdsföreningens lokal' successfully.
Checkpoint saved: Year 1908, Half 0, Index 17


2024-07-25 14:21:09,376 - INFO - Search results received. Hits: 0
2024-07-25 14:21:09,377 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:09,378 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:09,379 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:09,381 - INFO - Starting fetch_newspaper_data for query: Stora mosebacke trädgård, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:09,519 - INFO - Search results received. Hits: 0
2024-07-25 14:21:09,520 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:09,521 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:09,522 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:09,524 - INFO - Starting fetch_newspaper_data for query: Kungl. Humlegården, dates: 1908-01-01 to 1908-06-30


Processed query 'Tyska bryggeriets trädgård (Tullportsgatan 42 på Söder)' successfully.
Checkpoint saved: Year 1908, Half 0, Index 18
Processed query 'Stora mosebacke trädgård' successfully.
Checkpoint saved: Year 1908, Half 0, Index 19


2024-07-25 14:21:09,688 - INFO - Search results received. Hits: 0
2024-07-25 14:21:09,689 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:09,690 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:09,690 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:09,692 - INFO - Starting fetch_newspaper_data for query: Tivoli vid Nortullsgatan, dates: 1908-01-01 to 1908-06-30


Processed query 'Kungl. Humlegården' successfully.
Checkpoint saved: Year 1908, Half 0, Index 20


2024-07-25 14:21:09,913 - INFO - Search results received. Hits: 0
2024-07-25 14:21:09,914 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:09,917 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:09,918 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:09,920 - INFO - Starting fetch_newspaper_data for query: Novilla på Kungl. Djurgården, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:10,105 - INFO - Search results received. Hits: 0
2024-07-25 14:21:10,106 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:10,108 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:10,109 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:10,110 - INFO - Starting fetch_newspaper_data for query: Kungl. Djurgårdsbrunns salong, dates: 1908-01-01 to 1908-06-30


Processed query 'Tivoli vid Nortullsgatan' successfully.
Checkpoint saved: Year 1908, Half 0, Index 21
Processed query 'Novilla på Kungl. Djurgården' successfully.
Checkpoint saved: Year 1908, Half 0, Index 22


2024-07-25 14:21:10,266 - INFO - Search results received. Hits: 0
2024-07-25 14:21:10,266 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:10,268 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:10,269 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:10,271 - INFO - Starting fetch_newspaper_data for query: Mindre teatern, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:10,397 - INFO - Search results received. Hits: 0
2024-07-25 14:21:10,398 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:10,398 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:10,399 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:10,400 - INFO - Starting fetch_newspaper_data for query: Strömsborg, dates: 1908-01-01 to 1908-06-30


Processed query 'Kungl. Djurgårdsbrunns salong' successfully.
Checkpoint saved: Year 1908, Half 0, Index 23
Processed query 'Mindre teatern' successfully.
Checkpoint saved: Year 1908, Half 0, Index 24


2024-07-25 14:21:10,510 - INFO - Search results received. Hits: 0
2024-07-25 14:21:10,511 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:10,512 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:10,513 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:10,514 - INFO - Starting fetch_newspaper_data for query: Nedre Börssalen, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:10,639 - INFO - Search results received. Hits: 0
2024-07-25 14:21:10,639 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:10,640 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:10,641 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:10,642 - INFO - Starting fetch_newspaper_data for query: Mosebacke, dates: 1908-01-01 to 1908-06-30


Processed query 'Strömsborg' successfully.
Checkpoint saved: Year 1908, Half 0, Index 25
Processed query 'Nedre Börssalen' successfully.
Checkpoint saved: Year 1908, Half 0, Index 26


2024-07-25 14:21:10,752 - INFO - Search results received. Hits: 0
2024-07-25 14:21:10,752 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:10,753 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:10,754 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:10,755 - INFO - Starting fetch_newspaper_data for query: Storkyrkan, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:10,907 - INFO - Search results received. Hits: 0
2024-07-25 14:21:10,908 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:10,909 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:10,910 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:10,911 - INFO - Starting fetch_newspaper_data for query: Pavillion du bazar på Norrbron, dates: 1908-01-01 to 1908-06-30


Processed query 'Mosebacke' successfully.
Checkpoint saved: Year 1908, Half 0, Index 27
Processed query 'Storkyrkan' successfully.
Checkpoint saved: Year 1908, Half 0, Index 28


2024-07-25 14:21:11,101 - INFO - Search results received. Hits: 0
2024-07-25 14:21:11,101 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:11,102 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:11,103 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:11,104 - INFO - Starting fetch_newspaper_data for query: Södra teatern, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:11,203 - INFO - Search results received. Hits: 0
2024-07-25 14:21:11,203 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:11,204 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:11,205 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:11,206 - INFO - Starting fetch_newspaper_data for query: Värdshuset Johannshov, dates: 1908-01-01 to 1908-06-30


Processed query 'Pavillion du bazar på Norrbron' successfully.
Checkpoint saved: Year 1908, Half 0, Index 29
Processed query 'Södra teatern' successfully.
Checkpoint saved: Year 1908, Half 0, Index 30


2024-07-25 14:21:11,365 - INFO - Search results received. Hits: 0
2024-07-25 14:21:11,366 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:11,367 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:11,367 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:11,369 - INFO - Starting fetch_newspaper_data for query: Värdshuset Claës på Hörnet, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:11,489 - INFO - Search results received. Hits: 0
2024-07-25 14:21:11,490 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:11,491 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:11,492 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:11,493 - INFO - Starting fetch_newspaper_data for query: Vinterträdgården i Novilla, dates: 1908-01-01 to 1908-06-30


Processed query 'Värdshuset Johannshov' successfully.
Checkpoint saved: Year 1908, Half 0, Index 31
Processed query 'Värdshuset Claës på Hörnet' successfully.
Checkpoint saved: Year 1908, Half 0, Index 32


2024-07-25 14:21:11,650 - INFO - Search results received. Hits: 0
2024-07-25 14:21:11,650 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:11,651 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:11,652 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:11,653 - INFO - Starting fetch_newspaper_data for query: Stadssmedsgatan nr 9, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:11,840 - INFO - Search results received. Hits: 0
2024-07-25 14:21:11,841 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:11,842 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:11,843 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:11,844 - INFO - Starting fetch_newspaper_data for query: Beridarbansgatan nr 18, dates: 1908-01-01 to 1908-06-30


Processed query 'Vinterträdgården i Novilla' successfully.
Checkpoint saved: Year 1908, Half 0, Index 33
Processed query 'Stadssmedsgatan nr 9' successfully.
Checkpoint saved: Year 1908, Half 0, Index 34


2024-07-25 14:21:11,997 - INFO - Search results received. Hits: 0
2024-07-25 14:21:11,998 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:12,000 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:12,001 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:12,002 - INFO - Starting fetch_newspaper_data for query: Davidssons södra paviljong, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:12,159 - INFO - Search results received. Hits: 0
2024-07-25 14:21:12,160 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:12,162 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:12,163 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:12,164 - INFO - Starting fetch_newspaper_data for query: Kungliga musikaliska akademiens lokal, dates: 1908-01-01 to 1908-06-30


Processed query 'Beridarbansgatan nr 18' successfully.
Checkpoint saved: Year 1908, Half 0, Index 35
Processed query 'Davidssons södra paviljong' successfully.
Checkpoint saved: Year 1908, Half 0, Index 36


2024-07-25 14:21:12,292 - INFO - Search results received. Hits: 0
2024-07-25 14:21:12,293 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:12,294 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:12,294 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:12,296 - INFO - Starting fetch_newspaper_data for query: Konsertsalongen på Kungliga Djurgården, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:12,477 - INFO - Search results received. Hits: 0
2024-07-25 14:21:12,478 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:12,479 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:12,480 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:12,482 - INFO - Starting fetch_newspaper_data for query: Lilla Catharineberg, dates: 1908-01-01 to 1908-06-30


Processed query 'Kungliga musikaliska akademiens lokal' successfully.
Checkpoint saved: Year 1908, Half 0, Index 37
Processed query 'Konsertsalongen på Kungliga Djurgården' successfully.
Checkpoint saved: Year 1908, Half 0, Index 38


2024-07-25 14:21:12,637 - INFO - Search results received. Hits: 0
2024-07-25 14:21:12,638 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:12,638 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:12,639 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:12,640 - INFO - Starting fetch_newspaper_data for query: Vid prins Gustafs byst vid Haga, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:12,807 - INFO - Search results received. Hits: 0
2024-07-25 14:21:12,807 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:12,808 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:12,809 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:12,810 - INFO - Starting fetch_newspaper_data for query: Waxholms kyrka, dates: 1908-01-01 to 1908-06-30


Processed query 'Lilla Catharineberg' successfully.
Checkpoint saved: Year 1908, Half 0, Index 39
Processed query 'Vid prins Gustafs byst vid Haga' successfully.
Checkpoint saved: Year 1908, Half 0, Index 40


2024-07-25 14:21:12,922 - INFO - Search results received. Hits: 0
2024-07-25 14:21:12,922 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:12,923 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:12,924 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:12,925 - INFO - Starting fetch_newspaper_data for query: Södertelje kyrka, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:13,032 - INFO - Search results received. Hits: 0
2024-07-25 14:21:13,033 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:13,034 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:13,034 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:13,035 - INFO - Starting fetch_newspaper_data for query: Kungshatt, dates: 1908-01-01 to 1908-06-30


Processed query 'Waxholms kyrka' successfully.
Checkpoint saved: Year 1908, Half 0, Index 41
Processed query 'Södertelje kyrka' successfully.
Checkpoint saved: Year 1908, Half 0, Index 42


2024-07-25 14:21:13,174 - INFO - Search results received. Hits: 0
2024-07-25 14:21:13,174 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:13,175 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:13,176 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:13,178 - INFO - Starting fetch_newspaper_data for query: Djurgården, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:13,310 - INFO - Search results received. Hits: 0
2024-07-25 14:21:13,310 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:13,311 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:13,312 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:13,321 - INFO - Starting fetch_newspaper_data for query: Vaxholm (nya salongen), dates: 1908-01-01 to 1908-06-30


Processed query 'Kungshatt' successfully.
Checkpoint saved: Year 1908, Half 0, Index 43
Processed query 'Djurgården' successfully.
Checkpoint saved: Year 1908, Half 0, Index 44


2024-07-25 14:21:13,490 - INFO - Search results received. Hits: 0
2024-07-25 14:21:13,490 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:13,491 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:13,492 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:13,493 - INFO - Starting fetch_newspaper_data for query: Hasselbacken, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:13,623 - INFO - Search results received. Hits: 0
2024-07-25 14:21:13,624 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:13,625 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:13,625 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:13,626 - INFO - Starting fetch_newspaper_data for query: Djurgårdsteatern, dates: 1908-01-01 to 1908-06-30


Processed query 'Vaxholm (nya salongen)' successfully.
Checkpoint saved: Year 1908, Half 0, Index 45
Processed query 'Hasselbacken' successfully.
Checkpoint saved: Year 1908, Half 0, Index 46


2024-07-25 14:21:13,763 - INFO - Search results received. Hits: 0
2024-07-25 14:21:13,764 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:13,765 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:13,766 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:13,767 - INFO - Starting fetch_newspaper_data for query: Katolska kapellet, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:13,881 - INFO - Search results received. Hits: 0
2024-07-25 14:21:13,882 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:13,883 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:13,884 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:13,885 - INFO - Starting fetch_newspaper_data for query: Drottninggatan 80, dates: 1908-01-01 to 1908-06-30


Processed query 'Djurgårdsteatern' successfully.
Checkpoint saved: Year 1908, Half 0, Index 47
Processed query 'Katolska kapellet' successfully.
Checkpoint saved: Year 1908, Half 0, Index 48


2024-07-25 14:21:14,021 - INFO - Search results received. Hits: 0
2024-07-25 14:21:14,021 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:14,023 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:14,024 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:14,026 - INFO - Starting fetch_newspaper_data for query: Robert Kahns lokal Drottninggatan 5, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:14,214 - INFO - Search results received. Hits: 0
2024-07-25 14:21:14,214 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:14,215 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:14,216 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:14,217 - INFO - Starting fetch_newspaper_data for query: Hagströms konditori/schweizeri, Beridaregatan 18, dates: 1908-01-01 to 1908-06-30


Processed query 'Drottninggatan 80' successfully.
Checkpoint saved: Year 1908, Half 0, Index 49
Processed query 'Robert Kahns lokal Drottninggatan 5' successfully.
Checkpoint saved: Year 1908, Half 0, Index 50


2024-07-25 14:21:14,449 - INFO - Search results received. Hits: 0
2024-07-25 14:21:14,450 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:14,452 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:14,453 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:14,455 - INFO - Starting fetch_newspaper_data for query: Berzeli park, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:14,600 - INFO - Search results received. Hits: 0
2024-07-25 14:21:14,601 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:14,603 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:14,603 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:14,605 - INFO - Starting fetch_newspaper_data for query: Bährs lokal (Vid Riddarhustorget), dates: 1908-01-01 to 1908-06-30


Processed query 'Hagströms konditori/schweizeri, Beridaregatan 18' successfully.
Checkpoint saved: Year 1908, Half 0, Index 51
Processed query 'Berzeli park' successfully.
Checkpoint saved: Year 1908, Half 0, Index 52


2024-07-25 14:21:14,754 - INFO - Search results received. Hits: 0
2024-07-25 14:21:14,754 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:14,755 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:14,755 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:14,756 - INFO - Starting fetch_newspaper_data for query: Lilla Ingermarshof, dates: 1908-01-01 to 1908-06-30


Processed query 'Bährs lokal (Vid Riddarhustorget)' successfully.
Checkpoint saved: Year 1908, Half 0, Index 53


2024-07-25 14:21:14,976 - INFO - Search results received. Hits: 0
2024-07-25 14:21:14,977 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:14,978 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:14,978 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:14,995 - INFO - Starting fetch_newspaper_data for query: Tivoli å Kongl Djurgården, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:15,140 - INFO - Search results received. Hits: 0
2024-07-25 14:21:15,140 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:15,141 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:15,142 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:15,143 - INFO - Starting fetch_newspaper_data for query: Kungl. Humlegårdens Rotunda, dates: 1908-01-01 to 1908-06-30


Processed query 'Lilla Ingermarshof' successfully.
Checkpoint saved: Year 1908, Half 0, Index 54
Processed query 'Tivoli å Kongl Djurgården' successfully.
Checkpoint saved: Year 1908, Half 0, Index 55


2024-07-25 14:21:15,308 - INFO - Search results received. Hits: 0
2024-07-25 14:21:15,309 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:15,310 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:15,311 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:15,312 - INFO - Starting fetch_newspaper_data for query: Ladugårdslandsteaterns trädgård, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:15,481 - INFO - Search results received. Hits: 0
2024-07-25 14:21:15,482 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:15,484 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:15,485 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:15,486 - INFO - Starting fetch_newspaper_data for query: Blå porten på kungl. Djurgården, dates: 1908-01-01 to 1908-06-30


Processed query 'Kungl. Humlegårdens Rotunda' successfully.
Checkpoint saved: Year 1908, Half 0, Index 56
Processed query 'Ladugårdslandsteaterns trädgård' successfully.
Checkpoint saved: Year 1908, Half 0, Index 57


2024-07-25 14:21:15,643 - INFO - Search results received. Hits: 0
2024-07-25 14:21:15,643 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:15,644 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:15,644 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:15,645 - INFO - Starting fetch_newspaper_data for query: Schweizeriet på Mosebacke, dates: 1908-01-01 to 1908-06-30


Processed query 'Blå porten på kungl. Djurgården' successfully.
Checkpoint saved: Year 1908, Half 0, Index 58


2024-07-25 14:21:15,846 - INFO - Search results received. Hits: 0
2024-07-25 14:21:15,847 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:15,848 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:15,849 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:15,851 - INFO - Starting fetch_newspaper_data for query: Värdshuset Lübeck, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:15,998 - INFO - Search results received. Hits: 0
2024-07-25 14:21:15,998 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:15,999 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:16,000 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:16,001 - INFO - Starting fetch_newspaper_data for query: Davidssons norra paviljong, dates: 1908-01-01 to 1908-06-30


Processed query 'Schweizeriet på Mosebacke' successfully.
Checkpoint saved: Year 1908, Half 0, Index 59
Processed query 'Värdshuset Lübeck' successfully.
Checkpoint saved: Year 1908, Half 0, Index 60


2024-07-25 14:21:16,141 - INFO - Search results received. Hits: 0
2024-07-25 14:21:16,141 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:16,142 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:16,143 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:16,144 - INFO - Starting fetch_newspaper_data for query: Källaren Nya Norrmalm vid Hötorget, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:16,294 - INFO - Search results received. Hits: 0
2024-07-25 14:21:16,294 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:16,295 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:16,295 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:16,296 - INFO - Starting fetch_newspaper_data for query: Café chantant, dates: 1908-01-01 to 1908-06-30


Processed query 'Davidssons norra paviljong' successfully.
Checkpoint saved: Year 1908, Half 0, Index 61
Processed query 'Källaren Nya Norrmalm vid Hötorget' successfully.
Checkpoint saved: Year 1908, Half 0, Index 62


2024-07-25 14:21:16,406 - INFO - Search results received. Hits: 0
2024-07-25 14:21:16,406 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:16,407 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:16,407 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:16,408 - INFO - Starting fetch_newspaper_data for query: Vinterträdgården, Valhalla, Mäster Samuelsgata 51, dates: 1908-01-01 to 1908-06-30


Processed query 'Café chantant' successfully.
Checkpoint saved: Year 1908, Half 0, Index 63


2024-07-25 14:21:16,708 - INFO - Search results received. Hits: 0
2024-07-25 14:21:16,709 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:16,710 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:16,711 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:16,712 - INFO - Starting fetch_newspaper_data for query: Valhalla, Mäster Samuelsgata 51, dates: 1908-01-01 to 1908-06-30


Processed query 'Vinterträdgården, Valhalla, Mäster Samuelsgata 51' successfully.
Checkpoint saved: Year 1908, Half 0, Index 64


2024-07-25 14:21:16,941 - INFO - Search results received. Hits: 0
2024-07-25 14:21:16,941 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:16,942 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:16,943 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:16,943 - INFO - Starting fetch_newspaper_data for query: Kungsholms kyrka, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:17,063 - INFO - Search results received. Hits: 0
2024-07-25 14:21:17,063 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:17,064 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:17,065 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:17,066 - INFO - Starting fetch_newspaper_data for query: Regeringsgatan 50, dates: 1908-01-01 to 1908-06-30


Processed query 'Valhalla, Mäster Samuelsgata 51' successfully.
Checkpoint saved: Year 1908, Half 0, Index 65
Processed query 'Kungsholms kyrka' successfully.
Checkpoint saved: Year 1908, Half 0, Index 66


2024-07-25 14:21:17,212 - INFO - Search results received. Hits: 0
2024-07-25 14:21:17,213 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:17,214 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:17,214 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:17,215 - INFO - Starting fetch_newspaper_data for query: Adolf Fredriks kyrka, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:17,357 - INFO - Search results received. Hits: 0
2024-07-25 14:21:17,357 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:17,358 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:17,359 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:17,360 - INFO - Starting fetch_newspaper_data for query: Nya teatern (Hammers salong), dates: 1908-01-01 to 1908-06-30


Processed query 'Regeringsgatan 50' successfully.
Checkpoint saved: Year 1908, Half 0, Index 67
Processed query 'Adolf Fredriks kyrka' successfully.
Checkpoint saved: Year 1908, Half 0, Index 68


2024-07-25 14:21:17,477 - INFO - Search results received. Hits: 0
2024-07-25 14:21:17,477 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:17,478 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:17,478 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:17,479 - INFO - Starting fetch_newspaper_data for query: Jakobs kyrka, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:17,613 - INFO - Search results received. Hits: 0
2024-07-25 14:21:17,613 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:17,614 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:17,614 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:17,615 - INFO - Starting fetch_newspaper_data for query: Berns salong, dates: 1908-01-01 to 1908-06-30


Processed query 'Nya teatern (Hammers salong)' successfully.
Checkpoint saved: Year 1908, Half 0, Index 69
Processed query 'Jakobs kyrka' successfully.
Checkpoint saved: Year 1908, Half 0, Index 70


2024-07-25 14:21:17,760 - INFO - Search results received. Hits: 0
2024-07-25 14:21:17,760 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:17,762 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:17,762 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:17,764 - INFO - Starting fetch_newspaper_data for query: Tyska kyrkan, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:17,862 - INFO - Search results received. Hits: 0
2024-07-25 14:21:17,863 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:17,864 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:17,865 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:17,866 - INFO - Starting fetch_newspaper_data for query: Manegen å K. Djurgården, dates: 1908-01-01 to 1908-06-30


Processed query 'Berns salong' successfully.
Checkpoint saved: Year 1908, Half 0, Index 71
Processed query 'Tyska kyrkan' successfully.
Checkpoint saved: Year 1908, Half 0, Index 72


2024-07-25 14:21:18,042 - INFO - Search results received. Hits: 0
2024-07-25 14:21:18,043 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:18,044 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:18,045 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:18,046 - INFO - Starting fetch_newspaper_data for query: Slöjdskolans stora Hörsal, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:18,172 - INFO - Search results received. Hits: 0
2024-07-25 14:21:18,172 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:18,173 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:18,173 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:18,175 - INFO - Starting fetch_newspaper_data for query: Valhalla, Mäster Samuelsgata 51, dates: 1908-01-01 to 1908-06-30


Processed query 'Manegen å K. Djurgården' successfully.
Checkpoint saved: Year 1908, Half 0, Index 73
Processed query 'Slöjdskolans stora Hörsal' successfully.
Checkpoint saved: Year 1908, Half 0, Index 74


2024-07-25 14:21:18,358 - INFO - Search results received. Hits: 0
2024-07-25 14:21:18,359 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:18,359 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:18,360 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:18,361 - INFO - Starting fetch_newspaper_data for query: Strömparterren, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:18,499 - INFO - Search results received. Hits: 0
2024-07-25 14:21:18,499 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:18,500 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:18,501 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:18,501 - INFO - Starting fetch_newspaper_data for query: Odeón-theatern, Regeringsgatan nr 28, dates: 1908-01-01 to 1908-06-30


Processed query 'Valhalla, Mäster Samuelsgata 51' successfully.
Checkpoint saved: Year 1908, Half 0, Index 75
Processed query 'Strömparterren' successfully.
Checkpoint saved: Year 1908, Half 0, Index 76


2024-07-25 14:21:18,684 - INFO - Search results received. Hits: 0
2024-07-25 14:21:18,684 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:18,685 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:18,686 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:18,687 - INFO - Starting fetch_newspaper_data for query: Katarina elementärskolas lokal, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:18,853 - INFO - Search results received. Hits: 0
2024-07-25 14:21:18,854 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:18,854 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:18,855 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:18,856 - INFO - Starting fetch_newspaper_data for query: Piperska Muren, dates: 1908-01-01 to 1908-06-30


Processed query 'Odeón-theatern, Regeringsgatan nr 28' successfully.
Checkpoint saved: Year 1908, Half 0, Index 77
Processed query 'Katarina elementärskolas lokal' successfully.
Checkpoint saved: Year 1908, Half 0, Index 78


2024-07-25 14:21:19,017 - INFO - Search results received. Hits: 0
2024-07-25 14:21:19,017 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:19,018 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:19,019 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:19,020 - INFO - Starting fetch_newspaper_data for query: Frimurarebarnhuset vid Christineberg, dates: 1908-01-01 to 1908-06-30


Processed query 'Piperska Muren' successfully.
Checkpoint saved: Year 1908, Half 0, Index 79


2024-07-25 14:21:19,279 - INFO - Search results received. Hits: 0
2024-07-25 14:21:19,280 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:19,281 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:19,281 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:19,282 - INFO - Starting fetch_newspaper_data for query: Dalarö kyrka, dates: 1908-01-01 to 1908-06-30
2024-07-25 14:21:19,412 - INFO - Search results received. Hits: 0
2024-07-25 14:21:19,413 - INFO - Extracted 0 URLs from search results
2024-07-25 14:21:19,413 - INFO - Table 'newspaper_data' created or already exists
2024-07-25 14:21:19,415 - INFO - Data processing completed. Total rows saved: 0
2024-07-25 14:21:19,417 - INFO - Starting fetch_newspaper_data for query: Tomteboda, strax bortom Carlberg, dates: 1908-01-01 to 1908-06-30


Processed query 'Frimurarebarnhuset vid Christineberg' successfully.
Checkpoint saved: Year 1908, Half 0, Index 80
Processed query 'Dalarö kyrka' successfully.
Checkpoint saved: Year 1908, Half 0, Index 81


KeyboardInterrupt: 

# Load the SQLite Database into a DataFrame

In [None]:
import sqlite3
from contextlib import closing

import pandas as pd

# Load the crawled newspaper rows from the SQLite database into a DataFrame
# for inspection.
#
# The SELECT lists the columns explicitly so that the 'Raw API Result' column
# is left out. Square brackets are SQLite identifier quoting for column names
# that contain spaces.
query = """
SELECT Date, [Package ID], Part, Page, [ComposedBlock Content], [Full Prompt]
FROM newspaper_data
"""

# closing() releases the connection even when the query raises (for example
# if the table has not been created yet); previously the handle leaked on error.
# NOTE: `df` replaces the venue-list DataFrame loaded in the setup cell, so
# re-run the setup cell before restarting the crawl.
with closing(sqlite3.connect(db_path)) as conn:
    df = pd.read_sql_query(query, conn)

# Display the first few rows of the DataFrame
display(df.head())


# Optional: If you want to see all rows, you can use:
# pd.set_option('display.max_rows', None)
# print(df)

# Optional: If you want to save this to a CSV file for further analysis:
# df.to_csv('newspaper_data_summary.csv', index=False)

# Experimental: Delete Duplicate Rows based on ComposedBlock Content

In [None]:
import sqlite3
from contextlib import closing

# Remove duplicate rows from 'newspaper_data', keeping the earliest-inserted
# row for each distinct [ComposedBlock Content] value.
#
# Changes from the previous version of this cell:
#   * The DataFrame column is named 'ComposedBlock Content'. The square
#     brackets are SQL identifier quoting only, so using them as the
#     drop_duplicates() subset raised a KeyError.
#   * The table is no longer rewritten from `df` via to_sql(if_exists='replace').
#     `df` is loaded without 'Raw API Result', so replacing the table from it
#     would permanently drop that column from the database.
#   * SQLAlchemy is no longer needed; the deduplication is one DELETE that runs
#     inside a transaction on the sqlite3 connection.

# Keep the in-memory frame consistent with the table. This runs first so the
# cell fails before touching the database if `df` is not the newspaper frame.
df = df.drop_duplicates(subset='ComposedBlock Content')

with closing(sqlite3.connect(db_path)) as conn:
    # `with conn` commits on success and rolls back if the DELETE raises.
    with conn:
        # Assumes 'newspaper_data' is an ordinary rowid table (not created
        # WITHOUT ROWID) - TODO confirm against the table definition.
        cursor = conn.execute(
            """
            DELETE FROM newspaper_data
            WHERE rowid NOT IN (
                SELECT MIN(rowid)
                FROM newspaper_data
                GROUP BY [ComposedBlock Content]
            )
            """
        )
        removed_rows = cursor.rowcount
    remaining_rows = conn.execute(
        "SELECT COUNT(*) FROM newspaper_data"
    ).fetchone()[0]

print(f"Duplicate rows removed: {removed_rows}")
print(f"Deduplicated data committed to the 'newspaper_data' table. Rows: {remaining_rows}")
