In [1]:
import sqlite3

# Path of the SQLite file opened by create_new_database_and_tables();
# a bare file name like this is resolved relative to the current working directory.
NEW_SQLITE_DATABASE = 'derstandard.db'

def create_new_database_and_tables(db_path=None):
    """Create the SQLite database file and its tables if they do not exist yet.

    Three tables are created, each with ``CREATE TABLE IF NOT EXISTS`` so the
    function can be called repeatedly without error:

    * ``Urls``     -- URL (unique), publication date, download date, success flag.
    * ``Failed``   -- error records (date, message, success flag) referencing ``Urls.urlID``.
    * ``Articles`` -- article fields (kicker, title, subtitle, author, datetime,
      text) referencing ``Urls.urlID``.

    Args:
        db_path: Path of the SQLite file to open/create. When omitted, the
            module-level ``NEW_SQLITE_DATABASE`` is used.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
            The connection is closed in that case as well.
    """
    if db_path is None:
        db_path = NEW_SQLITE_DATABASE

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Urls table.
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS Urls (
            urlID INTEGER PRIMARY KEY AUTOINCREMENT,
            url TEXT UNIQUE,
            pubdate DATE,
            download_date DATE,
            success BOOLEAN
        )
        ''')

        # Failed table.
        # NOTE: SQLite only enforces FOREIGN KEY clauses on connections that
        # have run "PRAGMA foreign_keys = ON"; this function does not do so.
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS Failed (
            failedID INTEGER PRIMARY KEY AUTOINCREMENT,
            urlID INTEGER,
            date DATE,
            errormsg TEXT,
            success BOOLEAN,
            FOREIGN KEY(urlID) REFERENCES Urls(urlID)
        )
        ''')

        # Articles table.
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS Articles (
            articleID INTEGER PRIMARY KEY AUTOINCREMENT,
            urlID INTEGER,
            kicker TEXT,
            title TEXT,
            subtitle TEXT,
            author TEXT,
            datetime DATETIME,
            article_text TEXT,
            FOREIGN KEY(urlID) REFERENCES Urls(urlID)
        )
        ''')

        # Add further tables here if needed.

        conn.commit()
    finally:
        # Always release the file handle, even when a statement above failed.
        conn.close()

# Create the database file and its tables
# (safe to re-run: every table is created with CREATE TABLE IF NOT EXISTS).
create_new_database_and_tables()


In [3]:
def migrate_data_to_new_database(old_db_path, new_db_path):
    """Copy URLs, articles and failure records from the old database into the new one.

    The old database is read from three tables:

    * ``urls(URL_ID, URL, publication_date, url_download_date)``
    * ``articles(url, kicker, title, subtitle, author, datetime, article_text)``
    * ``failed(url, date, exception)``

    The new database must already contain the ``Urls``, ``Articles`` and
    ``Failed`` tables. Articles and failures are attached to their URL by
    looking the URL string up in the new ``Urls`` table; rows whose URL is not
    found there are reported on stdout and skipped.

    The whole migration runs as a single transaction on the new database: it
    is committed once at the end and rolled back if anything fails, so an
    error never leaves a half-migrated target behind.

    Args:
        old_db_path: Path of the SQLite file to read from.
        new_db_path: Path of the SQLite file to write to.

    Raises:
        sqlite3.Error: On any database failure, e.g. a missing table or a
            duplicate URL violating the UNIQUE constraint on ``Urls.url``.
            Both connections are closed before the error propagates.
    """
    old_conn = sqlite3.connect(old_db_path)
    try:
        new_conn = sqlite3.connect(new_db_path)
        try:
            old_cursor = old_conn.cursor()
            new_cursor = new_conn.cursor()

            # --- URLs ---------------------------------------------------
            # Rows are streamed from the cursor instead of fetchall() so the
            # full table never has to sit in memory.
            old_cursor.execute('SELECT URL_ID, URL, publication_date, url_download_date FROM urls')
            for _old_url_id, url, pubdate, download_date in old_cursor:
                # A URL counts as successful once it has a download date.
                success = download_date is not None
                new_cursor.execute('''
                    INSERT INTO Urls (url, pubdate, download_date, success)
                    VALUES (?, ?, ?, ?)
                ''', (url, pubdate, download_date, success))

            # --- Articles -----------------------------------------------
            old_cursor.execute('SELECT url, kicker, title, subtitle, author, datetime, article_text FROM articles')
            for url, kicker, title, subtitle, author, datetime_value, article_text in old_cursor:
                url_id = _find_new_url_id(new_cursor, url)
                if url_id is None:
                    # NOTE(review): a recorded run printed URLs of the form
                    # "/https://..." here; presumably a stray leading slash in
                    # the old data -- confirm before normalising.
                    print(f"URL nicht in neuer Urls-Tabelle gefunden: {url}")
                    continue
                new_cursor.execute('''
                    INSERT INTO Articles (urlID, kicker, title, subtitle, author, datetime, article_text)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                ''', (url_id, kicker, title, subtitle, author, datetime_value, article_text))

            # --- Failures -----------------------------------------------
            old_cursor.execute('SELECT url, date, exception FROM failed')
            for url, date, exception_msg in old_cursor:
                url_id = _find_new_url_id(new_cursor, url)
                if url_id is None:
                    print(f"URL nicht in neuer Urls-Tabelle gefunden: {url}")
                    continue
                # Failure records are by definition not successful.
                new_cursor.execute('''
                    INSERT INTO Failed (urlID, date, errormsg, success)
                    VALUES (?, ?, ?, ?)
                ''', (url_id, date, exception_msg, False))

            # Single commit: either everything above lands or nothing does.
            new_conn.commit()
        except BaseException:
            # Also covers KeyboardInterrupt from an interrupted notebook cell.
            new_conn.rollback()
            raise
        finally:
            new_conn.close()
    finally:
        old_conn.close()


def _find_new_url_id(cursor, url):
    """Return the ``urlID`` stored for ``url`` in the ``Urls`` table, or ``None`` if absent."""
    cursor.execute('SELECT urlID FROM Urls WHERE url = ?', (url,))
    match = cursor.fetchone()
    return match[0] if match else None

# Run the migration from the old scraping database into the new schema.
# NOTE(review): new_db_path repeats the literal held in NEW_SQLITE_DATABASE; keep the two in sync.
# NOTE(review): Urls.url is UNIQUE and the migration uses plain INSERTs, so running this
# cell again against an already-populated target raises sqlite3.IntegrityError.
old_db_path = 'webscraping_derstandard.db'
new_db_path = 'derstandard.db'
migrate_data_to_new_database(old_db_path, new_db_path)

URL nicht in neuer Urls-Tabelle gefunden: /https://www.derstandard.at/jetzt/livebericht/98812/suzuka-international-racing-course---wurz-im-brasilianer-sandwich
URL nicht in neuer Urls-Tabelle gefunden: /https://www.derstandard.at/jetzt/livebericht/90910/und-eddie-irvine-gewinnt-sepang-doch
URL nicht in neuer Urls-Tabelle gefunden: /https://www.derstandard.at/jetzt/livebericht/47675/haekkinen-gewinnt---irvine-ist-die-zumindest-partielle-sonnenfinsternis-von-michael-schumacher-so-hp
URL nicht in neuer Urls-Tabelle gefunden: /https://www.derstandard.at/jetzt/livebericht/50173/coulthard-der-transportunternehmerssohn-gewinnt
URL nicht in neuer Urls-Tabelle gefunden: /https://www.derstandard.at/jetzt/livebericht/44122/irvine-gewinnt-abermals---hockenheim-ist-natuerlich-jetzt-ein-irrenhaus-so-hp
URL nicht in neuer Urls-Tabelle gefunden: /https://www.derstandard.at/jetzt/livebericht/41924/irvine-der-gebrauchtwagenhaendlerssohn-gewinnt---toll-der-mann-mit-dem-tankschlauch
URL nicht in neuer Url