In [2]:
import sqlite3
from transformers import pipeline
from tqdm import tqdm
import logging

# Configure logging: only ERROR and above are written to the log file,
# each record prefixed with a timestamp and the level name.
logging.basicConfig(
    filename='emotion_analysis.log',
    level=logging.ERROR,
    format='%(asctime)s %(levelname)s:%(message)s',
)

def create_emotions_table(conn):
    """Create the ``Emotions`` lookup table and seed it with the six labels.

    The table is created only if it does not exist yet, and the seed rows are
    written with ``INSERT OR IGNORE``, so calling this function repeatedly
    does not duplicate or overwrite rows.

    Args:
        conn: Open ``sqlite3`` connection. It is committed once after the
            table is created and once more after the seed rows are inserted.
    """
    # The position of each name is its emotionID (0..5).
    emotion_names = ('Wut', 'Furcht', 'Ekel', 'Trauer', 'Freude', 'keine Emotion')
    seed_rows = list(enumerate(emotion_names))

    db_cursor = conn.cursor()
    db_cursor.execute('''
    CREATE TABLE IF NOT EXISTS Emotions (
        emotionID INTEGER PRIMARY KEY,
        emotion TEXT UNIQUE
    )
    ''')
    conn.commit()

    db_cursor.executemany('''
    INSERT OR IGNORE INTO Emotions (emotionID, emotion)
    VALUES (?, ?)
    ''', seed_rows)
    conn.commit()

def create_article_comment_emotions_table(conn):
    """Create the ``Article_comment_emotions`` table if it is missing.

    Each row links one (articleID, commentID) pair, which forms the primary
    key, to a single emotionID. The three columns are declared as foreign
    keys to ``Articles``, ``comments`` and ``Emotions`` respectively.

    Args:
        conn: Open ``sqlite3`` connection; committed after the statement runs.
    """
    create_statement = '''
    CREATE TABLE IF NOT EXISTS Article_comment_emotions (
        articleID INTEGER,
        commentID INTEGER,
        emotionID INTEGER,
        PRIMARY KEY (articleID, commentID),
        FOREIGN KEY(articleID) REFERENCES Articles(articleID),
        FOREIGN KEY(commentID) REFERENCES comments(commentID),
        FOREIGN KEY(emotionID) REFERENCES Emotions(emotionID)
    )
    '''
    db_cursor = conn.cursor()
    db_cursor.execute(create_statement)
    conn.commit()

def _build_comment_text(comment_header, comment_body):
    """Join the stripped header and body of a comment into a single string.

    Parts that are not ``str`` (e.g. NULL columns) or that are empty after
    stripping are dropped, so a comment consisting only of whitespace yields
    an empty string.
    """
    parts = []
    for part in (comment_header, comment_body):
        if isinstance(part, str):
            stripped = part.strip()
            if stripped:
                parts.append(stripped)
    return ' '.join(parts)


def process_comments_emotion_analysis(conn, batch_size=100, min_score=0.95):
    """Classify every comment's emotion and store the result per comment.

    All rows of the ``comments`` table are loaded, header and body are joined
    into one text, and the texts are sent to the text-classification pipeline
    in batches. One row per classified comment is written to
    ``Article_comment_emotions`` with ``INSERT OR REPLACE``, so re-running
    overwrites earlier results. Comments without any text are skipped.

    A batch whose classification raises is logged and skipped; a batch whose
    insert fails is logged and rolled back. Neither aborts the run.

    Args:
        conn: Open ``sqlite3`` connection; committed after every stored batch.
        batch_size: Number of comment rows fetched into one pipeline call.
        min_score: Minimum pipeline score for a predicted label to be
            accepted. Predictions below it are stored as 'keine Emotion'.
    """
    cursor = conn.cursor()

    # Fetch all comments up front so the cursor can be reused for inserts.
    cursor.execute('SELECT commentID, articleID, comment_header, comment FROM comments')
    all_comments = cursor.fetchall()

    # Pipeline label -> emotionID in the Emotions table.
    # NOTE(review): assumes the model emits LABEL_0..LABEL_5 - confirm
    # against the model card; unknown labels fall back to 'keine Emotion'.
    label_to_emotion_id = {
        'LABEL_0': 0,  # Wut
        'LABEL_1': 1,  # Furcht
        'LABEL_2': 2,  # Ekel
        'LABEL_3': 3,  # Trauer
        'LABEL_4': 4,  # Freude
        'LABEL_5': 5,  # keine Emotion
    }
    no_emotion_id = 5

    # Load the emotion classification pipeline once for the whole run.
    emotion_pipeline = pipeline("text-classification", model="visegradmedia-emotion/Emotion_RoBERTa_german6_v7")

    # Upper bound on tokens per text passed to the tokenizer.
    max_tokens = 512

    for i in tqdm(range(0, len(all_comments), batch_size), desc='Processing Comments'):
        batch = all_comments[i:i+batch_size]

        # Keep texts and their (articleID, commentID) keys in lockstep so the
        # pipeline results can be matched back by position.
        texts = []
        keys = []
        for commentID, articleID, comment_header, comment_body in batch:
            text = _build_comment_text(comment_header, comment_body)
            if not text:
                continue  # nothing to classify
            texts.append(text)
            keys.append((articleID, commentID))

        if not texts:
            continue  # no usable texts in this batch

        try:
            # Truncate over-long comments instead of letting a single one
            # raise and discard the whole batch.
            results = emotion_pipeline(texts, truncation=True, max_length=max_tokens)
        except Exception as e:
            logging.error(f"Fehler bei der Verarbeitung des Batches ab Index {i}: {e}")
            continue  # skip this batch

        data_to_insert = []
        for (articleID, commentID), result in zip(keys, results):
            if result['score'] >= min_score:
                emotionID = label_to_emotion_id.get(result['label'], no_emotion_id)
            else:
                # Prediction not confident enough: store "keine Emotion".
                emotionID = no_emotion_id
            data_to_insert.append((articleID, commentID, emotionID))

        try:
            cursor.executemany('''
            INSERT OR REPLACE INTO Article_comment_emotions (articleID, commentID, emotionID)
            VALUES (?, ?, ?)
            ''', data_to_insert)
            conn.commit()
        except sqlite3.Error as e:
            logging.error(f"SQLite Fehler beim Einfügen der Emotionen: {e}")
            conn.rollback()

def main():
    """Run the emotion analysis against the ``derstandard.db`` database.

    Opens the database, makes sure the ``Emotions`` and
    ``Article_comment_emotions`` tables exist, classifies all comments in
    batches of 100, and closes the connection even if one of the steps raises.
    """
    conn = sqlite3.connect('derstandard.db')
    try:
        # Create the tables if they are missing (existing tables are kept).
        create_emotions_table(conn)
        create_article_comment_emotions_table(conn)

        # Classify the comments and store the results.
        process_comments_emotion_analysis(conn, batch_size=100)
    finally:
        # Always release the database handle, including on errors.
        conn.close()

# Entry point: call main() only when this module's __name__ is "__main__".
if __name__ == "__main__":
    main()


Processing Comments: 100%|██████████| 34/34 [01:57<00:00,  3.46s/it]
