In [1]:
from confluent_kafka import Consumer, KafkaError
import psycopg2
import json
import os

# Kafka consumer settings, assembled from (key, value) pairs.
conf = dict([
    # Address of the Kafka broker.
    ('bootstrap.servers', 'host.docker.internal:9092'),
    # Consumer group identifier.
    ('group.id', 'my_group'),
    # Start from the earliest available offset when none is stored yet.
    ('auto.offset.reset', 'earliest'),
])

# Instantiate the Kafka consumer with the settings above.
consumer = Consumer(conf)

# PostgreSQL connection parameters. Host and port are fixed here; the
# database name and credentials are read from the environment.
pg_conf = dict(
    host='postgres',  # use 'localhost' instead when not running inside Docker
    port='5432',
    database=os.getenv('DB_NAME'),
    user=os.getenv('DB_USER'),
    password=os.getenv('DB_PASSWORD'),
)

# Main ingestion loop: consume JSON messages from the 'mastodon_stream' Kafka
# topic and insert/update them in the PostgreSQL table `bronze_mastodon`.
#
# The database handles are pre-bound to None so that the `finally` clause can
# test them safely even when psycopg2.connect() (or conn.cursor()) raises.
conn = None
cursor = None

try:
    # Open the PostgreSQL connection.
    print("Connexion √† PostgreSQL...")
    conn = psycopg2.connect(**pg_conf)
    cursor = conn.cursor()

    # Create the raw-data (bronze) table on first run.
    print("Cr√©ation de la table bronze_mastodon si elle n'existe pas d√©j√†...")
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bronze_mastodon (
        id BIGINT PRIMARY KEY,
        content TEXT,
        username TEXT,
        reblogs_count BIGINT,
        favourites_count BIGINT,
        replies_count BIGINT,
        language TEXT,
        url TEXT,
        media_url TEXT,
        media_preview_url TEXT,
        hashtags TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
    """)
    conn.commit()
    print("Table bronze_mastodon pr√™te.")

    # Subscribe to the Kafka topic.
    consumer.subscribe(['mastodon_stream'])
    print("Souscription au topic 'mastodon_stream' r√©ussie.")

    while True:
        # Wait up to one second for the next message.
        print("Polling Kafka pour de nouveaux contenus...")
        msg = consumer.poll(1.0)

        if msg is None:
            continue
        if msg.error():
            print(f"Erreur Kafka: {msg.error()}")
            # End-of-partition is informational; any other error stops the loop.
            if msg.error().code() == KafkaError._PARTITION_EOF:
                continue
            else:
                print(f"Erreur irr√©cup√©rable: {msg.error()}")
                break

        try:
            # Decode the Kafka payload and parse it as JSON.
            content_value = msg.value().decode('utf-8')
            print(f"Contenu re√ßu de Kafka : {content_value}")
            content_json = json.loads(content_value)

            # Pull out the fields stored in the bronze table.
            content_id = content_json.get('id')
            content_created_at = content_json.get('created_at')
            content = content_json.get('content')
            username = content_json.get('username')
            reblogs_count = content_json.get('reblogs_count', 0)
            favourites_count = content_json.get('favourites_count', 0)
            replies_count = content_json.get('replies_count', 0)
            language = content_json.get('language')
            url = content_json.get('url')

            # Only the first media attachment (if any) is kept.
            media_url = None
            media_preview_url = None
            if 'media_attachments' in content_json and content_json['media_attachments']:
                media_url = content_json['media_attachments'][0].get('url')
                media_preview_url = content_json['media_attachments'][0].get('preview_url')

            # Hashtags are taken as-is from the payload.
            # NOTE(review): the column is TEXT, so this presumably arrives as a
            # string already - confirm against the producer.
            hashtags = content_json.get('hashtags', '')

            # Show what was received.
            print(f"ID: {content_id}, Content: {content}, Hashtags: {hashtags}")

            # Check whether a row with this id is already stored.
            cursor.execute("""
            SELECT 1 FROM bronze_mastodon WHERE id = %s
            """, (content_id,))
            exists = cursor.fetchone()

            if not exists:
                # New id: insert a fresh row.
                cursor.execute("""
                INSERT INTO bronze_mastodon (id, created_at, content, username, reblogs_count, favourites_count, replies_count, language, url, media_url, media_preview_url, hashtags)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
                """, (content_id, content_created_at, content, username, reblogs_count, favourites_count, replies_count, language, url, media_url, media_preview_url, hashtags))
                conn.commit()
                print(f"Contenu ins√©r√© dans la base de donn√©es : {content}")
            else:
                # Known id: refresh the counters and the other columns.
                cursor.execute("""
                UPDATE bronze_mastodon
                SET created_at = %s, content = %s, username = %s, reblogs_count = %s, favourites_count = %s, replies_count = %s, language = %s, url = %s, media_url = %s, media_preview_url = %s, hashtags = %s
                WHERE id = %s;
                """, (content_created_at, content, username, reblogs_count, favourites_count, replies_count, language, url, media_url, media_preview_url, hashtags, content_id))
                conn.commit()
                print(f"Contenu mis √† jour dans la base de donn√©es : {content}")

        except json.JSONDecodeError as e:
            # Raised before any SQL ran for this message, so no rollback is needed.
            print(f"Erreur lors du d√©codage du contenu JSON : {e}")
        except Exception as e:
            print(f"Erreur inattendue : {e}")
            # A failed statement leaves the PostgreSQL transaction aborted;
            # without a rollback every later statement on this connection would
            # be rejected. If the rollback itself fails (e.g. connection lost),
            # the error propagates to the outer handler and ends the loop.
            conn.rollback()

except KeyboardInterrupt:
    print("Interruption par l'utilisateur.")

except Exception as e:
    print(f"Erreur g√©n√©rale : {e}")

finally:
    # Release resources; the database handles are closed even if closing the
    # Kafka consumer raises.
    print("Fermeture des connexions...")
    try:
        consumer.close()
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
    print("Connexions ferm√©es.")

Connexion √† PostgreSQL...
Cr√©ation de la table bronze_mastodon si elle n'existe pas d√©j√†...
Table bronze_mastodon pr√™te.
Souscription au topic 'mastodon_stream' r√©ussie.
Polling Kafka pour de nouveaux contenus...
Polling Kafka pour de nouveaux contenus...
Contenu re√ßu de Kafka : {"id": 113268368177621053, "created_at": "2024-10-07 21:55:24+00:00", "content": "<p>\ud83d\udd25New Wildfire: E-1 Fire / West of Logan Mountain / California <a href=\"https://m.ai6yr.org/tags/E\" class=\"mention hashtag\" rel=\"nofollow noopener noreferrer\" target=\"_blank\">#<span>E</span></a>-1Fire <a href=\"https://share.watchduty.org/i/35788\" rel=\"nofollow noopener noreferrer\" translate=\"no\" target=\"_blank\"><span class=\"invisible\">https://</span><span class=\"\">share.watchduty.org/i/35788</span><span class=\"invisible\"></span></a> <a href=\"https://m.ai6yr.org/tags/firewx\" class=\"mention hashtag\" rel=\"nofollow noopener noreferrer\" target=\"_blank\">#<span>firewx</span></a></p>", "us