In [0]:
%python
from bs4 import BeautifulSoup
import requests
from kafka import KafkaProducer
import json
from datetime import datetime, timedelta

def get_table_heading(table):
    '''Return the header texts for columns 1 through 6 of the table.

    Reads the first <tr> inside the table's <thead>, skips the leading
    <th> (index 0) and keeps at most the next six cells.

    table: an object exposing the BeautifulSoup-style find / find_all API.
    Returns a list with the .text of each selected <th>.
    '''
    header_row = table.find('thead').find('tr')
    selected_cells = header_row.find_all('th')[1:7]
    return [cell.text for cell in selected_cells]


def extract_crypto_data(table):
    '''Build one 6-tuple of cell texts for every <tr> in the table's <tbody>.

    For each row the <td> cells at positions 1..6 are used (position 0 is
    skipped). The second selected cell contributes the text of its first
    <span>, the third the text of its first <div>; the other four
    contribute their own text.

    table: an object exposing the BeautifulSoup-style find / find_all API.
    Returns a list of tuples, in row order.
    '''

    def row_to_record(row):
        cells = row.find_all('td')[1:7]
        leading = (
            cells[0].text,
            cells[1].find('span').text,
            cells[2].find('div').text,
        )
        trailing = tuple(cells[position].text for position in (3, 4, 5))
        return leading + trailing

    body_rows = table.find('tbody').find_all('tr')
    return [row_to_record(row) for row in body_rows]



# ---------------------------------------------------------------------------
# Scrape the price table, then publish one JSON message per row to Kafka.
# ---------------------------------------------------------------------------
url = 'https://crypto.com/price'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being parsed as data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table')
if table is None:
    # Fail with a clear message instead of an AttributeError on None later on.
    raise RuntimeError('No <table> element found at {}'.format(url))

heading = get_table_heading(table)
crypto_data = extract_crypto_data(table)

# Kafka configuration
bootstrap_servers = 'kafka-broker:29092'  # Replace with your Kafka broker's address
topic = 'source_topic-001'  # Replace with your Kafka topic name

# Create Kafka producer; each message value is serialized as UTF-8 JSON.
producer = KafkaProducer(bootstrap_servers=bootstrap_servers,
                         value_serializer=lambda v: json.dumps(v).encode('utf-8'))

# Stays None when the scrape produced no rows, so the final print is skipped
# instead of raising NameError.
modified_data_dict = None

try:
    # Produce data to Kafka topic with adjusted datetime and modified keys
    for data_row in crypto_data:
        data_dict = dict(zip(heading, data_row))
        data_dict.pop('#', None)  # Remove '#' field if present

        # Adds one hour to the current local (naive) time.
        # NOTE(review): the previous comment said "subtract an hour" while the
        # code added one; behavior is left unchanged - confirm the intended offset.
        adjusted_datetime = datetime.now() + timedelta(hours=1)
        data_dict['Datetime'] = adjusted_datetime.isoformat()

        # Modify keys with spaces to use underscores
        modified_data_dict = {key.replace(' ', '_'): value for key, value in data_dict.items()}

        producer.send(topic, value=modified_data_dict)

    # Show the last message that was queued, as a quick sanity check.
    if modified_data_dict is not None:
        print(modified_data_dict)
finally:
    # Always close the producer, even if building or sending a row failed,
    # so the broker connection is not leaked.
    producer.close()