In [None]:
#Libraries
import pandas as pd
import sqlite3
import pickle
from kafka import KafkaProducer, KafkaConsumer
import json
import random
import time

import warnings
warnings.filterwarnings("ignore")

In [118]:
# Kafka settings
bootstrap_servers = ['localhost:9092']  # Kafka server address
topic = 'predictions_topic'  # Kafka topic to send predictions

# Function to create Kafka producer
def create_kafka_producer():
    return KafkaProducer(
        bootstrap_servers=bootstrap_servers,
        value_serializer=lambda v: json.dumps(v).encode('utf-8'), # Serialize data to JSON format
        api_version=(0, 10, 1)
    )

In [120]:
def load_models():
    # Load the linear regression model
    with open('happiness_model_linear.pkl', 'rb') as f:
        linear_regression_model = pickle.load(f)

    # Load the decision tree model
    with open('happiness_model_tree.pkl', 'rb') as f:
        decision_tree_model = pickle.load(f)

    return linear_regression_model, decision_tree_model

In [165]:
# Function to create SQLite database and table
def create_sqlite_db():
    conn = sqlite3.connect('predictions.db')  # Connect to SQLite database (creates the file if it doesn't exist)
    c = conn.cursor()
    c.execute('DROP TABLE IF EXISTS predictions;')
    # Create the 'predictions' table if it doesn't exist
    c.execute('''
        CREATE TABLE IF NOT EXISTS predictions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            features TEXT,  -- To store the features as a JSON string
            linear_prediction REAL,
            tree_prediction REAL
        )
    ''')
    conn.commit()
    conn.close()
    print("Database and table created.")

In [150]:
# Function to insert data into the database
def insert_prediction_to_db(features_json, linear_prediction, tree_prediction):
    conn = sqlite3.connect('predictions.db')
    c = conn.cursor()
    
    # Insert the features and predictions into the table
    c.execute('''
        INSERT INTO predictions (features, linear_prediction, tree_prediction)
        VALUES (?, ?, ?)
    ''', (features_json, linear_prediction, tree_prediction))
    
    conn.commit()
    conn.close()
    print(f"Data inserted into database: Features={features_json}, Linear Prediction={linear_prediction}, Tree Prediction={tree_prediction}")

In [146]:
# Function to send data to Kafka and store it in SQLite
def send_data_to_kafka(data, linear_regression_model, decision_tree_model, producer):
    for i in range(len(data)):
        # Extract the features (without the target column)
        features = data.iloc[i].drop('Happiness Score').to_list()

        # Make predictions with both models
        linear_prediction = linear_regression_model.predict([features])[0]
        tree_prediction = decision_tree_model.predict([features])[0]

        features_json = json.dumps(features)

        # Create a message to send to Kafka, converting features to JSON
        message = {
            'features': features_json,  # Convert features to JSON string
            'linear_prediction': linear_prediction,
            'tree_prediction': tree_prediction
        }

        # Send the message to Kafka
        producer.send('predictions_topic', value=message)
        producer.flush()  # Ensure the message is sent immediately

        print(f"Sent to Kafka: {message}")

        # Also insert the data into SQLite database
        insert_prediction_to_db(features_json, linear_prediction, tree_prediction)

In [140]:
# Function to consume data from Kafka and store it in SQLite
def consume_data_from_kafka_and_store():
    consumer = KafkaConsumer(
        'predictions_topic',  # The Kafka topic to consume from
        bootstrap_servers=bootstrap_servers,
        group_id='prediction_group',  # Consumer group ID
        value_deserializer=lambda m: json.loads(m.decode('utf-8'))  # Deserialize data from JSON
    )

    for message in consumer:
        data = message.value
        features = data['features']
        linear_prediction = data['linear_prediction']
        tree_prediction = data['tree_prediction']

        features_json = json.dumps(features)

        # Insert the received data into the SQLite database
        insert_prediction_to_db(features_json, linear_prediction, tree_prediction)
        print(f"Data consumed and stored: {data}")

In [171]:
def main():
    # Create the SQLite database and table
    create_sqlite_db()

    # Load the models
    linear_regression_model, decision_tree_model = load_models()

    # Create a Kafka producer
    producer = create_kafka_producer()

    # Assuming final_data is your DataFrame that contains the features and target
    data_test = pd.read_csv('data_test.csv')  # Load your dataset here

    # Send data to Kafka and store it in SQLite
    send_data_to_kafka(data_test, linear_regression_model, decision_tree_model, producer)

    # Start consuming data from Kafka and storing it in SQLite
    consume_data_from_kafka_and_store()

if __name__ == '__main__':
    main()

Database and table created.




Sent to Kafka: {'features': '[0.308, 0.391, 0.452]', 'linear_prediction': 4.442772734340008, 'tree_prediction': 4.395}
Data inserted into database: Features=[0.308, 0.391, 0.452], Linear Prediction=4.442772734340008, Tree Prediction=4.395
Sent to Kafka: {'features': '[0.874, 0.365, 0.519]', 'linear_prediction': 5.305658955137872, 'tree_prediction': 4.574}
Data inserted into database: Features=[0.874, 0.365, 0.519], Linear Prediction=5.305658955137872, Tree Prediction=4.574
Sent to Kafka: {'features': '[0.97306, 0.68613, 0.4027]', 'linear_prediction': 5.53044459873677, 'tree_prediction': 4.788}
Data inserted into database: Features=[0.97306, 0.68613, 0.4027], Linear Prediction=5.53044459873677, Tree Prediction=4.788
Sent to Kafka: {'features': '[1.15851, 0.3494, 0.28098]', 'linear_prediction': 5.109296856005871, 'tree_prediction': 4.49700021743774}
Data inserted into database: Features=[1.15851, 0.3494, 0.28098], Linear Prediction=5.109296856005871, Tree Prediction=4.49700021743774
Sent



Sent to Kafka: {'features': '[1.0088, 0.69805, 0.30033]', 'linear_prediction': 5.354214619375032, 'tree_prediction': 5.483}
Data inserted into database: Features=[1.0088, 0.69805, 0.30033], Linear Prediction=5.354214619375032, Tree Prediction=5.483
Sent to Kafka: {'features': '[1.20813, 0.92356, 0.40672]', 'linear_prediction': 6.115350468752444, 'tree_prediction': 5.762}
Data inserted into database: Features=[1.20813, 0.92356, 0.40672], Linear Prediction=6.115350468752444, Tree Prediction=5.762
Sent to Kafka: {'features': '[1.44024, 0.65696, 0.47375]', 'linear_prediction': 6.270288777803186, 'tree_prediction': 6.375}
Data inserted into database: Features=[1.44024, 0.65696, 0.47375], Linear Prediction=6.270288777803186, Tree Prediction=6.375
Sent to Kafka: {'features': '[0.77042, 0.57407, 0.53206]', 'linear_prediction': 5.437894702683175, 'tree_prediction': 4.796}
Data inserted into database: Features=[0.77042, 0.57407, 0.53206], Linear Prediction=5.437894702683175, Tree Prediction=4.79



Data inserted into database: Features=[1.35948, 0.88645, 0.25168], Linear Prediction=5.912607630238028, Tree Prediction=6.09800004959106
Sent to Kafka: {'features': '[0.359, 0.614, 0.555]', 'linear_prediction': 4.999910347713511, 'tree_prediction': 4.286}
Data inserted into database: Features=[0.359, 0.614, 0.555], Linear Prediction=4.999910347713511, Tree Prediction=4.286
Sent to Kafka: {'features': '[1.53062355518341, 0.590148329734802, 0.449750572443008]', 'linear_prediction': 6.256920753297135, 'tree_prediction': 5.458}
Data inserted into database: Features=[1.53062355518341, 0.590148329734802, 0.449750572443008], Linear Prediction=6.256920753297135, Tree Prediction=5.458
Sent to Kafka: {'features': '[1.004, 0.802, 0.39]', 'linear_prediction': 5.67284858322175, 'tree_prediction': 5.813}
Data inserted into database: Features=[1.004, 0.802, 0.39], Linear Prediction=5.67284858322175, Tree Prediction=5.813
Sent to Kafka: {'features': '[0.65435, 0.16007, 0.34334]', 'linear_prediction': 



Data inserted into database: Features=[1.684, 0.871, 0.555], Linear Prediction=7.01828775953192, Tree Prediction=6.57200002670288
Sent to Kafka: {'features': '[1.002, 0.785, 0.086]', 'linear_prediction': 4.949156346046598, 'tree_prediction': 4.806}
Data inserted into database: Features=[1.002, 0.785, 0.086], Linear Prediction=4.949156346046598, Tree Prediction=4.806
Sent to Kafka: {'features': '[0.741, 0.851, 0.543]', 'linear_prediction': 5.738846305489078, 'tree_prediction': 5.86}
Data inserted into database: Features=[0.741, 0.851, 0.543], Linear Prediction=5.738846305489078, Tree Prediction=5.86
Sent to Kafka: {'features': '[0.259, 0.253, 0.434]', 'linear_prediction': 4.1808613897745275, 'tree_prediction': 3.587}
Data inserted into database: Features=[0.259, 0.253, 0.434], Linear Prediction=4.1808613897745275, Tree Prediction=3.587
Sent to Kafka: {'features': '[0.885416388511658, 0.495879292488098, 0.501537680625916]', 'linear_prediction': 5.428643301583756, 'tree_prediction': 5.196



Data inserted into database: Features=[0.562, 0.723, 0.527], Linear Prediction=5.323456764614805, Tree Prediction=5.103
Sent to Kafka: {'features': '[1.264, 0.946, 0.281]', 'linear_prediction': 5.923381380206491, 'tree_prediction': 5.984}
Data inserted into database: Features=[1.264, 0.946, 0.281], Linear Prediction=5.923381380206491, Tree Prediction=5.984
Sent to Kafka: {'features': '[1.07062232494354, 0.595027923583984, 0.477487415075302]', 'linear_prediction': 5.726944591991824, 'tree_prediction': 5.62900018692017}
Data inserted into database: Features=[1.07062232494354, 0.595027923583984, 0.477487415075302], Linear Prediction=5.726944591991824, Tree Prediction=5.62900018692017
Sent to Kafka: {'features': '[0.35997, 0.56874, 0.38282]', 'linear_prediction': 4.552358677775253, 'tree_prediction': 4.286}
Data inserted into database: Features=[0.35997, 0.56874, 0.38282], Linear Prediction=4.552358677775253, Tree Prediction=4.286
Sent to Kafka: {'features': '[0.026, 0.105, 0.225]', 'linea



Sent to Kafka: {'features': '[0.93383, 0.70766, 0.09511]', 'linear_prediction': 4.793634129324339, 'tree_prediction': 4.949}
Data inserted into database: Features=[0.93383, 0.70766, 0.09511], Linear Prediction=4.793634129324339, Tree Prediction=4.949
Sent to Kafka: {'features': '[0.47428, 0.65088, 0.43477]', 'linear_prediction': 4.9144223783214755, 'tree_prediction': 4.996}
Data inserted into database: Features=[0.47428, 0.65088, 0.43477], Linear Prediction=4.9144223783214755, Tree Prediction=4.996
Sent to Kafka: {'features': '[0.75216, 0.05108, 0.27854]', 'linear_prediction': 4.2357512357501825, 'tree_prediction': 4.212}
Data inserted into database: Features=[0.75216, 0.05108, 0.27854], Linear Prediction=4.2357512357501825, Tree Prediction=4.212
Sent to Kafka: {'features': '[1.116, 0.726, 0.528]', 'linear_prediction': 6.051222882326673, 'tree_prediction': 6.545}
Data inserted into database: Features=[1.116, 0.726, 0.528], Linear Prediction=6.051222882326673, Tree Prediction=6.545
Sent



Data inserted into database: Features=[0.38227, 0.17344, 0.1643], Linear Prediction=3.6286804948877487, Tree Prediction=4.193
Sent to Kafka: {'features': '[1.22943, 0.57386, 0.4052]', 'linear_prediction': 5.743040842517043, 'tree_prediction': 5.82499980926514}
Data inserted into database: Features=[1.22943, 0.57386, 0.4052], Linear Prediction=5.743040842517043, Tree Prediction=5.82499980926514
Sent to Kafka: {'features': '[1.09426, 0.34752, 0.44089]', 'linear_prediction': 5.392589293135142, 'tree_prediction': 5.615}
Data inserted into database: Features=[1.09426, 0.34752, 0.44089], Linear Prediction=5.392589293135142, Tree Prediction=5.615
Sent to Kafka: {'features': '[1.32548, 0.87464, 0.64938]', 'linear_prediction': 6.7730222497379815, 'tree_prediction': 7.441}
Data inserted into database: Features=[1.32548, 0.87464, 0.64938], Linear Prediction=6.7730222497379815, Tree Prediction=7.441
Sent to Kafka: {'features': '[1.181, 0.999, 0.067]', 'linear_prediction': 5.381270212518071, 'tree_



Sent to Kafka: {'features': '[0.557, 0.292, 0.129]', 'linear_prediction': 3.9093697120279596, 'tree_prediction': 3.59299993515015}
Data inserted into database: Features=[0.557, 0.292, 0.129], Linear Prediction=3.9093697120279596, Tree Prediction=3.59299993515015
Sent to Kafka: {'features': '[1.26074862480164, 0.638566970825195, 0.325707912445068]', 'linear_prediction': 5.673724437684646, 'tree_prediction': 5.90199995040894}
Data inserted into database: Features=[1.26074862480164, 0.638566970825195, 0.325707912445068], Linear Prediction=5.673724437684646, Tree Prediction=5.90199995040894
Sent to Kafka: {'features': '[1.148, 0.686, 0.324]', 'linear_prediction': 5.576620090799887, 'tree_prediction': 5.098}
Data inserted into database: Features=[1.148, 0.686, 0.324], Linear Prediction=5.576620090799887, Tree Prediction=5.098
Sent to Kafka: {'features': '[0.737299203872681, 0.653095960617065, 0.447551846504211]', 'linear_prediction': 5.289249069223433, 'tree_prediction': 5.254}
Data inserte



Data inserted into database: Features=[0.87616, 0.45569, 0.51231], Linear Prediction=5.395872821208753, Tree Prediction=4.574
Sent to Kafka: {'features': '[1.383, 0.996, 0.592]', 'linear_prediction': 6.853147728205681, 'tree_prediction': 7.494}
Data inserted into database: Features=[1.383, 0.996, 0.592], Linear Prediction=6.853147728205681, Tree Prediction=7.494
Sent to Kafka: {'features': '[0.1327, 0.26162, 0.38041]', 'linear_prediction': 3.902306857169875, 'tree_prediction': 4.02799987792969}
Data inserted into database: Features=[0.1327, 0.26162, 0.38041], Linear Prediction=3.902306857169875, Tree Prediction=4.02799987792969
Sent to Kafka: {'features': '[0.091, 0.145, 0.065]', 'linear_prediction': 2.9875611008250025, 'tree_prediction': 2.904999985694885}
Data inserted into database: Features=[0.091, 0.145, 0.065], Linear Prediction=2.9875611008250025, Tree Prediction=2.904999985694885
Sent to Kafka: {'features': '[0.54649, 0.40064, 0.35571]', 'linear_prediction': 4.54224744408539, '



Data inserted into database: Features=[1.124, 0.891, 0.523], Linear Prediction=6.237217796216569, Tree Prediction=6.489
Sent to Kafka: {'features': '[1.44357192516327, 0.80915766954422, 0.617950856685638]', 'linear_prediction': 6.780123318221995, 'tree_prediction': 6.901}
Data inserted into database: Features=[1.44357192516327, 0.80915766954422, 0.617950856685638], Linear Prediction=6.780123318221995, Tree Prediction=6.901
Sent to Kafka: {'features': '[1.067, 0.789, 0.235]', 'linear_prediction': 5.3823880623373235, 'tree_prediction': 4.813}
Data inserted into database: Features=[1.067, 0.789, 0.235], Linear Prediction=5.3823880623373235, Tree Prediction=4.813
Sent to Kafka: {'features': '[0.4225, 0.23402, 0.49309]', 'linear_prediction': 4.508849387921286, 'tree_prediction': 3.484}
Data inserted into database: Features=[0.4225, 0.23402, 0.49309], Linear Prediction=4.508849387921286, Tree Prediction=3.484
Sent to Kafka: {'features': '[1.017, 0.417, 0.557]', 'linear_prediction': 5.6387321



NoBrokersAvailable: NoBrokersAvailable