In [1]:
from kafka import KafkaConsumer
import json
from pyspark.sql import SparkSession
from pyspark.ml.feature import Imputer, StringIndexer, OneHotEncoder, VectorAssembler, StandardScaler
from pyspark.ml import Pipeline, PipelineModel
from pyspark.sql.types import StructType, StructField, DoubleType, StringType, IntegerType

# Initialize Spark session
spark = SparkSession.builder \
    .appName("LoanPredictionConsumer") \
    .getOrCreate()

24/12/27 13:57:02 WARN Utils: Your hostname, dtdat resolves to a loopback address: 127.0.1.1; using 192.168.2.12 instead (on interface wlp0s20f3)
24/12/27 13:57:02 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/12/27 13:57:02 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [2]:
spark

In [3]:
schema = StructType([
    StructField("LoanID", StringType(), True),
    StructField("Age", IntegerType(), True),
    StructField("Income", IntegerType(), True),
    StructField("LoanAmount", IntegerType(), True),
    StructField("CreditScore", IntegerType(), True),
    StructField("MonthsEmployed", IntegerType(), True),
    StructField("NumCreditLines", IntegerType(), True),
    StructField("InterestRate", DoubleType(), True),
    StructField("LoanTerm", IntegerType(), True),
    StructField("DTIRatio", DoubleType(), True),
    StructField("Education", StringType(), True),
    StructField("EmploymentType", StringType(), True),
    StructField("MaritalStatus", StringType(), True),
    StructField("HasMortgage", StringType(), True),
    StructField("HasDependents", StringType(), True),
    StructField("LoanPurpose", StringType(), True),
    StructField("HasCoSigner", StringType(), True),
    StructField("Default", IntegerType(), True)
])

In [4]:
consumer = KafkaConsumer(
    'loan_application',
    bootstrap_servers='localhost:9092',
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    value_deserializer=lambda x: json.loads(x.decode('utf-8'))
)

In [5]:
from pyspark.ml.classification import LogisticRegressionModel
model_path = "/home/drissdo/Desktop/Scalable-Distributed-Systems/ML/model"
lr_model = LogisticRegressionModel.load(model_path)

In [6]:
def convert_data(loan_data):
    education_map = {'High School':1, 'Bachelor':0, 'Master':2, 'PhD':3}
    employment_map = {'Full-time':3, 'Part-time':0, 'Self-employed':2, 'Unemployed':1}
    marital_status_map = {'Single': 2, 'Married': 0, 'Divorced': 1}
    has_mortgage_map = {'Yes': 1, 'No': 0}
    has_dependents_map = {'Yes': 1, 'No': 0}
    loan_purpose_map = {'Home':1, 'Other': 3, 'Education':2, 'Business':0, 'Auto': 4}
    has_cosigner_map = {'Yes': 1, 'No': 0}
    
    loan_data['Education'] = education_map[loan_data['Education']]
    loan_data['EmploymentType'] = employment_map[loan_data['EmploymentType']]
    loan_data['MaritalStatus'] = marital_status_map[loan_data['MaritalStatus']]
    loan_data['HasMortgage'] = has_mortgage_map[loan_data['HasMortgage']]
    loan_data['HasDependents'] = has_dependents_map[loan_data['HasDependents']]
    loan_data['LoanPurpose'] = loan_purpose_map[loan_data['LoanPurpose']]
    loan_data['HasCoSigner'] = has_cosigner_map[loan_data['HasCoSigner']]
    
    
    data = df = spark.createDataFrame([loan_data])
    numerical_cols = [
        "Age", "Income", "LoanAmount", "CreditScore", "MonthsEmployed",
        "NumCreditLines", "InterestRate", "LoanTerm", "DTIRatio"
    ]

    assembler = VectorAssembler(inputCols=numerical_cols, outputCol="features")
    loan_data_assembled = assembler.transform(data)

    scaler = StandardScaler(inputCol="features", outputCol="scaled_features", withStd=True, withMean=True)
    scaler_model = scaler.fit(loan_data_assembled)
    loan_data_scaled = scaler_model.transform(loan_data_assembled)
    
    
    return loan_data_scaled
    

In [7]:
import random
for message in consumer:
    loan_data = message.value
    
    predict_data = convert_data(loan_data= loan_data)
    
    print(f"\nReceived Loan Application:")
    print(f"Loan ID: {loan_data['LoanID']}")
    print(f"Amount: ${loan_data['LoanAmount']}")
    print(f"Purpose: {loan_data['LoanPurpose']}")
    
    lr_predictions = lr_model.transform(predict_data)
    
    prediction_value = lr_predictions.select("prediction").collect()[0]["prediction"]
    
    if prediction_value == 1:
        print("Result: Accept")
    else:
        print("Result: Non Accept")
    
    print("-" * 50)
    

    
    

                                                                                


Received Loan Application:
Loan ID: 8ede67bd-c402-40e2-8aa8-d0c7dc70ff1c
Amount: $73068
Purpose: 3


24/12/27 13:57:08 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.VectorBLAS


Result: Non Accept
--------------------------------------------------

Received Loan Application:
Loan ID: 8f4dcc88-4426-4f3b-90c5-c787642693c7
Amount: $71513
Purpose: 0
Result: Accept
--------------------------------------------------

Received Loan Application:
Loan ID: dbc532ec-36f2-4483-9671-6fefa38991bd
Amount: $84949
Purpose: 0
Result: Accept
--------------------------------------------------

Received Loan Application:
Loan ID: 6fc10149-d2de-4a72-a4ec-26608eca8031
Amount: $14241
Purpose: 2
Result: Accept
--------------------------------------------------

Received Loan Application:
Loan ID: 1975dfc7-e16b-4e24-8844-aea85ec989b6
Amount: $74190
Purpose: 0
Result: Non Accept
--------------------------------------------------

Received Loan Application:
Loan ID: b9c03ce6-ec48-4fce-b630-49ad2afa9ddb
Amount: $41623
Purpose: 1
Result: Non Accept
--------------------------------------------------

Received Loan Application:
Loan ID: c6a70b3d-6698-49e6-a067-893dc7e569fd
Amount: $52261
Pur

24/12/27 13:57:16 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors



Received Loan Application:
Loan ID: f0870f54-d1d4-4afd-8d6c-d36561b06728
Amount: $82797
Purpose: 1
Result: Accept
--------------------------------------------------

Received Loan Application:
Loan ID: a93df5e4-f186-4437-bbd1-f1892056f088
Amount: $85263
Purpose: 3
Result: Accept
--------------------------------------------------

Received Loan Application:
Loan ID: 99349f93-dd0c-468f-9504-9cb4403ddc2d
Amount: $24810
Purpose: 4
Result: Accept
--------------------------------------------------

Received Loan Application:
Loan ID: 9eac3303-3399-493d-a190-89eafc7c1a8d
Amount: $98629
Purpose: 1
Result: Non Accept
--------------------------------------------------

Received Loan Application:
Loan ID: a6f3d1a8-9ad7-4c71-9852-567982ad110d
Amount: $20941
Purpose: 2
Result: Non Accept
--------------------------------------------------

Received Loan Application:
Loan ID: 9e314b3f-d847-493c-9775-35ca3e28cae8
Amount: $33543
Purpose: 1
Result: Non Accept
-------------------------------------------

ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/home/drissdo/Desktop/Scalable-Distributed-Systems/venv/lib/python3.11/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/drissdo/Desktop/Scalable-Distributed-Systems/venv/lib/python3.11/site-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
                          ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/drissdo/anaconda3/lib/python3.11/socket.py", line 706, in readinto
    return self._sock.recv_into(b)
           ^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt


Result: Accept
--------------------------------------------------


KeyboardInterrupt: 