In [None]:
import zipfile

# Unpack the archived random-forest model into a directory of the same name,
# which is the path the model is later loaded from.
zip_file_path = 'Random_forest_classification_model.zip'
extract_dir = 'Random_forest_classification_model'

with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_dir)

In [1]:
from pyspark.sql import SparkSession

# Build (or reuse) the local Spark session used by every later cell.
# The variable keeps its capitalised name `Spark` because later cells call
# `Spark.readStream`.
Spark = (
    SparkSession
    .builder
    .appName("Pyspark Streaming")
    # Spark configuration keys are case-sensitive and start with lowercase
    # "spark."; the previous key "Spark.streaming..." never matched the real
    # setting, so graceful shutdown was not actually enabled.
    .config("spark.streaming.stopGracefullyOnShutdown", True)
    .master("local[*]")
    .getOrCreate()
)
# The streaming reader below is given an explicit schema, so inference is only
# a fallback for sources read without one.
Spark.conf.set("spark.sql.streaming.schemaInference", True)

In [2]:
from pyspark.ml.classification import RandomForestClassificationModel

# Directory the random-forest model archive was unpacked into.
model_dir = "Random_forest_classification_model"
loaded_model = RandomForestClassificationModel.load(model_dir)

In [None]:
# Show every parameter of the loaded model together with its documentation
# and current value.
model_params_text = loaded_model.explainParams()
print("Model parameters:", model_params_text)

In [3]:
from pyspark.sql.types import StringType, StructField, StructType,IntegerType

# Column order of the incoming customer CSV records.
_customer_columns = [
    'customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
    'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
    'PaymentMethod', 'MonthlyCharges', 'TotalCharges',
]

# Columns parsed as integers; every other column is read as a string.
# NOTE(review): MonthlyCharges / TotalCharges are declared as integers — if the
# files carry decimal amounts these fields will not parse; confirm against the
# data. Both columns are dropped before modelling.
_integer_columns = {'SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges'}

# Explicit schema for the streaming reader; every field is nullable.
customer_schema = StructType([
    StructField(
        column,
        IntegerType() if column in _integer_columns else StringType(),
        True,
    )
    for column in _customer_columns
])

In [4]:
# Streaming source: CSV files in the "Streaming_csv_results" directory are
# parsed with the explicit customer schema.
# NOTE(review): no `header` option is set, so a header row (if the files have
# one) would be read as data — confirm the files are header-less.
customer_details_df = (
    Spark.readStream
    .format("csv")
    .schema(customer_schema)
    .load("Streaming_csv_results")
)

In [5]:
# Sanity check: print the schema attached to the streaming DataFrame.
customer_details_df.printSchema()

root
 |-- customerID: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- SeniorCitizen: integer (nullable = true)
 |-- Partner: string (nullable = true)
 |-- Dependents: string (nullable = true)
 |-- tenure: integer (nullable = true)
 |-- PhoneService: string (nullable = true)
 |-- MultipleLines: string (nullable = true)
 |-- InternetService: string (nullable = true)
 |-- OnlineSecurity: string (nullable = true)
 |-- OnlineBackup: string (nullable = true)
 |-- DeviceProtection: string (nullable = true)
 |-- TechSupport: string (nullable = true)
 |-- StreamingTV: string (nullable = true)
 |-- StreamingMovies: string (nullable = true)
 |-- Contract: string (nullable = true)
 |-- PaperlessBilling: string (nullable = true)
 |-- PaymentMethod: string (nullable = true)
 |-- MonthlyCharges: integer (nullable = true)
 |-- TotalCharges: integer (nullable = true)



In [None]:
# customer_details_df.show(truncate = False)  # left disabled: show() is not supported on a streaming DataFrame

In [6]:
# Names of the saved StringIndexer models (each is both a zip archive stem and
# a directory name). Every name is "<column>_model".
_indexed_columns = (
    "Partner",
    "Dependents",
    "PhoneService",
    "InternetService",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "Contract",
    "PaperlessBilling",
    "Churn",
)
models_indexer = [f"{column}_model" for column in _indexed_columns]

In [None]:
import zipfile

# Unpack "<name>.zip" into a directory called "<name>" for every indexer model.
for model_name in models_indexer:
    zip_file_path = f"{model_name}.zip"
    with zipfile.ZipFile(zip_file_path, mode='r') as archive:
        archive.extractall(path=model_name)

In [7]:
from pyspark.ml.feature import StringIndexerModel

In [8]:
# Load each saved StringIndexerModel from its directory, keyed by model name.
d = {
    model_name: StringIndexerModel.load(model_name)
    for model_name in models_indexer
}

In [9]:
# Show which indexer models were loaded (name -> model repr).
print(d)

{'Partner_model': StringIndexerModel: uid=StringIndexer_935db0f4391c, handleInvalid=error, 'Dependents_model': StringIndexerModel: uid=StringIndexer_7f2ef6e178fd, handleInvalid=error, 'PhoneService_model': StringIndexerModel: uid=StringIndexer_ee8f3f5b2409, handleInvalid=error, 'InternetService_model': StringIndexerModel: uid=StringIndexer_6ffe42d60c9e, handleInvalid=error, 'OnlineSecurity_model': StringIndexerModel: uid=StringIndexer_ffbdfcab7dee, handleInvalid=error, 'OnlineBackup_model': StringIndexerModel: uid=StringIndexer_f065b8cf7adc, handleInvalid=error, 'DeviceProtection_model': StringIndexerModel: uid=StringIndexer_8bc6c9afa3f5, handleInvalid=error, 'TechSupport_model': StringIndexerModel: uid=StringIndexer_88c86105fac8, handleInvalid=error, 'Contract_model': StringIndexerModel: uid=StringIndexer_e06964f7c4fc, handleInvalid=error, 'PaperlessBilling_model': StringIndexerModel: uid=StringIndexer_06cf89589711, handleInvalid=error, 'Churn_model': StringIndexerModel: uid=StringInd

In [10]:
# Work on a separate DataFrame so customer_details_df keeps every source column.
telecom_df_copy = customer_details_df.select('*')

In [11]:
# Discard the columns that are not fed to the feature assembler.
# Names are spelled exactly as in customer_schema: drop() silently ignores
# names it cannot resolve, so the earlier 'customerId' / 'Monthlycharges'
# spellings only worked while Spark's column resolution is case-insensitive
# (the default).
telecom_df_copy = telecom_df_copy.drop(
    'customerID', 'gender', 'MultipleLines', 'StreamingTV',
    'PaymentMethod', 'StreamingMovies', 'MonthlyCharges', 'TotalCharges',
)

In [None]:
# telecom_df_copy.show(truncate = False)

In [12]:
# Starting point for the encoding steps: same columns as telecom_df_copy.
details_df = telecom_df_copy.select('*')

In [13]:
# Apply every saved feature indexer in turn; each one appends a
# "<column>_encoded" column to the frame.
#
# This replaces ten copy-pasted one-line cells with a single loop:
#   * 'Churn_model' is skipped: it is not applied to the incoming rows and is
#     only used later to turn predictions back into labels.
#   * d[name] is used instead of d.get(name) so a missing model fails with a
#     clear KeyError rather than an AttributeError on None.
#   * The fold starts from telecom_df_copy, so re-running this cell does not
#     try to add the encoded columns a second time.
feature_indexers = [name for name in models_indexer if name != 'Churn_model']

details_df = telecom_df_copy
for indexer_name in feature_indexers:
    details_df = d[indexer_name].transform(details_df)

In [24]:
# Check that the *_encoded columns were appended next to the raw columns.
details_df.printSchema()

root
 |-- SeniorCitizen: integer (nullable = true)
 |-- Partner: string (nullable = true)
 |-- Dependents: string (nullable = true)
 |-- tenure: integer (nullable = true)
 |-- PhoneService: string (nullable = true)
 |-- InternetService: string (nullable = true)
 |-- OnlineSecurity: string (nullable = true)
 |-- OnlineBackup: string (nullable = true)
 |-- DeviceProtection: string (nullable = true)
 |-- TechSupport: string (nullable = true)
 |-- Contract: string (nullable = true)
 |-- PaperlessBilling: string (nullable = true)
 |-- Partner_encoded: double (nullable = false)
 |-- Dependents_encoded: double (nullable = false)
 |-- PhoneService_encoded: double (nullable = false)
 |-- InternetService_encoded: double (nullable = false)
 |-- OnlineSecurity_encoded: double (nullable = false)
 |-- OnlineBackup_encoded: double (nullable = false)
 |-- DeviceProtection_encoded: double (nullable = false)
 |-- TechSupport_encoded: double (nullable = false)
 |-- Contract_encoded: double (nullable = fa

In [None]:
# details_df.show(truncate = False)

In [25]:
# The raw string columns are no longer needed once their *_encoded
# counterparts exist.
raw_categorical_columns = [
    'Partner',
    'Dependents',
    'PhoneService',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'Contract',
    'PaperlessBilling',
]
details_df = details_df.drop(*raw_categorical_columns)

In [None]:
# details_df.show()

In [26]:
from pyspark.ml.feature import VectorAssembler

# Feature order inside the assembled vector: the two numeric columns first,
# then the encoded categorical columns.
feature_columns = [
    'SeniorCitizen',
    'tenure',
    'Partner_encoded',
    'Dependents_encoded',
    'PhoneService_encoded',
    'InternetService_encoded',
    'OnlineSecurity_encoded',
    'OnlineBackup_encoded',
    'DeviceProtection_encoded',
    'TechSupport_encoded',
    'Contract_encoded',
    'PaperlessBilling_encoded',
]

# Pack the feature columns into a single vector column, 'customer_details'.
assembler_1 = VectorAssembler(inputCols=feature_columns, outputCol='customer_details')
details_df = assembler_1.transform(details_df)

In [27]:
# Check that the 'customer_details' vector column was added.
details_df.printSchema()

root
 |-- SeniorCitizen: integer (nullable = true)
 |-- tenure: integer (nullable = true)
 |-- Partner_encoded: double (nullable = false)
 |-- Dependents_encoded: double (nullable = false)
 |-- PhoneService_encoded: double (nullable = false)
 |-- InternetService_encoded: double (nullable = false)
 |-- OnlineSecurity_encoded: double (nullable = false)
 |-- OnlineBackup_encoded: double (nullable = false)
 |-- DeviceProtection_encoded: double (nullable = false)
 |-- TechSupport_encoded: double (nullable = false)
 |-- Contract_encoded: double (nullable = false)
 |-- PaperlessBilling_encoded: double (nullable = false)
 |-- customer_details: vector (nullable = true)



In [28]:
# Keep only the assembled feature vector as the model input.
# NOTE(review): no identifier column is carried along, so predictions cannot
# be tied back to a specific customer — confirm this is intended.
input_to_model = details_df.select('customer_details')
input_to_model.printSchema()

root
 |-- customer_details: vector (nullable = true)



In [None]:
# input_to_model.show(truncate = False)

In [29]:
from pyspark.ml.classification import RandomForestClassificationModel

# Load the random-forest model used for scoring.
# NOTE: this is the same path that was loaded into `loaded_model` earlier, so
# the model is read from disk a second time here.
rf_model_path = 'Random_forest_classification_model'
my_model = RandomForestClassificationModel.load(rf_model_path)

In [30]:
# Score the feature vectors; a later cell reads the `prediction` column of
# the result.
# NOTE(review): presumably the model was trained with
# featuresCol='customer_details' — verify against the training notebook.
prediction1 = my_model.transform(input_to_model)

In [None]:
# prediction1.show()

In [None]:
# prediction1.select(prediction1.prediction).show()

In [31]:
# Keep only the predicted class index.
result = prediction1.select('prediction')

In [None]:
# result.show()

In [32]:
from pyspark.ml.feature import StringIndexer, IndexToString

In [None]:
import zipfile

# Unpack the churn label indexer into the 'Churn_model' directory.
# NOTE: 'Churn_model' is also listed in `models_indexer`, so the earlier
# extraction loop already unpacks this same archive.
zip_file_path = 'Churn_model.zip'
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path='Churn_model')

In [34]:
# Load the churn label indexer; its `labels` are used below to turn predicted
# indices back into label strings.
# NOTE: the same model is already available as d['Churn_model'].
x = StringIndexerModel.load('Churn_model')

In [35]:
# Map each numeric prediction back to its label string using the labels
# stored in the churn indexer `x`.
# Fix: the output column was misspelled "ouput"; it is now "output".
index_to_string = IndexToString(inputCol="prediction", outputCol="output", labels=x.labels)
original_df = index_to_string.transform(result)
# original_df is a streaming DataFrame, so it cannot be inspected with show();
# the rows are emitted by the writeStream query instead.

In [None]:
# Start the streaming query: append-mode output is printed to the console,
# with a micro-batch triggered every 3 seconds.
query = (
    original_df
    .writeStream
    .format("console")
    .outputMode("append")
    .trigger(processingTime = "3 seconds")
    .start()
)

# Block this cell until the query stops or fails.
query.awaitTermination()