In [24]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from keras.models import model_from_json
import warnings
warnings.filterwarnings('ignore')

In [25]:
# Load previously collected survey responses, if there are any.
# A missing file (first run) and a zero-byte file (pandas raises
# EmptyDataError for it) both mean "no earlier responses", so in either
# case start from an empty DataFrame instead of aborting the cell.
try:
    existing_survey_df = pd.read_csv('survey_data.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    existing_survey_df = pd.DataFrame()

# Accepted spellings for every Yes/No question (capitalised and lower-case)
_yes_no = ["Yes", "No", 'yes', 'no']

# Single table describing the survey, one row per question:
# (DataFrame column label, prompt text, accepted answers).
# Questions answered with a free-form number carry None instead of a list.
_survey_spec = [
    ("Gender",
     "Q1. What is your gender? (Male, Female)",
     ["Male", "Female", 'male', 'female']),
    ("SeniorCitizen",
     "Q2. Are you a senior citizen? (Yes, No)",
     _yes_no),
    ("Partner",
     "Q3. Do you have a partner? (Yes, No)",
     _yes_no),
    ("Dependents",
     "Q4. Do you have dependents? (Yes, No)",
     _yes_no),
    ("Tenure",
     "Q5. How many months have you been with the company?",
     None),
    ("PhoneService",
     "Q6. Do you have a phone service? (Yes, No)",
     _yes_no),
    ("MultipleLines",
     "Q7. Do you have multiple lines? (Yes, No)",
     _yes_no),
    ("InternetService",
     "Q8. What is your internet service provider (DSL, Fiber optic, No)",
     ["DSL", "Fiber optic", "No", 'no', 'fiber optic']),
    ("OnlineSecurity",
     "Q9. Do you have an online security service? (Yes, No)",
     _yes_no),
    ("OnlineBackup",
     "Q10. Do you have an online backup service? (Yes, No)",
     _yes_no),
    ("DeviceProtection",
     "Q11. Do you have a device protection service? (Yes, No)",
     _yes_no),
    ("TechSupport",
     "Q12. Do you have a tech support service? (Yes, No)",
     _yes_no),
    ("StreamingTV",
     "Q13. Do you have a streaming TV service? (Yes, No)",
     _yes_no),
    ("StreamingMovies",
     "Q14. Do you have a streaming movies service? (Yes, No)",
     _yes_no),
    ("Contract",
     "Q15. What is your contract term (Month-to-month, One year, Two year)",
     ["Month-to-month", "One year", "Two year", 'month-to-month', 'one year', 'two year']),
    ("PaperlessBilling",
     "Q16. Do you have a paperless billing? (Yes, No)",
     _yes_no),
    ("PaymentMethod",
     "Q17. What is your payment method (Electronic check, Mailed check, Bank transfer, Credit card)",
     ["Electronic check", "Mailed check", "Bank transfer", "Credit card",
      'electronic check', 'mailed check', 'bank transfer', 'credit card']),
    ("MonthlyCharges",
     "Q18. What is the monthly fee that you are charged?",
     None),
    ("TotalCharges",
     "Q19. What is the total fee that you are charged?",
     None),
]

# Prompt text, in the order the questions are asked
questions = [prompt for label, prompt, options in _survey_spec]

# DataFrame column label for each question (same order as `questions`)
custom_labels = [label for label, prompt, options in _survey_spec]

# Accepted answers per multiple-choice column; numeric columns have no entry.
# Each column gets its own list object.
valid_options = {
    label: list(options)
    for label, prompt, options in _survey_spec
    if options is not None
}

# Create an empty DataFrame with one row filled with None values
survey_df = pd.DataFrame(columns=custom_labels)
survey_df.loc[0] = [None] * len(custom_labels)

# Becomes True as soon as the user types "quit"
user_quit = False

# Number of questions that actually received a valid answer
questions_answered = 0

# Ask the questions in order. Invalid input re-prompts the same question;
# "quit" stops the survey and leaves the remaining cells as None.
for i, question in enumerate(questions):
    label = custom_labels[i]

    if label in valid_options:
        # Multiple-choice question: match case-insensitively and store the
        # first-listed spelling of the chosen option, so "dsl", "DSL" and
        # "Dsl" are all accepted and all recorded the same way.
        canonical = {}
        for option in valid_options[label]:
            canonical.setdefault(option.lower(), option)

        while True:
            answer = input(question + " Answer, type 'quit' to exit): ").strip()

            if answer.lower() == 'quit':
                user_quit = True
                break

            if answer.lower() not in canonical:
                print("Invalid input. Please choose from the valid options.")
                continue

            # Update the corresponding cell in the survey_df
            survey_df.at[0, label] = canonical[answer.lower()]
            questions_answered += 1
            break
    else:
        # Numeric question (tenure and the two fee questions)
        while True:
            answer = input(question + " Answer (numeric value, type 'quit' to exit): ").strip()

            if answer.lower() == 'quit':
                user_quit = True
                break

            # str.isdigit() would reject decimal amounts such as "70.5",
            # so parse the text as a number instead.
            try:
                number = float(answer)
            except ValueError:
                number = None

            # NaN fails every comparison and infinity fails the upper bound,
            # so only finite, non-negative numbers get through.
            if number is None or not (0 <= number < float('inf')):
                print("Invalid input. Please enter a numeric value.")
                continue

            # Whole numbers are stored as int, anything else as float
            survey_df.at[0, label] = int(number) if number.is_integer() else number
            questions_answered += 1
            break

    if user_quit:
        # Nothing further is asked once the user has quit
        break

# Put the columns back into the order given by custom_labels
survey_df = survey_df[custom_labels]

# Answers to the numeric questions are turned into real numbers; anything
# that cannot be parsed (including an unanswered None) becomes NaN.
columns_to_convert = ['Tenure', 'MonthlyCharges', 'TotalCharges']
for column in columns_to_convert:
    if column not in survey_df:
        continue
    raw_answers = survey_df[column]
    survey_df[column] = pd.to_numeric(raw_answers, errors='coerce')


# Append the new survey row to the stored responses.
# Both branches produce a fresh object, so the in-place column rewrite
# further down never writes through to `survey_df`.
if not existing_survey_df.empty:
    existing_survey_df = pd.concat([existing_survey_df, survey_df], ignore_index=True)
else:
    existing_survey_df = survey_df.copy()

# Capitalize the first letter of every string answer. This runs before the
# save so that the CSV on disk holds the same spelling as the frame used
# for the rest of the notebook.
for column in existing_survey_df.columns:
    if existing_survey_df[column].dtype == 'object':  # Check if the column contains string values
        text = existing_survey_df[column]
        existing_survey_df[column] = text.str[0].str.upper() + text.str[1:]

# Save the updated survey DataFrame to a file
existing_survey_df.to_csv('survey_data.csv', index=False)



Q1. What is your gender? (Male, Female) Answer, type 'quit' to exit):  male
Q2. Are you a senior citizen? (Yes, No) Answer, type 'quit' to exit):  no
Q3. Do you have a partner? (Yes, No) Answer, type 'quit' to exit):  yes
Q4. Do you have dependents? (Yes, No) Answer, type 'quit' to exit):  no
Q5. How many months have you been with the company? Answer (numeric value, type 'quit' to exit):  60
Q6. Do you have a phone service? (Yes, No) Answer, type 'quit' to exit):  yes
Q7. Do you have multiple lines? (Yes, No) Answer, type 'quit' to exit):  no
Q8. What is your internet service provider (DSL, Fiber optic, No) Answer, type 'quit' to exit):  fiber optics


Invalid input. Please choose from the valid options.


Q8. What is your internet service provider (DSL, Fiber optic, No) Answer, type 'quit' to exit):  fiber optic
Q9. Do you have an online security service? (Yes, No) Answer, type 'quit' to exit):  yes
Q10. Do you have an online backup service? (Yes, No) Answer, type 'quit' to exit):  no
Q11. Do you have a device protection service? (Yes, No) Answer, type 'quit' to exit):  yes
Q12. Do you have a tech support service? (Yes, No) Answer, type 'quit' to exit):  yes
Q13. Do you have a streaming TV service? (Yes, No) Answer, type 'quit' to exit):  yes
Q14. Do you have a streaming movies service? (Yes, No) Answer, type 'quit' to exit):  no
Q15. What is your contract term (Month-to-month, One year, Two year) Answer, type 'quit' to exit):  two year
Q16. Do you have a paperless billing? (Yes, No) Answer, type 'quit' to exit):  yes
Q17. What is your payment method (Electronic check, Mailed check, Bank transfer, Credit card) Answer, type 'quit' to exit):  credit card
Q18. What is the monthly fee that 

In [26]:
# Work from the accumulated survey responses. This binds a second name to the
# same DataFrame object; it does not make a copy.
df = existing_survey_df

def label_encode_columns(df, columns_to_encode, label_mapping=None):
    """Replace categorical columns of ``df`` with integer codes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to encode. The listed columns are overwritten in place and the
        same object is returned.
    columns_to_encode : iterable of str
        Names of the columns to encode.
    label_mapping : dict[str, list], optional
        Complete list of categories for a column. When a column has an entry,
        each value is encoded as its position in the *sorted* category list,
        which is the numbering ``LabelEncoder`` produces once it has seen
        every category. The codes therefore do not depend on which values
        happen to occur in ``df``. Columns without an entry fall back to a
        ``LabelEncoder`` fitted on the column itself.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the listed columns replaced by integer codes.

    Raises
    ------
    ValueError
        If a column with a ``label_mapping`` entry holds a value (including a
        missing value) that is not in its category list.
    """
    if label_mapping is None:
        label_mapping = {}

    for column in columns_to_encode:
        known_values = label_mapping.get(column)

        if known_values is None:
            # No category list supplied: codes are derived from the data
            df[column] = preprocessing.LabelEncoder().fit_transform(df[column])
            continue

        # Fixed value -> code table built from the declared categories
        codes = {value: code for code, value in enumerate(sorted(set(known_values)))}

        unexpected = [value for value in df[column].unique() if value not in codes]
        if unexpected:
            raise ValueError(
                f"Column {column!r} contains values not listed in label_mapping: {unexpected!r}"
            )

        df[column] = df[column].map(codes).astype('int64')

    return df

# Categorical columns that get label-encoded, in encoding order
columns_to_encode = [
    'Gender',
    'SeniorCitizen',
    'Partner',
    'Dependents',
    'PhoneService',
    'MultipleLines',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'TechSupport',
    'DeviceProtection',
    'StreamingTV',
    'StreamingMovies',
    'Contract',
    'PaperlessBilling',
    'PaymentMethod',
]

# Columns whose categories are something other than No / Yes
_non_binary_categories = {
    'Gender': ['Female', 'Male'],
    'InternetService': ['DSL', 'Fiber optic', 'No'],
    'Contract': ['Month-to-month', 'Two year', 'One year'],
    'PaymentMethod': ['Electronic check', 'Mailed check', 'Bank transfer', 'Credit card'],
}

# Category list for every encoded column; each column gets its own list and
# any column not named above is a plain No / Yes column.
label_mapping = {
    name: list(_non_binary_categories.get(name, ('No', 'Yes')))
    for name in columns_to_encode
}

# Encode the categorical answers on a copy of the survey frame
df = label_encode_columns(df.copy(), columns_to_encode, label_mapping)


def _lacks_full_protection(row):
    """Return 0 when backup, device protection and tech support are all 1, else 1."""
    fully_protected = (
        row["OnlineBackup"] == 1
        and row["DeviceProtection"] == 1
        and row["TechSupport"] == 1
    )
    return 0 if fully_protected else 1


# Derived features (the order of creation fixes the column order of the frame)
df['TotalPayment'] = df['Tenure'].mul(df['MonthlyCharges'])
df['HasOnlineSecurityBackup'] = df['OnlineSecurity'] & df['OnlineBackup']
df['HasTechSupportAndDeviceProtection'] = df['TechSupport'] & df['DeviceProtection']
df['MonthlyToTotalChargesRatio'] = df['MonthlyCharges'].div(df['TotalCharges'])

# Count of subscribed add-on services per row
service_columns = ['OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']
df['AdditionalServices'] = df[service_columns].sum(axis=1)

df["NEW_noProt"] = df.apply(_lacks_full_protection, axis=1)
df["NEW_AVG_Charges"] = df["TotalCharges"].div(df["Tenure"].add(1))
df["NEW_Increase"] = df["NEW_AVG_Charges"].div(df["MonthlyCharges"])
df["NEW_AVG_Service_Fee"] = df["MonthlyCharges"].div(df['AdditionalServices'].add(1))

# Continuous columns, which are scaled separately from the rest
num_features = ['Tenure', 'MonthlyCharges', 'TotalCharges', 'MonthlyToTotalChargesRatio', 'TotalPayment', 'NEW_AVG_Charges', "NEW_Increase", 'NEW_AVG_Service_Fee']

# Split into the non-numeric part (everything else) and the numeric part
X_cat = df.drop(columns=num_features)
X_num = df[num_features]

# Rescale the numeric features to the [0, 1] range.
# NOTE(review): the scaler is fitted on the survey responses themselves, so
# the result depends on which rows are in the survey file (a single response
# scales to all zeros). Confirm this matches how the network's training
# features were prepared, or load the scaler fitted at training time.
scaler = MinMaxScaler()
X_num_scaled = pd.DataFrame(
    scaler.fit_transform(X_num),
    columns=num_features,
    index=X_num.index,  # keep the row labels so the concat below pairs rows correctly
)

# Recombine: remaining columns first, scaled numeric columns after
X = pd.concat([X_cat, X_num_scaled], axis=1)

# Project onto 5 principal components.
# NOTE(review): PCA is also re-fitted on the survey data here. scikit-learn
# rejects n_components larger than min(n_rows, n_columns), so this step needs
# at least 5 survey rows. Confirm whether the projection fitted at training
# time should be reused instead.
pca = PCA(n_components = 5)

X_pca = pca.fit_transform(X)

# Read the serialized network architecture. The context manager closes the
# file even if reading fails.
with open('model_nn.json', 'r') as json_file:
    loaded_model_nn = json_file.read()

# Rebuild the network from its JSON description and restore the saved weights
model_nn = model_from_json(loaded_model_nn)

model_nn.load_weights('model_nn.h5')

model_nn.compile(optimizer='adam', loss='binary_crossentropy')

# Use predict method to get predicted probabilities
predicted_probabilities = model_nn.predict(X_pca)


# for prob in predicted_probabilities:
#     if prob >= 0.5:
#         print('The customer will churn')
#     else:
#         print('The customer will not churn')



**Prediction for the last user input:**

In [27]:
# First output column of the last row, i.e. the most recent survey response
predicted_probabilities_class_1 = predicted_probabilities[-1:, 0]

# 0.5 is the cut-off between "churn" and "no churn"
verdict = (
    'The customer will churn'
    if predicted_probabilities_class_1 >= 0.5
    else 'The customer will not churn'
)
print(verdict)

The customer will not churn
