In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# Load the semicolon-separated bank marketing sample.
# Can work with bank-full data as well.
df = pd.read_csv("bank.csv", sep=";")

df.head()

In [None]:
# Total number of missing values: per-column NaN counts summed over all columns.
df.isna().sum().sum()

In [None]:
# Print a summary of the frame (columns, non-null counts, dtypes) to check the schema.
df.info()

<font color="red">Input variables:</font>

bank client data:
   - 1 - age (numeric)
   - 2 - job : type of job (categorical: "admin.","unknown","unemployed","management","housemaid","entrepreneur","student",
                                       "blue-collar","self-employed","retired","technician","services") 
   - 3 - marital : marital status (categorical: "married","divorced","single"; note: "divorced" means divorced or widowed)
   - 4 - education (categorical: "unknown","secondary","primary","tertiary")
   - 5 - default: has credit in default? (binary: "yes","no")
   - 6 - balance: average yearly balance, in euros (numeric) 
   - 7 - housing: has housing loan? (binary: "yes","no")
   - 8 - loan: has personal loan? (binary: "yes","no")

related to the last contact of the current campaign:
   - 9 - contact: contact communication type (categorical: "unknown","telephone","cellular") 
  - 10 - day: last contact day of the month (numeric)
  - 11 - month: last contact month of year (categorical: "jan", "feb", "mar", ..., "nov", "dec")
  - 12 - duration: last contact duration, in seconds (numeric)

other attributes:
  - 13 - campaign: number of contacts performed during this campaign and for this client (numeric, includes last contact)
  - 14 - pdays: number of days that passed by after the client was last contacted from a previous campaign (numeric, -1 means client was not previously contacted)
  - 15 - previous: number of contacts performed before this campaign and for this client (numeric)
  - 16 - poutcome: outcome of the previous marketing campaign (categorical: "unknown","other","failure","success")

Output variable (desired target):
  - 17 - y - has the client subscribed to a term deposit? (binary: "yes","no")

<img src="https://miro.medium.com/v2/resize:fit:1200/1*hmtbIgxoflflJqMJ_UHwXw.jpeg" width=800>

In [None]:
# Box plot of client age to inspect its spread and outliers.
# The column is referenced by name (data=df already supplies the frame), the
# figure gets a title so it stands alone, and the trailing ';' hides the repr.
fig, ax = plt.subplots()
sns.boxplot(data=df, x="age", ax=ax)
ax.set_title("Distribution of client age");

In [None]:
# Columns left out of the model: contact channel and the day / month of the last contact.
ToDrop = ["contact", "day", "month"]

df2 = df.drop(labels=ToDrop, axis="columns")
df2.head()

In [None]:
# One-hot encode the multi-valued categorical features.
categorical_features = ['job', 'marital', 'education', 'poutcome']
df3 = pd.get_dummies(data=df2, columns=categorical_features)
df3.head()

In [None]:
# Convert 'yes'/'no' to True/False for the specified columns.
# The booleans are always derived from the untouched strings in `df2` (which
# shares its row index with `df3`), so re-running this cell gives the same
# result instead of turning every already-converted value into False.
# A vectorised comparison also replaces the deprecated DataFrame.applymap.
columns_to_convert = ['default', 'housing', 'loan', 'y']
df3[columns_to_convert] = df2[columns_to_convert].eq('yes')
df3.head()

In [None]:
# Keep only the records of clients aged 70 or younger.
age_within_limit = df3["age"].le(70)
df4 = df3.loc[age_within_limit]
df4.head()

In [None]:
# Class balancing: down-sample the 'no' rows to the number of 'yes' rows.

# Step 1: Take all rows where y is 'yes' (y is boolean at this point)
df4_yes = df4[df4['y']]

# Step 2: Take as many 'no' rows as there are 'yes' rows.
# The count is derived from the data instead of being hard-coded: the age
# filter above (or a different input file such as bank-full) changes the
# number of 'yes' rows. min() keeps sample() from raising when fewer 'no'
# rows than 'yes' rows are available.
df4_no_all = df4[~df4['y']]
n_per_class = min(len(df4_yes), len(df4_no_all))
df4_no = df4_no_all.sample(n=n_per_class, random_state=42)

# Step 3: Combine both classes, then shuffle so rows are not grouped by label
balanced_df4 = (
    pd.concat([df4_yes, df4_no], axis=0)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Print the shape of the balanced DataFrame
print("Shape of balanced DataFrame:", balanced_df4.shape)


In [None]:
# Separate the target from the features, then hold out 30% of the rows for
# testing, stratified on the target.
Y = balanced_df4["y"]
X = balanced_df4.drop(columns="y")

X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    stratify=Y,
    random_state=42,
)

In [None]:
# Logistic regression on standardised features, evaluated on the held-out split.
lr = LogisticRegression()
pipeline = make_pipeline(StandardScaler(), lr).fit(X_train, y_train)

PipelinePredict = pipeline.predict(X_test)
lr_report = classification_report(y_test, PipelinePredict)
print(lr_report)

In [None]:
# Random forest on the same train/test split.
# random_state is fixed (like the sampling and the split) so the reported
# metrics, and the fitted `pipeline` that predict() uses afterwards, are
# reproducible across runs.
rfc = RandomForestClassifier(random_state=42)

pipeline = make_pipeline(StandardScaler(), rfc)
pipeline.fit(X_train, y_train)

PipelinePredict = pipeline.predict(X_test)
print(classification_report(y_test, PipelinePredict))

In [None]:
!pip install gradio

In [None]:
import gradio as gr

def predict(age, default, balance, housing, loan, duration, campaign, pdays, previous, job, marital_status, education_selection, outcome_selection):
    """Run the module-level ``pipeline`` on a single client described by the form fields.

    The feature row is the nine basic features followed by the one-hot
    encodings of job, marital status, education and previous-campaign
    outcome, laid out in the order of the module-level ``jobs``, ``marital``,
    ``education`` and ``outcome`` lists. This layout is assumed to match the
    column order the pipeline was trained on.

    Args:
        age, balance, duration, campaign, pdays, previous: numeric features.
        default, housing, loan: boolean flags.
        job: one of ``jobs``.
        marital_status: one of ``marital``.
        education_selection: one of ``education``.
        outcome_selection: one of ``outcome``.

    Returns:
        The first (and only) element of ``pipeline.predict``'s output.

    Raises:
        ValueError: if a categorical selection is missing or not among the
            known options (it would otherwise become an all-zero one-hot
            group and be scored silently).
    """

    def one_hot(selection, options, field):
        # Encode `selection` as a 0/1 vector over `options`, rejecting unknown values.
        if selection not in options:
            raise ValueError(f"Please select a valid {field}; got {selection!r}.")
        return [1 if selection == option else 0 for option in options]

    # Start with the first 9 features, then append the one-hot groups
    combined_input = [age, default, balance, housing, loan, duration, campaign, pdays, previous]
    combined_input.extend(one_hot(job, jobs, "job"))
    combined_input.extend(one_hot(marital_status, marital, "marital status"))
    combined_input.extend(one_hot(education_selection, education, "education"))
    combined_input.extend(one_hot(outcome_selection, outcome, "previous outcome"))

    # When the pipeline was fitted on a DataFrame, hand it a DataFrame with the
    # same column names: this avoids scikit-learn's "X does not have valid
    # feature names" warning. Otherwise fall back to a plain 2-D array.
    feature_names = getattr(pipeline, "feature_names_in_", None)
    if feature_names is not None and len(feature_names) == len(combined_input):
        model_input = pd.DataFrame([combined_input], columns=list(feature_names))
    else:
        model_input = np.array([combined_input])

    # Make a prediction using the model
    prediction = pipeline.predict(model_input)

    # Return the prediction
    return prediction[0]

# Options offered for each categorical feature. predict() one-hot encodes a
# selection in the order of the corresponding list.
jobs = [
    "admin.",
    "blue-collar",
    "entrepreneur",
    "housemaid",
    "management",
    "retired",
    "self-employed",
    "services",
    "student",
    "technician",
    "unemployed",
    "unknown",
]
marital = ["divorced", "married", "single"]
education = ["primary", "secondary", "tertiary", "unknown"]
outcome = ["failure", "other", "success", "unknown"]

# Interface inputs, in the order of predict()'s parameters: columns 0-8 are
# the basic features; the four radio groups expand to one-hot columns 9-31
# (12 job + 3 marital + 4 education + 4 outcome options).
inputs = [
    "number",    # age
    "checkbox",  # default
    "number",    # balance
    "checkbox",  # housing
    "checkbox",  # loan
    "number",    # duration
    "number",    # campaign
    "number",    # pdays
    "number",    # previous
    gr.Radio(jobs),
    gr.Radio(marital),
    gr.Radio(education),
    gr.Radio(outcome),
]
