# **1. Data Processing**

In [110]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

data = pd.read_csv('BankCustomerData.csv')

In [111]:
data_dummies = pd.get_dummies(data, drop_first = True)
print(data.head(42639))

       age           job  marital  education default  balance housing loan  \
0       58    management  married   tertiary      no     2143     yes   no   
1       44    technician   single  secondary      no       29     yes   no   
2       33  entrepreneur  married  secondary      no        2     yes  yes   
3       47   blue-collar  married    unknown      no     1506     yes   no   
4       33       unknown   single    unknown      no        1      no   no   
...    ...           ...      ...        ...     ...      ...     ...  ...   
42634   21       student   single  secondary      no     2488      no   no   
42635   87       retired  married    primary      no     2190      no   no   
42636   34   blue-collar  married    primary      no     6718      no   no   
42637   22       student   single  secondary      no      254      no   no   
42638   32    management   single   tertiary      no     1962      no   no   

         contact  day month  duration  campaign  pdays  previou

In [112]:
print(data.isnull().sum())

age             0
job             0
marital         0
education       0
default         0
balance         0
housing         0
loan            0
contact         0
day             0
month           0
duration        0
campaign        0
pdays           0
previous        0
poutcome        0
term_deposit    0
dtype: int64


In [113]:
data_dummies['req_age'] = (data['age']>33).astype(int)
data_dummies['new_loan'] = data['loan'].apply(lambda x:1 if x=="yes" else 0)

In [114]:
x = data_dummies.drop(['age','balance','new_loan','req_age'],axis=1)
y = data_dummies['req_age']

In [115]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20, random_state=80)

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.fit_transform(x_test)

In [116]:
model = LogisticRegression()
model.fit(x_train_scaled, y_train)

In [117]:
y_pred = model.predict(x_test_scaled)

In [118]:
accuracy = accuracy_score(y_test,y_pred)
bank_matrix = confusion_matrix(y_test,y_pred)
bank_report = classification_report(y_test,y_pred)

print(f"Accuracy: {accuracy}")
print("Confusion Matrix: ")
print(bank_matrix)
print("Bank Report: ")
print(bank_report)

Accuracy: 0.7666510318949343
Confusion Matrix: 
[[1076 1304]
 [ 686 5462]]
Bank Report: 
              precision    recall  f1-score   support

           0       0.61      0.45      0.52      2380
           1       0.81      0.89      0.85      6148

    accuracy                           0.77      8528
   macro avg       0.71      0.67      0.68      8528
weighted avg       0.75      0.77      0.75      8528



6. **Summarize the model's performance and discuss any insights or implications for the bank's marketing strategies.**

# **CONCLUSION:**
> To summarize the model's performance, it achieved an accuracy rate of 76% in predicting the customers behavior based on the demographic data. This suggests that the model can effectively identify patterns and trends, allowing the bank to tailor its marketing strategies more precisely. Insights include the importance of targeting specific demographics with personalized offers and understanding customer preferences to enhance engagement.

