In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.utils import class_weight
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,precision_score,recall_score,f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam

In [None]:
# Read the cleaned CSV. The path is relative, so the file has to sit in the
# kernel's current working directory.
csv_path = "bank_additional_full_cleaned.csv"
df = pd.read_csv(csv_path)


In [None]:
# Dimensions of the frame as (n_rows, n_columns); shown as the cell output.
df.shape

In [None]:
# Missing-value count for every column; the bare name below renders the
# resulting Series as the cell output.
nul = df.isna().sum()
nul

In [None]:
# Number of rows that exactly repeat an earlier row (the first occurrence of
# each group is not counted as a duplicate).
duplicate_mask = df.duplicated()
dup = duplicate_mask.sum()
dup

In [None]:
# Keep only the first occurrence of each fully identical row. The surviving
# rows keep their original index labels (the index is not reset).
df = df[~df.duplicated()]

In [None]:
# Names of all numeric columns, in frame order.
numeric_cols = df.select_dtypes(include=[np.number]).columns.to_list()

# Skewness per numeric column, reported from most right-skewed to most left-skewed.
skew_vals = df[numeric_cols].skew(numeric_only=True)
skew_sorted = skew_vals.sort_values(ascending=False)
print("Skewness (numeric):")
print(skew_sorted)

# Bar chart of the same ranking.
fig, ax = plt.subplots(figsize=(8, 4))
skew_sorted.plot(kind='bar', color='steelblue', ax=ax)
ax.set_title('Skewness of numeric features')
ax.set_ylabel('Skew')
fig.tight_layout()
plt.show()

# Grid of histograms, three per row, one panel per numeric column.
n = len(numeric_cols)
cols = 3
rows = -(-n // cols)  # ceiling division; evaluates to 0 when there are no numeric columns
if n:
    hist_fig = plt.figure(figsize=(12, 3 * rows))
    for i, col in enumerate(numeric_cols, start=1):
        ax = hist_fig.add_subplot(rows, cols, i)
        # Nulls are dropped per column so each panel uses every available value.
        sns.histplot(df[col].dropna(), kde=False, bins=30, color='salmon', ax=ax)
        ax.set_title(col)
    hist_fig.tight_layout()
    plt.show()


In [None]:
# ---------------------------------------------------------------------------
# Feature / target selection
# ---------------------------------------------------------------------------
features = [
    'age','job','marital','education','default','housing','loan',
    'contact','month','day_of_week','campaign','pdays','previous','poutcome',
    'emp.var.rate','cons.price.idx','cons.conf.idx','euribor3m','nr.employed'
]
X = df[features]

# Binary target: 'yes' -> 1, 'no' -> 0. `.map` turns any other label into NaN
# without warning, so fail fast instead of training on a corrupted target.
y = df['y'].map({'yes': 1, 'no': 0})
if y.isna().any():
    raise ValueError("df['y'] contains values other than 'yes'/'no'; cannot build a binary target")

cat_cols = ['job','marital','education','default','housing','loan',
            'contact','month','day_of_week','poutcome']
# Everything in `features` that is not listed as categorical is scaled as numeric.
num_cols = [col for col in X.columns if col not in cat_cols]

# ---------------------------------------------------------------------------
# Train / test split, done on the RAW rows before any transformer is fitted
# ---------------------------------------------------------------------------
# Fitting the scaler/encoder on the full data and splitting afterwards would
# let test-set means, variances and category vocabularies leak into training.
# The split arguments are unchanged, so the same rows land in each partition.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': a category that only occurs in the test rows
        # is encoded as all zeros (same as the dropped first level) instead of
        # raising at transform time.
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), cat_cols),
        ('num', StandardScaler(), num_cols)
    ],
    # Always return a dense ndarray: Keras `validation_split` does not accept
    # scipy sparse matrices.
    sparse_threshold=0
)

# Learn encoding/scaling parameters from the training rows only, then apply
# them unchanged to the held-out rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Full matrix under the train-fitted preprocessor; kept so that any later cell
# that refers to `X_processed` keeps working.
X_processed = preprocessor.transform(X)

# ---------------------------------------------------------------------------
# Class weights (inverse-frequency, computed from the training labels)
# ---------------------------------------------------------------------------
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weight_dict = dict(zip(np.unique(y_train), class_weights))
print(class_weight_dict)

# ---------------------------------------------------------------------------
# Model: two hidden layers with dropout after the first, sigmoid output
# ---------------------------------------------------------------------------
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# NOTE(review): no TensorFlow seed is set in this cell, so weight
# initialisation and batch shuffling (and therefore the reported metrics)
# will vary between runs.
history = model.fit(
    X_train, y_train,
    validation_split=0.2,   # 20% of the training rows are held out for validation
    epochs=20,
    batch_size=64,
    verbose=2,
    class_weight=class_weight_dict
)

# Final check on the untouched test partition.
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy (from model.evaluate): {acc:.4f}")

In [None]:
# Predicted probabilities flattened to 1-D, then thresholded at 0.5 to obtain
# hard 0/1 labels.
y_pred_prob = model.predict(X_test, verbose=0).reshape(-1)
above_threshold = y_pred_prob >= 0.5
y_pred = above_threshold.astype(int)

# Compute every score first, then report them together.
test_accuracy = accuracy_score(y_test, y_pred)
test_precision = precision_score(y_test, y_pred)
test_recall = recall_score(y_test, y_pred)
test_f1 = f1_score(y_test, y_pred)

print("\nMetrics on test set:")
print(f"Accuracy : {test_accuracy:.4f}")
print(f"Precision: {test_precision:.4f}")
print(f"Recall   : {test_recall:.4f}")
print(f"F1 Score : {test_f1:.4f}")