Checkpoint Objective
This dataset contains banking marketing campaign data and we can use it to optimize marketing campaigns to attract more customers to term deposit subscription.

What is a Term Deposit?

A Term deposit is a deposit that a bank or a financial institution offers with a fixed rate (often better than just opening deposit account) in which your money will be returned back at a specific maturity time.

dataset link : https://drive.google.com/file/d/1kVnOzZ84avAAY1mA2I_Oh6FH3UPEb-_3/view

Instructions
In this assignment you will have to take the following steps:

Import your dataset and perform initial analysis and visualization

Clean the data

Build a  deep learning network to predict the marketing campaign outcome ("deposit" either Yes or No)

You have to submit a jupyter notebook file.

Link : Get started with Google Colab

Please make sure it's a public shareable link.

# Import Dataset and Perform Initial Analysis and Visualization

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
#import dataset
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [None]:
# Load the dataset
data = pd.read_csv('/content/drive/MyDrive/Untitled folder/DATASETS/bank.csv')

In [None]:
# Initial exploration
print(data.head())  # View the first few rows
print(data.info())  # Get information about the dataset (data types, null values, etc.)
print(data.describe())  # Summary statistics for numerical columns


In [None]:
# Visualization (for example)
# Plot histograms for numeric columns
data.hist(figsize=(12, 10))
plt.show()

In [None]:
# Visualize categorical variables (e.g., 'deposit')
plt.figure(figsize=(8, 6))
data['deposit'].value_counts().plot(kind='bar')
plt.xlabel('Deposit')
plt.ylabel('Count')
plt.title('Deposit Distribution')
plt.show()


In [None]:
data.plot(kind='density', subplots=True, sharex=False, layout=(12,3), figsize=(15,10))
plt.show()

In [None]:
import seaborn as sns
plt.figure(figsize=(10, 10))
sns.heatmap(data.corr(), annot=True)
plt.show()

# Clean the Data

In [None]:
# Handling missing values
data.isnull().sum()  # Check for missing values

In [None]:

# Remove duplicates, if they exist
data.duplicated().sum()
# Remove duplicates
#df.drop_duplicates(inplace=True)

# The dataframe has no duplicated rows


In [None]:
# List of columns with 'object' dtype
object_columns = data.select_dtypes(include=['object']).columns.tolist()

print("Columns with 'object' dtype:")
print(object_columns)

In [None]:
# Encode categorical features
#I intend to replace the original categorical columns with their encoded values.

from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
categorical_columns = [ 'job','marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome', 'deposit']


for col in categorical_columns:
    data[col] = label_encoder.fit_transform(data[col])

In [None]:
data.info()

In [None]:
# Split the data into features and target variable
X = data.drop('deposit', axis=1)
y = data['deposit']

In [None]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:

# Scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


#  Build a Deep Learning Network

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout


In [None]:
# Create a Sequential model
model = Sequential()

In [None]:
# Add layers to the model
model.add(Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:

# Train the model
history = model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, y_test))


# Prediction code

In [None]:
# Predictions using the trained model
predictions = model.predict(X_test_scaled)

In [None]:
# Converting probabilities to binary outcomes
threshold = 0.5  # You can adjust this threshold as needed
binary_predictions = (predictions > threshold).astype('int')

In [None]:
# Evaluate the model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print("Accuracy:", accuracy_score(y_test, binary_predictions))
print("\nClassification Report:")
print(classification_report(y_test, binary_predictions))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, binary_predictions))
