# Banking Dataset Analysis
## Exploratory Data Analysis (EDA) and Insights
This notebook explores a dataset from a bank's telemarketing campaign, analyzing customer attributes and how they relate to the likelihood of subscribing to a term deposit.

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Read the campaign data into a DataFrame.
# The path is relative, so the CSV must be reachable from the notebook's working directory.
file_path = "banking_data.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows as the cell output
df.head()


## 1. Data Overview
Understanding the structure, data types, and summary statistics.

In [None]:

# Basic information about the dataset: prints the column names, non-null counts
# and dtypes of `df` (output goes to stdout, nothing is returned)
df.info()

# Summary statistics; include='all' covers non-numeric columns as well as numeric
# ones. As the last expression in the cell, the resulting table is displayed.
df.describe(include='all')


## 2. Age Distribution
Analyzing the distribution of client ages.

In [None]:

# Histogram of client ages (30 bins) with a kernel density estimate overlaid
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=df['age'], bins=30, kde=True, color='blue', ax=ax)
ax.set(title='Distribution of Age among Clients', xlabel='Age', ylabel='Count')
plt.show()


## 3. Job Type Distribution
Understanding job roles of clients.

In [None]:

# Horizontal bar chart of job categories, ordered by frequency (most common first)
job_order = df['job'].value_counts().index
fig, ax = plt.subplots(figsize=(12, 5))
sns.countplot(data=df, y='job', order=job_order, palette="viridis", ax=ax)
ax.set_title('Distribution of Job Types')
ax.set_xlabel('Count')
ax.set_ylabel('Job Type')
plt.show()


## 4. Marital Status Distribution

In [None]:

# Bar chart counting clients in each marital-status category
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x='marital', palette="coolwarm", ax=ax)
ax.set(
    title='Marital Status Distribution',
    xlabel='Marital Status',
    ylabel='Count',
)
plt.show()


## 5. Education Level Distribution

In [None]:

# Bar chart counting clients at each education level
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x='education', palette="Set2", ax=ax)
ax.set_title('Education Level Distribution')
ax.set_xlabel('Education Level')
ax.set_ylabel('Count')
plt.show()


## 6. Credit Default Proportion

In [None]:

# Bar chart of the `default` column; bars show raw counts per category
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='default', palette="pastel", ax=ax)
ax.set(
    title='Proportion of Clients with Credit Default',
    xlabel='Credit Default (Yes/No)',
    ylabel='Count',
)
plt.show()


## 7. Distribution of Average Yearly Balance

In [None]:

# Histogram of the `balance` column (50 bins) with a kernel density estimate overlaid
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=df['balance'], bins=50, kde=True, color='green', ax=ax)
ax.set_title('Distribution of Average Yearly Balance')
ax.set_xlabel('Balance (Euros)')
ax.set_ylabel('Count')
plt.show()


## 8. Housing Loan Count

In [None]:

# Bar chart counting clients per value of the `housing` column
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='housing', palette="muted", ax=ax)
ax.set(
    title='Number of Clients with Housing Loans',
    xlabel='Housing Loan (Yes/No)',
    ylabel='Count',
)
plt.show()


## 9. Personal Loan Count

In [None]:

# Bar chart counting clients per value of the `loan` column
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='loan', palette="Blues_r", ax=ax)
ax.set_title('Number of Clients with Personal Loans')
ax.set_xlabel('Personal Loan (Yes/No)')
ax.set_ylabel('Count')
plt.show()


## 10. Communication Types Used

In [None]:

# Bar chart counting contacts made through each communication channel
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x='contact', palette="Set1", ax=ax)
ax.set(
    title='Communication Type Used for Contacting Clients',
    xlabel='Contact Type',
    ylabel='Count',
)
plt.show()


## 11. Last Contact Month Distribution

In [None]:

# Bar chart of last-contact month, with bars laid out in calendar order
# rather than in order of appearance in the data
month_order = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
]
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, x='month', order=month_order, palette="husl", ax=ax)
ax.set_title('Last Contact Month Distribution')
ax.set_xlabel('Month')
ax.set_ylabel('Count')
plt.show()


## 12. Term Deposit Subscription Analysis

In [None]:

# Bar chart counting rows per value of the target column `y`
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='y', palette="coolwarm", ax=ax)
ax.set(
    title='Term Deposit Subscription Distribution',
    xlabel='Subscribed (Yes/No)',
    ylabel='Count',
)
plt.show()


## 13. Correlation Analysis
Checking how attributes relate to term deposit subscription.

In [None]:

# Correlation heatmap of the numeric features, including the subscription outcome.
#
# `corr(numeric_only=True)` drops every non-numeric column, so a text-valued `y`
# would never reach the heatmap and the plot could not show how any attribute
# relates to subscription. Encode it as a 0/1 column first when needed.
corr_input = df
if not pd.api.types.is_numeric_dtype(df['y']):
    # NOTE(review): assumes the outcome labels are "yes"/"no" (case-insensitive);
    # any other value, including missing ones, maps to NaN and is excluded
    # pairwise by corr(). Confirm against the actual data.
    y_encoded = (
        df['y']
        .astype(str)
        .str.strip()
        .str.lower()
        .map({'yes': 1, 'no': 0})
    )
    # assign() returns a new frame, so `df` itself is left untouched
    corr_input = df.assign(y_encoded=y_encoded)

plt.figure(figsize=(12, 8))
corr = corr_input.corr(numeric_only=True)
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f")
plt.title('Feature Correlation Heatmap')
plt.show()


## 14. Insights and Conclusions
- Age and job type may influence term deposit subscriptions; the univariate plots above do not test this directly, so it should be confirmed by comparing subscription rates across groups.
- Higher education levels may be correlated with higher subscription rates.
- Contact duration may play a crucial role in customer conversion.
- Housing and personal loans might influence financial decisions.