In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import io

from google.colab import files

# File name the dataset is expected to have (also the on-disk fallback).
DATA_FILE = 'banking_data.csv'

# Opens the browser file picker; returns a dict of {file name: file bytes}.
uploaded = files.upload()

if uploaded:
    # Parse the bytes that were just uploaded rather than re-reading a path.
    # A repeated upload may be stored under a de-duplicated name, in which
    # case reading the fixed path would silently load the older copy.
    upload_name = DATA_FILE if DATA_FILE in uploaded else next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[upload_name]))
else:
    # Nothing uploaded (dialog cancelled): use the file already on disk.
    df = pd.read_csv(DATA_FILE)

In [None]:
# Notebook-wide plot styling: seaborn theme first, then the font override.
sns.set_theme(
    style="whitegrid",
    palette="pastel",
    font_scale=1.1,
)
# Set after set_theme() so the theme's own font settings do not replace it.
plt.rc('font', family='Liberation Serif')

Age Distribution

In [None]:
# Histogram of client age (30 bins) with a KDE curve overlaid.
fig, ax = plt.subplots()
sns.histplot(data=df, x='age', bins=30, kde=True, color='skyblue', ax=ax)
ax.set_title('Age Distribution of Clients')
ax.set_xlabel('Age in years')
ax.set_ylabel('Count')
plt.show()

Job Type Distribution

In [None]:
# Bar chart of the number of clients per job category
# (value_counts() orders categories from most to least frequent).
fig, ax = plt.subplots()
job_counts = df['job'].value_counts()
job_counts.plot.bar(ax=ax, color='salmon')
ax.set_title('Job Type Distribution')
ax.set_xlabel('Job')
ax.set_ylabel('Number of Clients')
# Tilt the category labels so longer job names stay readable.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

Marital Status Distribution

In [None]:
# Pie chart of marital status shares, labelled with one-decimal percentages.
fig, ax = plt.subplots()
marital_counts = df['marital'].value_counts()
marital_counts.plot.pie(ax=ax, autopct='%1.1f%%', startangle=140)
ax.set_title('Marital Status Distribution')
ax.set_ylabel('')  # remove the series name pandas puts on the y-axis
plt.show()

Education Level Distribution

In [None]:
# Bar chart of the number of clients per education level.
fig, ax = plt.subplots()
education_counts = df['education'].value_counts()
education_counts.plot.bar(ax=ax, color='lightgreen')
ax.set_title('Education Level Distribution')
ax.set_xlabel('Education')
ax.set_ylabel('Number of Clients')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

Default Credit Proportion

In [None]:
# Percentage of clients without / with credit in default.
# reindex() guarantees both labels are present, so data containing only one
# of the two categories renders 0.00% instead of raising KeyError below.
default_counts = (
    df['default']
    .value_counts(normalize=True)
    .mul(100)
    .reindex(['no', 'yes'], fill_value=0.0)
)

# Text-only "card": the axes are hidden and used purely as a canvas.
fig, ax = plt.subplots(figsize=(8, 4))
ax.axis('off')
ax.text(0.5, 0.7, 'Default Credit Proportion', fontsize=30, weight='bold', ha='center')
ax.text(0.5, 0.45, f"No Default: {default_counts['no']:.2f}%", fontsize=20, ha='center', color='seagreen')
ax.text(0.5, 0.25, f"Has Default: {default_counts['yes']:.2f}%", fontsize=20, ha='center', color='salmon')
plt.show()

Average Yearly Balance Distribution

In [None]:
# Histogram (50 bins) plus KDE of the average yearly balance.
fig, ax = plt.subplots()
sns.histplot(data=df, x='balance', bins=50, kde=True, color='purple', ax=ax)
ax.set_title('Distribution of Average Yearly Balance')
ax.set_xlabel('Balance (Euros)')
ax.set_ylabel('Count')
# Only the -5000..10000 window is shown; bins are still computed on all data.
ax.set_xlim(-5000, 10000)
plt.show()

Housing Loan Status

In [None]:
# Number of clients with / without a housing loan.
# reindex() guarantees both labels are present, so data containing only one
# of the two categories renders a count of 0 instead of raising KeyError.
housing_counts = df['housing'].value_counts().reindex(['yes', 'no'], fill_value=0)

# Text-only "card": the axes are hidden and used purely as a canvas.
fig, ax = plt.subplots(figsize=(8, 4))
ax.axis('off')

ax.text(0.5, 0.7, 'Housing Loan Status', fontsize=30, weight='bold', ha='center')
ax.text(0.5, 0.45, f"Yes: {housing_counts['yes']:,} clients", fontsize=20, ha='center', color='steelblue')
ax.text(0.5, 0.25, f"No: {housing_counts['no']:,} clients", fontsize=20, ha='center', color='slategray')
plt.show()

Personal Loan Status

In [None]:
# Number of clients with / without a personal loan.
# reindex() guarantees both labels are present, so data containing only one
# of the two categories renders a count of 0 instead of raising KeyError.
loan_counts = df['loan'].value_counts().reindex(['yes', 'no'], fill_value=0)

# Text-only "card": the axes are hidden and used purely as a canvas.
fig, ax = plt.subplots(figsize=(8, 4))
ax.axis('off')
# Title placed at y=0.7 so this card lines up with the other summary cards.
ax.text(0.5, 0.7, 'Personal Loan Status', fontsize=30, weight='bold', ha='center')
ax.text(0.5, 0.45, f"Yes: {loan_counts['yes']:,} clients", fontsize=20, ha='center', color='indianred')
ax.text(0.5, 0.25, f"No: {loan_counts['no']:,} clients", fontsize=20, ha='center', color='darkslategray')
plt.show()


Communication Types

In [None]:
# Bar chart of the number of clients per contact (communication) type.
fig, ax = plt.subplots()
contact_counts = df['contact'].value_counts()
contact_counts.plot.bar(ax=ax, color='orange')
ax.set_title('Communication Types')
ax.set_xlabel('Contact Type')
ax.set_ylabel('Number of Clients')
plt.show()

Last Contact Day of the Month

In [None]:
# Count of last contacts per day of the month.
# 'day' is an integer calendar day, so draw one unit-width bar centred on each
# value (discrete=True). A fixed bins=31 instead slices the observed min..max
# range into 31 equal pieces whose edges do not fall on whole days.
fig, ax = plt.subplots()
sns.histplot(data=df, x='day', discrete=True, color='teal', ax=ax)
ax.set_title('Last Contact Day of the Month')
ax.set_xlabel('Day')
ax.set_ylabel('Count')
plt.show()

Last Contact Month

In [None]:
# Contacts per month, split by the 'y' outcome column.
fig, ax = plt.subplots()
# Explicit calendar order; without it the bars follow order of appearance.
month_order = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
sns.countplot(
    data=df,
    x='month',
    hue='y',
    order=month_order,
    palette='husl',
    ax=ax,
)
ax.set_title('Last Contact Month Distribution by Subscription Outcome')
ax.set_xlabel('Month')
ax.set_ylabel('Count')
plt.show()

Last Contact Duration Distribution

In [None]:
# Histogram (50 bins) plus KDE of the last contact duration.
fig, ax = plt.subplots()
sns.histplot(data=df, x='duration', bins=50, kde=True, color='coral', ax=ax)
ax.set_title('Distribution of Last Contact Duration (seconds)')
ax.set_xlabel('Duration')
ax.set_ylabel('Count')
# Show only 0..1500; values beyond that are binned but fall outside the view.
ax.set_xlim(0, 1500)
plt.show()

Number of Contacts During Campaign

In [None]:
# Number of contacts made to each client during this campaign.
# The variable is a whole-number count, so use one unit-width bar per value
# (discrete=True). Twenty equal-width bins over the full range would each
# cover a different number of integers, producing artificial peaks and dips.
fig, ax = plt.subplots()
sns.histplot(data=df, x='campaign', discrete=True, color='dodgerblue', ax=ax)
ax.set_title('Number of Contacts During Campaign')
ax.set_xlabel('Number of Contacts')
ax.set_ylabel('Count')
# Zoom on the 0..50 range; larger counts are outside the displayed window.
ax.set_xlim(0, 50)
plt.show()

Days Since Last Contact

In [None]:
# Histogram (30 bins) of days elapsed since the previous campaign's contact.
fig, ax = plt.subplots()
sns.histplot(data=df, x='pdays', bins=30, kde=False, color='lightcoral', ax=ax)
ax.set_title('Days Since Last Contact from Previous Campaign')
ax.set_xlabel('Days')
ax.set_ylabel('Count')
# Left limit of -2 keeps negative values in view.
# NOTE(review): presumably -1 marks "not previously contacted" - confirm.
ax.set_xlim(-2, 400)
plt.show()

Previous Contacts Count

In [None]:
# Clients per number of contacts made before this campaign (low range only).
MAX_PREVIOUS_SHOWN = 15

previous_counts = df['previous'].value_counts().sort_index()

# Select the low range by VALUE. Bars of a pandas bar chart sit at positions
# 0..n-1, so an x-limit would select by position (not by number of contacts)
# and would also slice the bar at the limit in half.
previous_shown = previous_counts[previous_counts.index <= MAX_PREVIOUS_SHOWN]

# Create the figure explicitly, like every other plotting cell.
fig, ax = plt.subplots()
previous_shown.plot.bar(ax=ax, color='mediumseagreen')
ax.set_title('Distribution of Previous Contacts')
ax.set_xlabel('Number of Previous Contacts')
ax.set_ylabel('Number of Clients')
plt.show()

Previous Campaign Outcomes

In [None]:
# Bar chart of how often each previous-campaign outcome occurs.
fig, ax = plt.subplots()
outcome_counts = df['poutcome'].value_counts()
outcome_counts.plot.bar(ax=ax, color='slateblue')
ax.set_title('Previous Campaign Outcomes')
ax.set_xlabel('Outcome')
ax.set_ylabel('Count')
plt.show()

Term Deposit Subscription Distribution

In [None]:
# Pie chart of the 'y' (term deposit subscription) outcome shares.
fig, ax = plt.subplots()
subscription_counts = df['y'].value_counts()
subscription_counts.plot.pie(
    ax=ax,
    autopct='%1.1f%%',
    # Colours follow value_counts() order, i.e. most frequent class first.
    colors=['lightgray', 'lightblue'],
    startangle=90,
)
ax.set_title('Term Deposit Subscription Distribution')
ax.set_ylabel('')  # remove the series name pandas puts on the y-axis
plt.show()

Correlation Matrix

In [None]:
# Annotated heatmap of pairwise correlations between the numeric columns.
fig, ax = plt.subplots()
numeric_cols = [
    'age', 'balance', 'day', 'duration',
    'campaign', 'pdays', 'previous',
]
corr_matrix = df.loc[:, numeric_cols].corr()
sns.heatmap(data=corr_matrix, ax=ax, cmap='coolwarm', annot=True, fmt=".2f")
ax.set_title('Correlation Matrix of Numeric Features')
plt.show()

Correlation with Term Deposit Subscription

In [None]:
# Encode the outcome as 0/1 so it can be correlated with the numeric columns.
# Labels other than 'no'/'yes' map to NaN.
df['y_binary'] = df['y'].map({'no': 0, 'yes': 1})

# Correlation of every numeric column with the encoded outcome, strongest
# positive first. The outcome's correlation with itself (always 1.0) is
# dropped: it is not a feature and would otherwise head the list.
correlations = (
    df.corr(numeric_only=True)['y_binary']
    .drop('y_binary')
    .sort_values(ascending=False)
)

# Text-only "card": the axes are hidden and used purely as a canvas.
fig, ax = plt.subplots(figsize=(8,4))
ax.axis('off')
ax.text(0.5, 0.7, 'Correlation with Term Deposit Subscription', fontsize=25, weight='bold', ha='center')

# One row per feature: name on the left, coefficient right-aligned,
# stepping down the canvas by a fixed amount for each row.
y_pos = 0.6
for feature, corr in correlations.items():
    ax.text(0.15, y_pos, f"{feature}", fontsize=17, weight='bold', ha='left')
    ax.text(0.85, y_pos, f"{corr:.3f}", fontsize=17, ha='right')
    y_pos -= 0.07

plt.show()
