# Exploratory Data Analysis (EDA)
This notebook explores the credit scoring dataset.

In [None]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scripts.config import RAW_DATA_DIR
from scripts.generate_sample_data import generate_sample_dataset

# Resolve where the raw CSV is expected to live.
data_file = 'credit_scoring_data.csv'
data_path = os.path.join(RAW_DATA_DIR, data_file)

# Fall back to a synthetic 1000-row dataset when no CSV is on disk yet.
# NOTE(review): assumes generate_sample_dataset writes to data_path -- confirm.
if not os.path.exists(data_path):
    generate_sample_dataset(n_samples=1000)

# Load the data and preview the first five rows.
df = pd.read_csv(filepath_or_buffer=data_path)
df.head(n=5)

In [None]:
# Print column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Summary statistics, transposed so each described column becomes a row.
df.describe().transpose()

In [None]:
# Bar chart with one bar per distinct value of the target column.
# NOTE(review): presumably the target is categorical/discrete; if it is a
# continuous score, a histogram would read better -- confirm.
target = 'credit_score'
ax = sns.countplot(data=df, x=target)
ax.set_title('Target Distribution')
plt.show()

In [None]:
# Histogram of every numeric column in a single grid of subplots.
# 'number' selects all numeric dtypes rather than only int64/float64, so
# columns stored as int32, float32, unsigned or nullable numeric types are
# not silently left out of the plots.
numeric_cols = df.select_dtypes(include='number').columns
df[numeric_cols].hist(bins=30, figsize=(12, 10))
plt.tight_layout()  # keep subplot titles and tick labels from overlapping
plt.show()

In [None]:
# Pairwise correlations between the numeric columns only.
corr = df.corr(numeric_only=True)

# Draw on an explicit Axes; center=0 anchors the diverging colormap at zero
# so positive and negative correlations get opposite hues.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()