# Exploratory Data Analysis (EDA) - Xente Dataset

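This notebook explores the Xente dataset in preparation for credit risk modeling: dataset structure and summary statistics, the distributions of `Amount` and `ProductCategory`, feature correlations, missing values, and outliers in `Amount`.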

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
# NOTE: the path is relative, so it resolves against the current working
# directory of the kernel/process that runs this cell.
df = pd.read_csv('data/raw/xente_dataset.csv')

# Overview
print('Dataset Info:')
# DataFrame.info() writes its report straight to stdout and returns None;
# wrapping it in print() would emit a spurious "None" line after the report.
df.info()
print('\nSummary Statistics:')
# describe() summarises the numeric columns by default.
print(df.describe())

# Numerical feature distribution: histogram of the 'Amount' column (50 bins),
# drawn on an explicitly created Axes.
amount_fig, amount_ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Amount'], bins=50, ax=amount_ax)
amount_ax.set_title('Distribution of Amount')
plt.show()

# Categorical feature distribution: number of rows per 'ProductCategory' value.
category_fig, category_ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='ProductCategory', ax=category_ax)
# Tilt the category labels so they do not overlap along the x-axis.
plt.setp(category_ax.get_xticklabels(), rotation=45)
category_ax.set_title('Product Category Distribution')
plt.show()

# Correlation analysis
# Correlation is only defined for numeric data. Calling df.corr() on a frame
# that still contains string/object columns (this script itself treats
# 'ProductCategory' as categorical) raises in pandas >= 2.0, where
# numeric_only defaults to False. Selecting numeric and boolean columns up
# front works on both old and new pandas versions.
numeric_df = df.select_dtypes(include=['number', 'bool'])
plt.figure(figsize=(10, 6))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Missing values: count of null entries in each column.
print('\nMissing Values:')
missing_per_column = df.isna().sum()
print(missing_per_column)

# Outlier detection: horizontal box plot of 'Amount' on an explicit Axes.
box_fig, box_ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x=df['Amount'], ax=box_ax)
box_ax.set_title('Box Plot of Amount')
plt.show()