# Data Exploration - Lending Club Dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# Load the raw Lending Club CSV into `df`.
# When the file is missing, only a hint is printed and `df` is left unbound.
data_path = '../data/raw/LendingClub_data.csv'
if os.path.exists(data_path):
    df = pd.read_csv(data_path)
    # Report the (rows, columns) shape, then preview the first few rows.
    print(f"Dataset Loaded: {df.shape}")
    print(df.head())
else:
    print("Dataset not found. Please ensure 'LendingClub_data.csv' is in 'data/raw/'.")

In [None]:
# Basic Info
# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called directly; wrapping it in print() would add a stray "None"
# line after the report.
df.info()
# describe() returns a DataFrame of summary statistics, so it needs an
# explicit print to be displayed.
print(df.describe())

In [None]:
# Target distribution: count of rows per `loan_status` value, drawn as
# horizontal bars (the category is mapped to the y axis).
plt.figure(figsize=(10, 6))
status_ax = sns.countplot(data=df, y='loan_status')
status_ax.set_title('Loan Status Distribution')
plt.show()

In [None]:
# Histogram of the `fico_range_low` column (missing values dropped),
# using 30 bins with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
fico_scores = df['fico_range_low'].dropna()
fico_ax = sns.histplot(fico_scores, bins=30, kde=True)
fico_ax.set_title('FICO Score Distribution')
# Replace the default axis label (the column name) with a readable one.
fico_ax.set_xlabel('FICO Score')
plt.show()

In [None]:
# Pairwise correlation heatmap over the float64/int64 columns only.
numeric_df = df.select_dtypes(include=['float64', 'int64'])
plt.figure(figsize=(12, 10))
corr_matrix = numeric_df.corr()
# Cell annotations are disabled; colour alone encodes the coefficient.
corr_ax = sns.heatmap(corr_matrix, annot=False, cmap='coolwarm')
corr_ax.set_title('Correlation Matrix')
plt.show()