In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the credit dataset from the CSV shipped under ../data
df = pd.read_csv('../data/credit_data.csv')

# Show the first few rows. A bare `df.head()` in the middle of a script/cell
# is evaluated and thrown away, so the result is printed explicitly.
print(df.head())

# Give the target column a shorter name for the plots below.
# NOTE: DataFrame.rename ignores labels that are not present, so if the CSV
# lacks 'default.payment.next.month' this line is a silent no-op.
df = df.rename(columns={'default.payment.next.month': 'default'})

# Column dtypes and non-null counts (info() writes to stdout itself)
df.info()

# Summary statistics of the columns
print(df.describe())

# Number of missing values per column
print(df.isnull().sum())

# Bar chart of how many rows fall under each value of 'default'.
# countplot returns the Axes it drew on, so the title is set on it directly.
sns.countplot(data=df, x='default').set_title('Distribution of Default Variable')
plt.show()

# Correlation matrix
# Only numeric columns are correlated: since pandas 2.0, DataFrame.corr()
# defaults to numeric_only=False and raises if any column is non-numeric.
# select_dtypes keeps this working on both old and new pandas and gives the
# same result when every column is already numeric.
corr_matrix = df.select_dtypes(include='number').corr()
plt.figure(figsize=(12, 10))
# Cell annotations are disabled; colour alone encodes the coefficient.
sns.heatmap(corr_matrix, annot=False, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Histogram grid over the DataFrame's columns, drawn on one large figure
df.hist(figsize=(20, 20))
plt.show()

# One box plot per selected column, each on its own figure, to spot outliers
numerical_features = ['LIMIT_BAL', 'AGE', 'BILL_AMT1', 'PAY_AMT1']
for col in numerical_features:
    plt.figure(figsize=(8, 4))
    # boxplot draws on the current figure and returns its Axes
    sns.boxplot(x=df[col]).set_title(f'Boxplot of {col}')
    plt.show()
