# Synthetic Financial Dataset – Distribution & Skewness Analysis

## Load Dataset

In [None]:

import pandas as pd
import matplotlib.pyplot as plt

# Read the dataset from a CSV file; the relative path is resolved against
# the current working directory.
dataset_path = "synthetic_financial_dataset.csv"
df = pd.read_csv(dataset_path)

# Preview the first few rows as a quick sanity check of the load.
df.head()


## Descriptive Statistics

In [None]:

# Summarize df with pandas' default descriptive statistics; as the last
# expression of the cell, the resulting table is what the notebook displays.
df.describe()


## Distribution Check (Histogram + KDE)

In [None]:

# Columns whose distributions are inspected below.
numeric_cols = ["age", "monthly_income", "experience_years", "credit_score"]

# One figure per column: a density-normalised histogram with a KDE curve
# drawn over it on the same axes.
for col in numeric_cols:
    plt.figure()
    col_values = df[col]
    # density=True puts the histogram on the same scale as the KDE curve.
    col_values.plot.hist(bins=30, density=True, alpha=0.6)
    col_values.plot.kde()
    plt.title(f"Distribution of {col}")
    plt.xlabel(col)
    plt.show()


## Handling Skewness

A log transformation (`log1p`, i.e. log(1 + x)) is applied to the positively skewed features `monthly_income` and `experience_years`.

In [None]:

import numpy as np

# Add a "<column>_log" companion for each transformed feature.
# log1p computes log(1 + x), so a value of 0 maps to 0 instead of -inf.
for source_col in ("monthly_income", "experience_years"):
    df[f"{source_col}_log"] = np.log1p(df[source_col])

# Show each original column next to its log-transformed version so the
# summary statistics can be compared side by side.
comparison_cols = [
    "monthly_income", "monthly_income_log",
    "experience_years", "experience_years_log",
]
df[comparison_cols].describe()


## Distribution After Transformation

In [None]:

# Plot the log-transformed columns the same way as the raw ones: a
# density-normalised histogram with a KDE curve over it, one figure each.
for col in ["monthly_income_log", "experience_years_log"]:
    plt.figure()
    transformed = df[col]
    transformed.plot.hist(bins=30, density=True, alpha=0.6)
    transformed.plot.kde()
    plt.title(f"Distribution of {col}")
    plt.xlabel(col)
    plt.show()
