In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide pandas display settings, applied from a small table so that
# further options can be added in one place.
for option_name, option_value in (
    ("display.max_columns", None),  # None lifts the cap on displayed columns
    ("display.width", 200),
):
    pd.set_option(option_name, option_value)

print("Libraries loaded successfully.")


In [None]:
# Locations of the two raw CSV inputs, relative to the notebook's directory.
trader_csv_path = "../data/raw/historical_trader_data.csv"
sentiment_csv_path = "../data/raw/fear_greed_index.csv"

# Read both files before reporting anything, so a missing file fails early.
trader_df = pd.read_csv(trader_csv_path)
sentiment_df = pd.read_csv(sentiment_csv_path)

# (rows, columns) of each freshly loaded frame.
print("Trader Data Shape:", trader_df.shape)
print("Sentiment Data Shape:", sentiment_df.shape)


In [None]:
# Rich preview of the first five rows of each dataset, trader data first.
display(trader_df.head(5), sentiment_df.head(5))


In [None]:
# Column dtypes of each dataset; the header and the dtype listing are emitted
# by a single print call, separated by a newline.
print("=== Trader Data Schema ===", trader_df.dtypes, sep="\n")

print("\n=== Sentiment Data Schema ===", sentiment_df.dtypes, sep="\n")


In [None]:
def profile_dataframe(df, name):
    """Print a quick structural profile of a DataFrame.

    The report contains, in order: a banner with ``name``, the row count, the
    column count, the number of null values per column and the number of
    distinct values per column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.
    name : str
        Label interpolated into the banner line.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    print(f"\n===== {name} PROFILE =====")

    row_count, column_count = df.shape
    print("Rows:", row_count)
    print("Columns:", column_count)

    nulls_per_column = df.isna().sum()
    print("\nNull Values:\n", nulls_per_column)

    distinct_per_column = df.nunique()
    print("\nUnique Values:\n", distinct_per_column)

# Profile each dataset in turn: trader data first, then sentiment data.
profile_dataframe(df=trader_df, name="TRADER DATA")
profile_dataframe(df=sentiment_df, name="SENTIMENT DATA")


In [None]:
# Convert time columns
def _parse_datetime_column(frame, column, label):
    """Parse ``frame[column]`` to datetimes and report values that failed to parse.

    ``errors="coerce"`` replaces every unparseable entry with ``NaT`` instead
    of raising. Without a report, those entries would be indistinguishable
    from values that were already missing in the raw data, so the number of
    newly missing values is printed when it is non-zero.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding the column to parse; it is not modified here.
    column : str
        Name of the column to parse.
    label : str
        Dataset name used in the warning message.

    Returns
    -------
    pandas.Series
        The parsed column, with unparseable entries set to ``NaT``.
    """
    missing_before = int(frame[column].isna().sum())
    parsed = pd.to_datetime(frame[column], errors="coerce")
    # Only entries that became missing during parsing count as failures.
    # On a re-run the column is already parsed, so nothing new is reported.
    newly_missing = int(parsed.isna().sum()) - missing_before
    if newly_missing > 0:
        print(
            f"WARNING: {newly_missing} value(s) in {label} column '{column}' "
            "could not be parsed and were set to NaT."
        )
    return parsed


trader_df["time"] = _parse_datetime_column(trader_df, "time", "trader data")
sentiment_df["Date"] = _parse_datetime_column(sentiment_df, "Date", "sentiment data")

# min()/max() skip NaT, so the ranges below cover successfully parsed values only.
print("Trader Time Range:", trader_df["time"].min(), "→", trader_df["time"].max())
print("Sentiment Date Range:", sentiment_df["Date"].min(), "→", sentiment_df["Date"].max())


In [None]:
# One histogram per numeric trade column, each on its own 10x4 figure.
for column_name, plot_title in (
    ("execution price", "Execution Price Distribution"),
    ("size", "Trade Size Distribution"),
):
    fig, ax = plt.subplots(figsize=(10, 4))
    sns.histplot(trader_df[column_name], bins=100, ax=ax)
    ax.set_title(plot_title)
    plt.show()


In [None]:
# Bar chart of how many rows fall into each sentiment classification.
fig, ax = plt.subplots(figsize=(6, 4))
classification_counts = sentiment_df["Classification"].value_counts()
classification_counts.plot.bar(ax=ax)
ax.set_title("Fear vs Greed Distribution")
plt.show()


In [None]:
def _null_percentage(frame):
    """Return the percentage of cells in ``frame`` that are null."""
    n_rows, n_cols = frame.shape
    null_cells = frame.isnull().sum().sum()
    return (null_cells / (n_rows * n_cols)) * 100


trader_rows, trader_cols = trader_df.shape
sentiment_rows, sentiment_cols = sentiment_df.shape

# Headline size and completeness figures for both datasets.
data_quality = {
    "Trader Rows": trader_rows,
    "Trader Columns": trader_cols,
    "Sentiment Rows": sentiment_rows,
    "Sentiment Columns": sentiment_cols,
    "Trader Null %": _null_percentage(trader_df),
    "Sentiment Null %": _null_percentage(sentiment_df),
}

# Last expression of the cell: rendered as a one-column summary table.
pd.DataFrame.from_dict(data_quality, orient="index", columns=["Value"])
