# 🔥 FireDucks vs. Pandas: EDA Benchmark Notebook

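This notebook compares **pandas** and **FireDucks** on common exploratory data analysis (EDA) steps — CSV loading, summary statistics, group-by aggregation, and plotting — using a synthetic dataset with 1 million rows.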

In [None]:
import time

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns

# FireDucks ships its pandas-compatible API (read_csv, DataFrame, ...) in the
# `fireducks.pandas` submodule, so the `fd` alias used by the cells below must
# point there rather than at the top-level `fireducks` package.
import fireducks.pandas as fd
from fireducks.core import get_fireducks_options

# Enable FireDucks benchmark mode.
# FireDucks evaluates lazily by default; benchmark mode makes each call execute
# immediately so that per-call timings reflect the real work being done.
get_fireducks_options().set_benchmark_mode(True)


## 📥 Load the Dataset

In [None]:
# Single source of truth for the input file so both loaders read the same data.
DATA_PATH = "large_dataset.csv"

# Load CSV with Pandas
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() is wall-clock time and can jump if the system
# clock is adjusted mid-measurement.
start_time = time.perf_counter()
df_pd = pd.read_csv(DATA_PATH)
print(f"Pandas Load Time: {time.perf_counter() - start_time:.2f} seconds")

# Load CSV with FireDucks
# NOTE(review): this read runs second, so it may benefit from an OS file cache
# warmed by the pandas read above — consider a warm-up read or repeated runs
# before drawing conclusions from a single pair of timings.
start_time = time.perf_counter()
df_fd = fd.read_csv(DATA_PATH)
print(f"FireDucks Load Time: {time.perf_counter() - start_time:.2f} seconds")


## 📊 Summary Statistics

In [None]:
# Pandas: build the descriptive-statistics table, then show it as the cell output
summary_pd = df_pd.describe()
summary_pd


In [None]:
# FireDucks: same describe() call as the pandas cell, shown as the cell output
summary_fd = df_fd.describe()
summary_fd


## 🔍 GroupBy Aggregation

In [None]:
# Pandas: group rows by "category", keep only the "sales" column,
# and display the per-group mean as the cell output
sales_by_category_pd = df_pd.groupby("category")["sales"]
sales_by_category_pd.mean()


In [None]:
# FireDucks
# Select the "sales" column before aggregating so this cell computes exactly
# what the pandas cell computes. In the pandas API the first positional
# parameter of groupby(...).mean() is `numeric_only`, not a column name, so
# mean("sales") would not restrict the aggregation to the "sales" column.
df_fd.groupby("category")["sales"].mean()


## 📈 Visualizations

In [None]:
# Plotly histogram of the "price" column, built from the pandas DataFrame
fig = px.histogram(
    data_frame=df_pd,
    x="price",
    title="Price Distribution - Pandas",
)
fig.show()


In [None]:
# Plotly histogram of the "price" column, built from the FireDucks DataFrame
fig = px.histogram(
    data_frame=df_fd,
    x="price",
    title="Price Distribution - FireDucks",
)
fig.show()
