In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style for better looking plots.
# The 'seaborn-v0_8' style sheet name only exists in matplotlib >= 3.6; older
# releases ship the same sheet under the name 'seaborn'. Pick whichever is
# registered so this cell does not raise on an older environment.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
# Use the "husl" palette as seaborn's default color cycle.
sns.set_palette("husl")


In [None]:
# Build a reproducible synthetic dataset: one categorical column, two numeric
# columns drawn from different distributions, and a daily date column.
n_samples = 1000
np.random.seed(42)  # fix the global NumPy seed so every run yields the same draws

# The draws below share the global random state, so their order matters:
# category first, then value1, then value2.
data = {}
data['category'] = np.random.choice(['A', 'B', 'C', 'D'], size=n_samples)
data['value1'] = np.random.normal(loc=50, scale=15, size=n_samples)
data['value2'] = np.random.exponential(scale=2, size=n_samples)
data['date'] = pd.date_range(start='2023-01-01', periods=n_samples, freq='D')

df = pd.DataFrame(data)

# Preview the first rows as the cell's output.
df.head()


In [None]:
# Report the frame's dimensions as text, then show the descriptive statistics
# table as the cell's rich output.
print("Dataset shape:", df.shape)
print("\nBasic statistics:")
summary_stats = df.describe()
summary_stats


In [None]:
# Create a 2x2 overview figure:
#   top-left:     bar chart of category counts
#   top-right:    histogram of value1
#   bottom-left:  box plot of value1 per category
#   bottom-right: scatter of value1 against value2
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Distribution of categories
df['category'].value_counts().plot(kind='bar', ax=axes[0,0])
axes[0,0].set_title('Category Distribution')
axes[0,0].set_xlabel('Category')
axes[0,0].set_ylabel('Count')

# Histogram of value1
axes[0,1].hist(df['value1'], bins=30, alpha=0.7)
axes[0,1].set_title('Distribution of Value1')
axes[0,1].set_xlabel('Value1')
axes[0,1].set_ylabel('Frequency')

# Box plot by category
df.boxplot(column='value1', by='category', ax=axes[1,0])
# pandas' grouped boxplot also sets a figure-level suptitle
# ("Boxplot grouped by category"). On a multi-panel figure that text would
# appear as a heading for all four panels, so clear it.
fig.suptitle('')
axes[1,0].set_title('Value1 by Category')
axes[1,0].set_xlabel('Category')
axes[1,0].set_ylabel('Value1')

# Scatter plot
axes[1,1].scatter(df['value1'], df['value2'], alpha=0.5)
axes[1,1].set_xlabel('Value1')
axes[1,1].set_ylabel('Value2')
axes[1,1].set_title('Value1 vs Value2')

# tight_layout runs last so it accounts for every title and label set above.
plt.tight_layout()
plt.show()
