# Data Exploration Notebook

This notebook performs exploratory data analysis (EDA) on the dataset: it loads the data, summarizes it, checks for missing values, and visualizes a feature distribution and the correlation matrix to build an initial understanding of the data.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set visualization style
# set_theme() is seaborn's preferred interface; sns.set() is only a legacy
# alias for it that the seaborn docs say may be removed in the future.
sns.set_theme(style='whitegrid')

# Load the dataset
# The path is relative, so it is resolved against the kernel's working
# directory when the cell runs.
data_path = '../data/raw/your_dataset.csv'  # Update with your dataset path
data = pd.read_csv(data_path)

# Display the first few rows of the dataset
# Kept as the last expression so the notebook renders it as the cell output.
data.head()

In [None]:
# Summary statistics for `data`, rendered as the cell output.
# NOTE(review): called with default arguments, describe() is expected to
# summarize only the numeric columns of a mixed-dtype frame; pass
# include='all' if non-numeric columns should be summarized too — confirm
# against the actual dataset.
data.describe()

In [None]:
# Count the null entries in every column, then show only the columns
# that actually contain at least one.
missing_values = data.isna().sum()
missing_values.loc[missing_values.gt(0)]

In [None]:
# Histogram (30 bins) with a KDE overlay for a single column of `data`.
# NOTE(review): 'your_feature' looks like a placeholder column name —
# replace it with a real column before running.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['your_feature'], bins=30, kde=True, ax=ax)
ax.set_title('Distribution of Your Feature')
ax.set_xlabel('Your Feature')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Correlation matrix
# Restrict to numeric/boolean columns before correlating: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# on them instead (e.g. on string or datetime columns).
numeric_data = data.select_dtypes(include=['number', 'bool'])

# Compute the matrix before creating the figure so that a failure here
# does not leave an empty figure behind.
correlation_matrix = numeric_data.corr()

plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()