# Exploratory Data Analysis

This notebook is used for exploratory data analysis (EDA) to visualize data distributions and relationships. It will help in understanding the dataset and guide further modeling decisions.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'whitegrid' style to every plot drawn in this notebook.
sns.set(style='whitegrid')

# Read the CSV file into a DataFrame.
# The path below is a placeholder: point it at your own dataset.
data = pd.read_csv(filepath_or_buffer='path/to/your/dataset.csv')

# Preview the top of the table (five rows, the pandas default).
data.head(n=5)

In [None]:
# Compute descriptive statistics for the dataset, then show them as the
# cell output.
summary_stats = data.describe()
summary_stats

In [None]:
# Distribution of a single column: 30-bin histogram with a KDE curve on top.
# 'column_name' is a placeholder: replace it with a column from your dataset.
hist_fig, hist_ax = plt.subplots(figsize=(12, 6))
sns.histplot(data['column_name'], bins=30, kde=True, ax=hist_ax)

# Label the axes explicitly instead of relying on seaborn's automatic labels.
hist_ax.set_title('Distribution of Column Name')
hist_ax.set_xlabel('Column Name')
hist_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Pairwise relationship between two features, drawn as a scatter plot.
# 'feature1' / 'feature2' are placeholders: replace them with your own
# feature (column) names.
scatter_fig, scatter_ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(data=data, x='feature1', y='feature2', ax=scatter_ax)

# Human-readable title and axis labels.
scatter_ax.set_title('Relationship between Feature1 and Feature2')
scatter_ax.set_xlabel('Feature1')
scatter_ax.set_ylabel('Feature2')
plt.show()

In [None]:
# Correlation heatmap
# Correlation is only defined for numeric data. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns by default and raises
# on string/object columns, so restrict the frame to numeric and boolean
# columns first. select_dtypes() is used rather than corr(numeric_only=True)
# so the cell also runs on older pandas versions.
numeric_data = data.select_dtypes(include=['number', 'bool'])
correlation_matrix = numeric_data.corr()

plt.figure(figsize=(10, 8))
# annot=True writes each coefficient into its cell, formatted to two decimals.
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', square=True)
plt.title('Correlation Heatmap')
plt.show()