# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the cleaned dataset into a DataFrame. The path is relative, so
# 'cleaned_data.csv' must be present in the current working directory.
# The analysis below reads the columns 'age', 'product_category',
# 'purchase_amount' and 'gender' from this frame.
cleaned_data = pd.read_csv('cleaned_data.csv')

# Step 3: Exploratory Data Analysis (EDA)

# 3.1 Summary Statistics
# For a frame with mixed dtypes, describe() with default arguments
# summarises the numeric columns (count, mean, std, min, quartiles, max).
summary_stats = cleaned_data.describe()
# Display the table explicitly: a bare assignment shows nothing, neither in
# a script nor in a notebook cell where it is not the last expression.
print(summary_stats)

# 3.2 Data Visualization
# Histogram of the 'age' column: 20 bins with a kernel density estimate
# curve drawn on top.
plt.figure(figsize=(8, 6))
sns.histplot(data=cleaned_data, x='age', bins=20, kde=True)
# Label the axes seaborn just drew on, in a single call.
plt.gca().set(
    title='Distribution of Customer Ages',
    xlabel='Age',
    ylabel='Frequency',
)
plt.show()

# Box plot of 'purchase_amount', one box per 'product_category' value.
plt.figure(figsize=(8, 6))
sns.boxplot(data=cleaned_data, x='product_category', y='purchase_amount')
plt.gca().set(
    title='Box Plot of Purchase Amounts by Product Category',
    xlabel='Product Category',
    ylabel='Purchase Amount',
)
# Tilt the category labels by 45 degrees.
plt.xticks(rotation=45)
plt.show()

# 3.3 Correlation Analysis
# Compute pairwise correlations between the numeric columns and draw them
# as an annotated heatmap.
#
# numeric_only=True is required here: since pandas 2.0, DataFrame.corr()
# no longer drops non-numeric columns silently and raises ValueError when
# it meets one. This frame presumably contains such columns ('gender' and
# 'product_category' are plotted as categories elsewhere in this script).
# NOTE: the numeric_only keyword needs pandas >= 1.5.
correlation_matrix = cleaned_data.corr(numeric_only=True)
plt.figure(figsize=(10, 8))
# annot=True writes each coefficient inside its cell.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()

# 3.4 Categorical Data Analysis
# Count plot: number of rows for each distinct value of 'gender'.
plt.figure(figsize=(8, 6))
sns.countplot(data=cleaned_data, x='gender')
# Set the title and both axis labels on the current axes in one call.
plt.gca().set(
    title='Count of Customers by Gender',
    xlabel='Gender',
    ylabel='Count',
)
plt.show()

# 3.5 Time Series Analysis (if applicable)
# If your data contains time-related information, analyze it using time series plots.

# 3.6 Further Analysis
# Conduct any additional analyses as per your project's requirements and objectives.

# EDA can involve many other types of visualizations and statistical tests,
# depending on the nature of your data and the questions you want to answer.

# Save the EDA results or visualizations as needed for your report or presentation.
