# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the CSV named by file_path into a DataFrame.
file_path = "retail_sales_dataset.csv"
data = pd.read_csv(file_path)

# First rows of the frame, as returned by DataFrame.head().
print("Dataset Overview:", data.head(), sep="\n")

# Number of missing entries in each column.
print("\nMissing Values:", data.isna().sum(), sep="\n")

# Descriptive statistics as produced by DataFrame.describe().
print("\nSummary Statistics:", data.describe(), sep="\n")

# Data visualization
# 1. Quantity distribution
# Histogram (30 bins) of the 'Quantity' column with a KDE curve overlaid.
plt.figure(figsize=(8, 6))
sns.histplot(data['Quantity'], kde=True, bins=30, color='blue')
plt.title('Distribution of Quantity')
# The x-axis is the plotted variable; the y-axis of a histogram is the
# number of rows falling in each bin.
plt.xlabel('Quantity')
plt.ylabel('Count')
plt.show()

# 2. Price distribution
# Histogram (30 bins) of the 'Price per Unit' column with a KDE curve overlaid.
plt.figure(figsize=(8, 6))
sns.histplot(data['Price per Unit'], kde=True, bins=30, color='green')
plt.title('Distribution of Price per Unit')
plt.xlabel('Price per Unit')
# The y-axis of a histogram is the number of rows in each bin.
plt.ylabel('Count')
plt.show()


# 3. Total amount by price per unit
# Sum 'Total Amount' within each distinct 'Price per Unit' value. Aggregating
# with pandas and drawing the bars with matplotlib avoids seaborn's deprecated
# ``ci=`` argument and its deprecated use of ``palette`` without ``hue``.
amount_by_price = data.groupby('Price per Unit')['Total Amount'].sum()
# Same 'muted' palette, one colour per bar; desat=0.75 matches the default
# saturation seaborn applies to bar fills.
bar_colors = sns.color_palette('muted', n_colors=len(amount_by_price), desat=0.75)
plt.figure(figsize=(8, 6))
# String labels make matplotlib treat each price as an evenly spaced category
# instead of positioning the bars on a numeric scale.
plt.bar(amount_by_price.index.astype(str), amount_by_price.to_numpy(), color=bar_colors)
plt.title('Total Amount by Price per Unit')
plt.xlabel('Price per Unit')
plt.ylabel('Total Amount')
plt.show()

# 4. Age distribution by quantity
# Box plot of the 'Age' values (y-axis) grouped by 'Quantity' (x-axis).
# NOTE(review): recent seaborn releases warn when ``palette`` is passed
# without ``hue``; kept as-is so the call works on older releases too.
plt.figure(figsize=(8, 6))
sns.boxplot(data=data, x='Quantity', y='Age', palette='coolwarm')
plt.title('Age Distribution by Quantity')
plt.xlabel('Quantity')
plt.ylabel('Age')
plt.show()

# 5. Correlation heatmap
# Correlations are computed over the numeric columns only.
numeric_data = data.select_dtypes(include='number')
corr_matrix = numeric_data.corr()

heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,       # write each coefficient inside its cell
    fmt='.2f',        # two decimal places for the annotations
    cmap='coolwarm',
    linewidths=0.5,   # thin separator lines between cells
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Heatmap')
plt.show()

# 6. Sales trend over time
# Parse the 'Date' column; values that cannot be parsed become NaT
# (errors='coerce') and the corresponding rows are dropped.
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
data = data.dropna(subset=['Date'])
# Monthly sums, grouped by calendar month. 'Price per Unit' is summed as well
# so the frame keeps both columns; only 'Total Amount' is plotted below.
sales_trend = (
    data.groupby(data['Date'].dt.to_period('M'))[['Price per Unit', 'Total Amount']]
    .sum()
    .reset_index()
)
# Convert the monthly periods to timestamps so they can be used on a date axis.
sales_trend['Date'] = sales_trend['Date'].dt.to_timestamp()
plt.figure(figsize=(10, 5))
# x: month, y: summed 'Total Amount' for that month.
plt.plot(sales_trend['Date'], sales_trend['Total Amount'], marker='o', label='Sales', color='blue')
plt.title('Sales Trend Over Time')
plt.xlabel('Date')
plt.ylabel('Total Amount')
plt.grid(True)
plt.legend()
plt.show()

# Key Insights
# Heading first, then the four numbered follow-up items, one per line.
print("\nKey Insights:")
print(
    "1. Analyze which categories contribute most to sales and profits.",
    "2. Check if specific regions have higher profitability.",
    "3. Understand seasonal trends in sales to align inventory and promotions.",
    "4. Explore correlations between sales, profit, and other factors for actionable insights.",
    sep="\n",
)