In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:

# Load the dataset and discard every row that has a missing value in any column.
data = pd.read_csv('retail_sales_dataset.csv').dropna()

# Parse the 'Date' column into pandas datetimes so it can be used for
# date-based indexing and resampling.
data['Date'] = pd.to_datetime(data['Date'])

# Cast the numeric columns to float in one pass.
data = data.astype({
    'Quantity': float,
    'Price per Unit': float,
    'Total Amount': float,
})

In [None]:
# Descriptive Statistics
# Summary statistics of the 'Total Amount' column.
total_amount = data['Total Amount']

mean_sales = total_amount.mean()
median_sales = total_amount.median()
# mode() returns a Series of all modal values; keep the first one.
mode_sales = total_amount.mode()[0]
std_sales = total_amount.std()

# Print each statistic as "<label>: <value>".
for label, value in (
    ("Mean Sales", mean_sales),
    ("Median Sales", median_sales),
    ("Mode Sales", mode_sales),
    ("Standard Deviation of Sales", std_sales),
):
    print(f"{label}: {value}")

In [None]:
# Time Series Analysis
# A line plot joins points in the order the rows appear, so the rows are
# sorted by date first; otherwise out-of-order dates make the line jump back
# and forth across the x-axis. `data` itself is left untouched.
sales_by_date = data.sort_values('Date', kind='stable')

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(sales_by_date['Date'], sales_by_date['Total Amount'])
ax.set_title('Sales Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Amount')
plt.show()

# Sum 'Total Amount' per calendar month (resample on the 'Date' index).
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME'; 'M' is kept so the cell still runs on older pandas - confirm against
# the pandas version this notebook is pinned to before changing it.
monthly_sales = data.set_index('Date').resample('M')['Total Amount'].sum()

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(monthly_sales)
ax.set_title('Monthly Sales Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Amount')
plt.show()

In [None]:
# Customer and Product Analysis

# Upper bound on the number of customers drawn in the per-customer bar charts.
# One bar per customer ID becomes unreadable (and slow to render) once there
# are many customers, so only the top entries are plotted. The full series
# (`purchase_frequency`, `average_spending`) are still computed unchanged.
TOP_N_CUSTOMERS = 20


def _show_bar_chart(series, title, xlabel, ylabel):
    """Draw `series` as a bar chart on a new figure and display it.

    Parameters
    ----------
    series : pandas.Series
        Values to plot; the index supplies the bar labels.
    title, xlabel, ylabel : str
        Figure title and axis labels.
    """
    fig, ax = plt.subplots()
    series.plot(kind='bar', ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.show()


# --- Customer demographics ---------------------------------------------------
customer_age_distribution = data['Age'].value_counts()
customer_gender_distribution = data['Gender'].value_counts()

# value_counts() orders by frequency; sort by age for the plot only, so the
# x-axis runs from youngest to oldest and the shape of the distribution shows.
_show_bar_chart(
    customer_age_distribution.sort_index(),
    'Customer Age Distribution', 'Age', 'Count',
)
_show_bar_chart(
    customer_gender_distribution,
    'Customer Gender Distribution', 'Gender', 'Count',
)

# --- Purchasing behaviour ----------------------------------------------------
# Number of rows per customer, mean 'Total Amount' per customer, and the ten
# most frequent product categories.
purchase_frequency = data['Customer ID'].value_counts()
average_spending = data.groupby('Customer ID')['Total Amount'].mean()
popular_products = data['Product Category'].value_counts().head(10)

# value_counts() is already sorted in descending order, so head() yields the
# most frequent customers.
_show_bar_chart(
    purchase_frequency.head(TOP_N_CUSTOMERS),
    f'Purchase Frequency (Top {TOP_N_CUSTOMERS} Customers)',
    'Customer ID', 'Frequency',
)
# The groupby result is ordered by customer ID, so pick the largest means
# explicitly.
_show_bar_chart(
    average_spending.nlargest(TOP_N_CUSTOMERS),
    f'Average Spending Per Customer (Top {TOP_N_CUSTOMERS} Customers)',
    'Customer ID', 'Average Spending',
)
_show_bar_chart(
    popular_products,
    'Top 10 Popular Products', 'Product Category', 'Count',
)

In [None]:
# Visualization - Heatmap for correlation analysis
# Pairwise correlation between the three numeric sales columns.
numeric_columns = ['Quantity', 'Price per Unit', 'Total Amount']
numeric_data = data[numeric_columns]
corr = numeric_data.corr()

# Draw the correlation matrix with each cell annotated by its coefficient.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Recommendations
# Each point is stored once; the numbered, newline-delimited text is assembled
# from the tuple below (leading blank line, then "N. <point>" per line).
recommendation_points = (
    "Increase marketing efforts during peak sales seasons identified in the time series analysis.",
    "Target marketing campaigns based on customer demographics, focusing on the most profitable customer segments.",
    "Stock up on the top-selling products identified in the product analysis to prevent stockouts.",
    "Implement loyalty programs to increase purchase frequency among customers with high average spending.",
)

recommendations = "\n" + "".join(
    f"{number}. {point}\n"
    for number, point in enumerate(recommendation_points, start=1)
)

print(recommendations)
