# 🛍️ Retail Sales Analysis

This project explores a real-world sales dataset from a retail store, using Python for data analysis and visualization. The goal is to uncover insights about customer behavior, product performance and transaction trends.

## 📦 Dataset Features
- Invoice ID, Branch, City, Customer Type (column `Customer type`), Gender
- Product line, Unit Price, Quantity, Tax, Total
- Date, Time, Payment Method (column `Payment`)
- Cost of Goods Sold (COGS), Gross Margin, Gross Income (column `gross income`)
- Rating


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the sales records from the CSV file into a DataFrame.
DATA_PATH = "Dataset.csv"
df = pd.read_csv(DATA_PATH)

# Preview the leading rows to sanity-check the columns that were loaded.
df.head(5)

In [None]:
# Print a concise summary of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Count the missing (null/NaN) entries in each column.
df.isnull().sum()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max); with the
# default arguments only the numeric columns are summarised.
df.describe()

In [None]:
# Number of rows for each distinct value of the Gender column.
df['Gender'].value_counts()

In [None]:
# Number of rows for each distinct value of the 'Customer type' column.
df['Customer type'].value_counts()

In [None]:
# Number of rows for each distinct value of the Payment column.
df['Payment'].value_counts()

In [None]:
# Apply seaborn's "whitegrid" theme; set_theme() is the current name for the
# older sns.set() alias and takes the same arguments.
sns.set_theme(style="whitegrid")

# One bar per gender, with bar height equal to the number of rows.
# seaborn >= 0.13 deprecates `palette` without `hue`, so the x variable is
# also mapped to `hue` and the redundant legend is hidden; the per-bar
# colours stay the same.
sns.countplot(data=df, x='Gender', hue='Gender', palette='Set2', legend=False)
plt.title('Gender Distribution')
plt.show()

In [None]:
# One bar per payment method, with bar height equal to the number of rows.
# seaborn >= 0.13 deprecates `palette` without `hue`, so the x variable is
# also mapped to `hue` and the redundant legend is hidden; the per-bar
# colours stay the same.
sns.countplot(data=df, x='Payment', hue='Payment', palette='Set1', legend=False)
plt.title('Payment Method Distribution')
plt.show()

In [None]:
plt.figure(figsize=(10, 6))

# barplot aggregates Rating per product line (mean by default).
# seaborn >= 0.13 deprecates `palette` without `hue`, so the x variable is
# also mapped to `hue` and the redundant legend is hidden; the per-bar
# colours stay the same.
sns.barplot(
    data=df,
    x='Product line',
    y='Rating',
    hue='Product line',
    palette='coolwarm',
    legend=False,
)
plt.title('Average Rating by Product Line')
# Tilt the category labels so the longer product-line names do not overlap.
plt.xticks(rotation=45)
plt.show()

In [None]:
# Convert the Date column to datetimes once and reuse the result for the
# derived calendar features below.
# NOTE(review): neither to_datetime call passes an explicit `format=`, so
# pandas infers it -- confirm the inferred day/month order against the data.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

# Keep only the time-of-day component of the Time column.
df['Time'] = pd.to_datetime(df['Time']).dt.time

# Weekday and month names, used for grouping in later cells.
df['Day'] = parsed_dates.dt.day_name()
df['Month'] = parsed_dates.dt.month_name()

In [None]:
# Show the weekdays in calendar order, Monday through Sunday.
weekday_order = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]

plt.figure(figsize=(8, 5))
# Bar height is the number of rows whose Day equals that weekday.
sns.countplot(data=df, x='Day', order=weekday_order)
plt.title('Transactions by Day of the Week')
plt.show()

In [None]:
plt.figure(figsize=(10, 6))

# Pairwise correlations, restricted to the numeric columns.
corr_matrix = df.corr(numeric_only=True)

# Annotate every cell with its coefficient and separate cells with thin lines.
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='YlGnBu',
    linewidths=0.5,
)
plt.title('Correlation Matrix of Numerical Features')
plt.show()

In [None]:
# Sum the Total and 'gross income' columns for each distinct Date, then turn
# Date back into a regular column for plotting.
daily_sales = (
    df.groupby('Date')[['Total', 'gross income']]
    .sum()
    .reset_index()
)

plt.figure(figsize=(12, 6))

# (column, legend label, line colour) for each series drawn on the shared axes.
series_styles = [
    ('Total', 'Total Sales', 'blue'),
    ('gross income', 'Gross Income', 'green'),
]
for column, label, color in series_styles:
    plt.plot(daily_sales['Date'], daily_sales[column], label=label, color=color)

plt.xlabel('Date')
plt.ylabel('Amount')
plt.title('Daily Sales & Gross Income Trend')
plt.legend()
plt.xticks(rotation=45)
# Re-fit the layout so the rotated tick labels are not clipped.
plt.tight_layout()
plt.show()

In [None]:
# Total of the Total column per product line, largest first.
top_products = df.groupby('Product line')['Total'].sum().sort_values(ascending=False)

plt.figure(figsize=(10, 5))
# seaborn >= 0.13 deprecates `palette` without `hue`, so the x values are
# also passed as `hue` and the redundant legend is hidden; the per-bar
# colours stay the same.
sns.barplot(
    x=top_products.index,
    y=top_products.values,
    hue=top_products.index,
    palette='viridis',
    legend=False,
)
plt.title('Total Sales by Product Line')
plt.ylabel('Total Sales')
# Tilt the category labels so the longer product-line names do not overlap.
plt.xticks(rotation=45)
plt.show()

In [None]:
# Sum of the 'gross income' column per branch, largest first.
branch_income = df.groupby('Branch')['gross income'].sum().sort_values(ascending=False)

plt.figure(figsize=(8, 5))
# seaborn >= 0.13 deprecates `palette` without `hue`, so the x values are
# also passed as `hue` and the redundant legend is hidden; the per-bar
# colours stay the same.
sns.barplot(
    x=branch_income.index,
    y=branch_income.values,
    hue=branch_income.index,
    palette='magma',
    legend=False,
)
plt.title('Total Gross Income by Branch')
plt.ylabel('Gross Income')
plt.show()

In [None]:
plt.figure(figsize=(8, 5))
# One box per payment method showing the distribution of Rating.
# seaborn >= 0.13 deprecates `palette` without `hue`, so the x variable is
# also mapped to `hue` and the redundant legend is hidden; the per-box
# colours stay the same.
sns.boxplot(
    data=df,
    x='Payment',
    y='Rating',
    hue='Payment',
    palette='Set3',
    legend=False,
)
plt.title('Customer Ratings by Payment Method')
plt.show()