<a href="https://colab.research.google.com/github/Mainuddinchati/Demo_project1/blob/main/Retail_sales.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Retail sales**

**1. Load and Inspect the Dataset**

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Path of the raw retail-sales CSV; kept in one named constant so it is easy to change.
DATA_PATH = '/content/retailsales.csv.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Display the loaded DataFrame as this cell's output.
df

In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# Print the frame summary: columns, non-null counts and dtypes.
df.info()

In [None]:
# Descriptive statistics using the pandas defaults.
df.describe()

**2. Data Cleaning**

In [None]:
# Number of missing values in each column.
df.isna().sum()


In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()


In [None]:
# Remove duplicate rows, modifying df in place (the call itself returns None).
df.drop_duplicates(inplace=True)


In [None]:
# Convert the 'Date' column to pandas datetimes; the time-based resampling
# further down relies on this dtype.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [None]:
# Print the frame summary again to check the dtypes after the 'Date' conversion.
df.info()

In [None]:
# Revenue per row = units sold x unit price.
df['Revenue'] = df['Quantity'].mul(df['Price per Unit'])

In [None]:
# Print the newly computed 'Revenue' column.
print(df['Revenue'])

# **3. Exploratory Data Analysis (EDA)**

In [None]:
# Sum 'Total Amount' per calendar month and draw the totals as a line chart.
sales_by_date = df.set_index('Date')['Total Amount']
monthly_sales = sales_by_date.resample('M').sum()
monthly_sales.plot(kind='line', title='Total Sales Over Time')


In [None]:
# Total sales amount for each product category, shown as a bar chart.
category_sales = df.groupby('Product Category')['Total Amount'].sum()
category_sales.plot.bar(title='Sales by Product Category')


In [None]:
# Rank customers by their summed 'Total Amount' and keep the ten largest.
customer_spend = df.groupby('Customer ID')['Total Amount'].sum()
top_customers = customer_spend.sort_values(ascending=False).iloc[:10]


In [None]:
# Print the ten highest-spending customers.
print(top_customers)

In [None]:
# Share of rows per gender, drawn as a pie chart with percentage labels.
gender_counts = df['Gender'].value_counts()
gender_counts.plot.pie(autopct='%1.1f%%', title='Customer Gender Distribution')


In [None]:
# Histogram of customer ages using ten equal-width bins.
df['Age'].plot.hist(bins=10, title='Customer Age Distribution')


In [None]:
# Bucket customers into decade-wide age bands and compare total sales per band.
#
# pd.cut builds right-closed intervals, so the inner edges mean (20, 30],
# (30, 40], ... Any age outside the listed edges becomes NaN and is silently
# left out of the groupby. With edges starting at 20, every customer aged 20
# or younger was dropped. Open-ended buckets on both sides keep every row,
# and the labels of the original five bands are unchanged.
bins = [-np.inf, 20, 30, 40, 50, 60, 70, np.inf]
labels = ['<=20', '20-30', '30-40', '40-50', '50-60', '60-70', '70+']
df['Age Group'] = pd.cut(df['Age'], bins=bins, labels=labels)

# observed=True plots only the bands that actually occur in the data, and
# makes the handling of the categorical group key explicit.
sales_by_age_group = df.groupby('Age Group', observed=True)['Total Amount'].sum()
sales_by_age_group.plot(kind='bar', title='Sales by Age Group')


In [None]:
# Units sold per product category, largest first.
category_quantity = df.groupby('Product Category')['Quantity'].sum()
top_products = category_quantity.sort_values(ascending=False)


In [None]:
# Print the per-category quantity ranking.
print(top_products)

In [None]:
# Histogram of the 'Quantity' column using ten equal-width bins.
df['Quantity'].plot.hist(bins=10, title='Quantity Distribution per Transaction')


## **4. Time Series Analysis**

In [None]:
# Monthly totals of 'Total Amount' as a line chart (the default plot kind).
amount_by_date = df.set_index('Date')['Total Amount']
amount_by_date.resample('M').sum().plot.line(title='Monthly Sales Trend')


In [None]:
import matplotlib.pyplot as plt


# Monthly totals of 'Total Amount' as a bar chart.
# NOTE(review): recent pandas prefers the 'ME' alias over 'M' for month-end
# grouping; 'M' is kept so older pandas versions still work.
monthly_totals = df.set_index('Date')['Total Amount'].groupby(pd.Grouper(freq='M')).sum()

# A bar plot treats the index as categories and labels every tick with the
# full timestamp string. Use compact year-month labels so the axis is readable.
monthly_totals.index = monthly_totals.index.strftime('%Y-%m')
monthly_totals.plot(kind='bar', title='Sales by Month')

# **5. Customer Segmentation**

In [None]:
# RFM (Recency / Frequency / Monetary) summary, one row per customer.
# The groupby is built once and reused for all three metrics.
per_customer = df.groupby('Customer ID')

# Recency: whole days between the latest date in the data and each customer's
# most recent purchase. The built-in group max plus a vectorised subtraction
# replaces the per-group Python lambda and gives the same values.
latest_date = df['Date'].max()
recency = (latest_date - per_customer['Date'].max()).dt.days

# Frequency: number of non-null transaction ids per customer.
frequency = per_customer['Transaction ID'].count()

# Monetary: total amount spent per customer.
monetary = per_customer['Total Amount'].sum()

# Combine RFM metrics (the three Series are aligned on 'Customer ID').
rfm = pd.DataFrame({'Recency': recency, 'Frequency': frequency, 'Monetary': monetary})


In [None]:
# Print days since the last purchase for each customer.
print(recency)

In [None]:
# Print the transaction count for each customer.
print(frequency)

In [None]:
# Print the total amount spent by each customer.
print(monetary)

In [None]:
# Print the combined Recency / Frequency / Monetary table.
print(rfm)

# **6. Market Basket Analysis**

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

# Transaction x category matrix: one row per 'Transaction ID', one column per
# 'Product Category', each cell holding the summed quantity (0 where the pair
# never occurs).
basket = df.pivot_table(index='Transaction ID', columns='Product Category', values='Quantity', aggfunc='sum', fill_value=0)

# Turn quantities into presence flags. A vectorised comparison replaces the
# element-wise applymap (deprecated in recent pandas) and produces the boolean
# frame that apriori works with; values are now True/False instead of 1/0.
basket = basket > 0

# Itemsets that appear in at least 5% of transactions.
# NOTE(review): if every Transaction ID contains a single product category,
# no multi-item itemsets can exist and the rules table below stays empty —
# confirm against the data.
frequent_itemsets = apriori(basket, min_support=0.05, use_colnames=True)

# Keep only rules whose lift is at least 1.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules.head()

In [None]:
# Print the transaction-by-category presence matrix.
print(basket)

In [None]:
# Print the itemsets returned by apriori.
print(frequent_itemsets)

In [None]:
# Print the full association-rules table.
print(rules)

In [None]:
# Preview the first five association rules.
rules.head(n=5)

# **7. Sales Forecasting**

In [None]:
!pip install prophet
from prophet import Prophet

df_forecast = df[['Date', 'Total Amount']].rename(columns={'Date': 'ds', 'Total Amount': 'y'})

model = Prophet()
model.fit(df_forecast)

future = model.make_future_dataframe(periods=90)
forecast = model.predict(future)
model.plot(forecast)