In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df = pd.read_csv("../data/retail_sales_dataset.csv")
df['Date'] = pd.to_datetime(df['Date'])
df.columns = df.columns.str.replace(' ', '_')

In [2]:
df.head(10)

Unnamed: 0,Transaction_ID,Date,Customer_ID,Gender,Age,Product_Category,Quantity,Price_per_Unit,Total_Amount
0,1,2023-11-24,CUST001,Male,34,Beauty,3,50,150
1,2,2023-02-27,CUST002,Female,26,Clothing,2,500,1000
2,3,2023-01-13,CUST003,Male,50,Electronics,1,30,30
3,4,2023-05-21,CUST004,Male,37,Clothing,1,500,500
4,5,2023-05-06,CUST005,Male,30,Beauty,2,50,100
5,6,2023-04-25,CUST006,Female,45,Beauty,1,30,30
6,7,2023-03-13,CUST007,Male,46,Clothing,2,25,50
7,8,2023-02-22,CUST008,Male,30,Electronics,4,25,100
8,9,2023-12-13,CUST009,Male,63,Electronics,2,300,600
9,10,2023-10-07,CUST010,Female,52,Clothing,4,50,200


In [3]:
daily_sales = (
    df.groupby(df['Date'].dt.date)['Total_Amount']
    .sum()
    .reset_index()
)

daily_sales.columns = ['Date', 'Total_Amount']
daily_sales.head()


Unnamed: 0,Date,Total_Amount
0,2023-01-01,3600
1,2023-01-02,1765
2,2023-01-03,600
3,2023-01-04,1240
4,2023-01-05,1100


In [4]:
window = 7  # 7-day rolling window

daily_sales['Rolling_Mean'] = daily_sales['Total_Amount'].rolling(window).mean()
daily_sales['Rolling_Std'] = daily_sales['Total_Amount'].rolling(window).std()


In [5]:
daily_sales['Z_Score'] = (
    (daily_sales['Total_Amount'] - daily_sales['Rolling_Mean']) 
    / daily_sales['Rolling_Std']
)

daily_sales.head(10)


Unnamed: 0,Date,Total_Amount,Rolling_Mean,Rolling_Std,Z_Score
0,2023-01-01,3600,,,
1,2023-01-02,1765,,,
2,2023-01-03,600,,,
3,2023-01-04,1240,,,
4,2023-01-05,1100,,,
5,2023-01-06,620,,,
6,2023-01-07,150,1296.428571,1141.974585,-1.0039
7,2023-01-08,625,871.428571,533.015903,-0.462329
8,2023-01-09,200,647.857143,409.693556,-1.093152
9,2023-01-10,230,595.0,439.668436,-0.830171


In [6]:
threshold = 2  # standard practice

daily_sales['Anomaly'] = daily_sales['Z_Score'].abs() > threshold
daily_sales[daily_sales['Anomaly'] == True]


Unnamed: 0,Date,Total_Amount,Rolling_Mean,Rolling_Std,Z_Score,Anomaly
11,2023-01-13,1930,576.428571,628.20322,2.154671,True
62,2023-03-07,3040,973.571429,950.10839,2.17494,True
85,2023-04-01,4400,1201.428571,1492.229077,2.143486,True
119,2023-05-08,4200,1330.0,1351.237088,2.12398,True
127,2023-05-16,7260,1875.714286,2500.545655,2.153244,True
134,2023-05-23,8455,2691.428571,2720.898617,2.11826,True
161,2023-06-19,2945,1020.714286,953.399307,2.018342,True
166,2023-06-24,6220,1688.571429,2228.472411,2.033424,True
183,2023-07-14,5125,1180.0,1907.500546,2.068151,True
203,2023-08-05,5205,1197.142857,1830.846473,2.189073,True
