<a href="https://colab.research.google.com/github/Mehaboob999/notebook-rough/blob/main/05_anomaly_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import IsolationForest


In [None]:
# Read the cleaned sales table from disk and preview the first rows.
sales_csv_path = '/content/cleaned_sales.csv'
df = pd.read_csv(sales_csv_path)
df.head()



In [None]:
# order_date must be a real datetime column before it can be bucketed by month.
df['order_date'] = pd.to_datetime(df['order_date'])

# Sum revenue and profit per calendar month; order_date comes back as a regular column.
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of 'ME' —
# confirm which pandas version this notebook targets before changing it.
month_bucket = pd.Grouper(key='order_date', freq='M')
monthly = (
    df.groupby(month_bucket)[['revenue', 'profit']]
    .sum()
    .reset_index()
)
monthly.head()


In [None]:
# Fit an Isolation Forest on the two monthly totals (revenue and profit); the date
# itself is not passed to the model. The fixed random_state keeps runs repeatable.
feature_cols = ['revenue', 'profit']
model = IsolationForest(contamination=0.05, random_state=42)
monthly['anomaly'] = model.fit_predict(monthly[feature_cols])

# A prediction of -1 marks an anomaly; every other value is labelled 'Normal'.
flag_names = {True: 'Anomaly', False: 'Normal'}
monthly['anomaly_flag'] = monthly['anomaly'].eq(-1).map(flag_names)


In [None]:
# Monthly revenue as a line, with the months flagged as anomalies overlaid in red.
fig, ax = plt.subplots(figsize=(12, 6))
flagged_months = monthly.loc[monthly['anomaly_flag'] == 'Anomaly']
sns.lineplot(data=monthly, x='order_date', y='revenue', label='Revenue', ax=ax)
sns.scatterplot(
    data=flagged_months,
    x='order_date',
    y='revenue',
    color='red',
    label='Anomaly',
    ax=ax,
)
ax.set_title("Revenue Over Time with Anomalies")
plt.show()


In [None]:
# Monthly profit as a line, with the months flagged as anomalies overlaid in orange.
fig, ax = plt.subplots(figsize=(12, 6))
flagged_months = monthly.loc[monthly['anomaly_flag'] == 'Anomaly']
sns.lineplot(data=monthly, x='order_date', y='profit', label='Profit', ax=ax)
sns.scatterplot(
    data=flagged_months,
    x='order_date',
    y='profit',
    color='orange',
    label='Anomaly',
    ax=ax,
)
ax.set_title("Profit Over Time with Anomalies")
plt.show()


In [None]:
# Persist the whole `monthly` frame (all rows, not only the flagged ones) as CSV.
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists first; otherwise this cell fails on a fresh environment.
output_path = Path('../data/anomalies.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
monthly.to_csv(output_path, index=False)
print("✅ anomalies.csv saved!")
