In [None]:
import pandas as pd
import numpy as np
from scipy import stats

def load_and_preprocess_data(file_path):
    """Load the transactions CSV and prepare it for analysis.

    Args:
        file_path: Location of the CSV file (anything ``pandas.read_csv``
            accepts). The file must contain a ``date`` column.

    Returns:
        A DataFrame whose ``date`` column is parsed to datetimes and from
        which every row containing a missing value has been removed.
    """
    # Read from the location the caller asked for; the parameter used to be
    # ignored in favour of a hard-coded path.
    df = pd.read_csv(file_path)

    # Parse dates first so unparseable/blank dates become NaT and are removed
    # together with the other incomplete rows below.
    df['date'] = pd.to_datetime(df['date'])

    return df.dropna()

def calculate_statistics(df):
    """Summarise transaction amounts per category.

    Args:
        df: DataFrame with ``category`` and ``amount`` columns.

    Returns:
        A DataFrame indexed by category with ``mean``, ``median`` and ``std``
        columns computed over ``amount``.
    """
    summary_functions = ['mean', 'median', 'std']
    amounts_per_category = df.groupby('category')['amount']
    return amounts_per_category.agg(summary_functions)

def detect_anomalies(df, stats_by_category, threshold=1):
    """Flag transactions whose amount deviates strongly from its category mean.

    A transaction is reported when the absolute Z-score of its amount,
    computed against the mean and standard deviation of its category, is
    strictly greater than ``threshold``.

    Args:
        df: DataFrame with ``transaction_id``, ``date``, ``category`` and
            ``amount`` columns.
        stats_by_category: DataFrame indexed by (unique) category with at
            least ``mean`` and ``std`` columns.
        threshold: Number of standard deviations beyond which an amount is
            considered anomalous. Defaults to 1.

    Returns:
        A list of dicts, one per anomalous transaction, with the keys
        ``transaction_id``, ``date``, ``category``, ``amount`` and
        ``reason_for_anomaly``, in the row order of ``df``.

    Raises:
        KeyError: If a row's category is absent from ``stats_by_category``.
    """
    # Build the per-category lookup once instead of doing a .loc per row.
    category_stats = stats_by_category[['mean', 'std']].to_dict('index')

    anomalies = []
    for _, row in df.iterrows():
        current = category_stats[row['category']]
        std = current['std']

        # std is NaN for a category with a single transaction and 0 when all
        # of its amounts are equal; the Z-score is undefined in both cases,
        # so such rows can never be flagged.
        if not std > 0:
            continue

        z_score = (row['amount'] - current['mean']) / std
        if abs(z_score) > threshold:
            anomalies.append({
                'transaction_id': row['transaction_id'],
                'date': row['date'],
                'category': row['category'],
                'amount': row['amount'],
                'reason_for_anomaly': f'Unusual amount (Z-score: {z_score:.2f})',
            })

    return anomalies

def generate_report(anomalies):
    """Print a human-readable report of the detected anomalies.

    Args:
        anomalies: Iterable of dicts with the keys ``transaction_id``,
            ``date``, ``category``, ``amount`` and ``reason_for_anomaly``.

    The report is written to standard output: a header, one section per
    anomaly terminated by a dashed rule, and a final count.
    """
    print("Anomaly Report:", "=" * 80, sep="\n")

    for entry in anomalies:
        print(
            f"Transaction ID: {entry['transaction_id']}",
            f"Date: {entry['date']}",
            f"Category: {entry['category']}",
            f"Amount: ${entry['amount']:.2f}",
            f"Reason: {entry['reason_for_anomaly']}",
            "-" * 80,
            sep="\n",
        )

    total = len(anomalies)
    print(f"\nTotal anomalies detected: {total}")

def main():
    """Run the full pipeline: load the data, compute stats, detect and report anomalies."""
    # Single source of truth for the input location (a CSV on the mounted
    # Google Drive); previously this variable was assigned but never used.
    file_path = '/content/drive/MyDrive/sam.csv'

    # Load and preprocess data
    df = load_and_preprocess_data(file_path)

    # Per-category mean/median/std of the transaction amounts
    stats_by_category = calculate_statistics(df)

    # Transactions whose amount is unusual for their category
    anomalies = detect_anomalies(df, stats_by_category)

    # Print the report to standard output
    generate_report(anomalies)

# Run the pipeline only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Anomaly Report:
Transaction ID: TRX004
Date: 2024-06-02 00:00:00
Category: Food
Amount: $3000.00
Reason: Unusual amount (Z-score: 1.15)
--------------------------------------------------------------------------------

Total anomalies detected: 1
