## Isolation Forest for Anomaly Detection
**Objective**: Understand and apply the Isolation Forest algorithm to identify anomalies in datasets.

### Task: Anomaly Detection in Financial Transactions
**Steps**:
1. Import Libraries
2. Load Dataset
3. Apply Isolation Forest
4. Visualize Anomalies

In [None]:
# write your code from here
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
import seaborn as sns

def detect_anomalies_isolation_forest(df, contamination=0.05):
    """
    Flag anomalous rows of a numeric DataFrame using an Isolation Forest.

    The model is fitted on every column of ``df``. The result columns are
    written onto ``df`` itself, so the caller's object is modified in place;
    pass a copy if the original must stay untouched.

    Args:
        df (pd.DataFrame): Feature data. Every column must have a numeric dtype.
        contamination (float, optional): Expected proportion of outliers,
            forwarded to ``IsolationForest``. Defaults to 0.05.

    Returns:
        pd.DataFrame: ``df`` with two added columns: 'anomaly' (1 for an
        anomaly, 0 for a normal row) and 'anomaly_score' (the value returned by
        the model's ``decision_function``). If fitting or scoring fails, the
        error is printed and an empty DataFrame is returned instead.

    Raises:
        ValueError: If ``df`` is empty or contains a non-numeric column.
    """
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    # Validate the dtype of each column's *data*. Column labels are normally
    # strings, so checking the labels themselves would reject valid input.
    if not all(pd.api.types.is_numeric_dtype(dtype) for dtype in df.dtypes):
        raise ValueError("All columns must be numeric.")

    try:
        # Fixed random_state keeps the fitted forest reproducible between runs.
        model = IsolationForest(contamination=contamination, random_state=42)
        model.fit(df)
        anomaly_scores = model.decision_function(df)
        predictions = model.predict(df)
    except Exception as e:
        # Best-effort contract: report the failure and hand back an empty frame
        # so callers can test `.empty` instead of handling model errors.
        print(f"Error during anomaly detection: {e}")
        return pd.DataFrame()

    # The model labels inliers as 1; remap so that 1 marks an anomaly, 0 normal.
    df['anomaly'] = np.where(predictions == 1, 0, 1)
    df['anomaly_score'] = anomaly_scores
    return df

def visualize_anomalies(df, x_col, y_col, title="Anomaly Detection using Isolation Forest"):
    """
    Draw a scatter plot of two columns, coloured by the 'anomaly' flag.

    Rows whose 'anomaly' value is 0 are drawn in blue and labelled "Normal";
    rows whose value is 1 are drawn in red and labelled "Anomaly". Rows with
    any other value map to a missing hue label.

    Args:
        df (pd.DataFrame): The DataFrame containing the data and the 'anomaly' column.
        x_col (str): The name of the column to be used for the x-axis.
        y_col (str): The name of the column to be used for the y-axis.
        title (str): Title of the plot.

    Returns:
        None. An empty ``df`` only prints a warning and draws nothing.

    Raises:
        KeyError: If 'anomaly', ``x_col`` or ``y_col`` is missing from ``df``.
    """
    if df.empty:
        print("Warning: Empty DataFrame. No anomalies to visualize.")
        return
    if 'anomaly' not in df.columns:
        raise KeyError("The DataFrame must contain an 'anomaly' column.")
    if not all(col in df.columns for col in [x_col, y_col]):
        raise KeyError(f"Columns '{x_col}' and '{y_col}' must be present in the DataFrame.")

    # Attach the legend text to the data itself. Passing labels=[...] to
    # plt.legend assigns them by position to whatever artists are on the axes,
    # which can mislabel points (e.g. when only one class is present).
    hue_labels = df['anomaly'].map({0: 'Normal', 1: 'Anomaly'})

    plt.figure(figsize=(10, 6))
    ax = sns.scatterplot(
        x=df[x_col],
        y=df[y_col],
        hue=hue_labels,
        hue_order=['Normal', 'Anomaly'],
        palette={'Normal': 'blue', 'Anomaly': 'red'},
    )
    plt.title(title)
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    # Rebuild the legend from the labelled hue levels, only overriding its title.
    ax.legend(title='Anomaly')
    plt.show()

if __name__ == "__main__":
    # Small hand-made sample: the 1000 and 500 transaction amounts are far
    # larger than the rest and come with the lowest counts and balances.
    data = {
        'TransactionAmount': [10, 20, 15, 25, 1000, 30, 12, 18, 500, 22],
        'TransactionCount': [2, 3, 2, 4, 1, 3, 2, 3, 1, 3],
        'AccountBalance': [1000, 2500, 1200, 3000, 500, 1800, 950, 2100, 300, 1550]
    }
    df = pd.DataFrame(data)

    # The detector writes its result columns onto the frame it receives, so it
    # is given a copy to keep `df` as the untouched raw input.
    try:
        df_with_anomalies = detect_anomalies_isolation_forest(df.copy(), contamination=0.1)
    except (ValueError, KeyError, TypeError) as e:
        print(f"Error: {e}")
        df_with_anomalies = pd.DataFrame()

    if not df_with_anomalies.empty:
        try:
            visualize_anomalies(df_with_anomalies, 'TransactionAmount', 'TransactionCount', title='Isolation Forest Anomaly Detection')
            visualize_anomalies(df_with_anomalies, 'TransactionAmount', 'AccountBalance', title='Anomalies in Transaction Amount vs Account Balance')
        except KeyError as e:
            print(f"Error: {e}")
            # Exit with a failure status. The bare exit() helper is injected by
            # the `site` module for interactive use and would report success.
            raise SystemExit(1)
        print("\nDataFrame with Anomaly Labels:")
        print(df_with_anomalies)
    else:
        print("No anomalies detected or error occurred during processing.")


Error: All columns must be numeric.
No anomalies detected or error occurred during processing.
