# Challenge

Another approach to identifying fraudulent transactions is to look for outliers in the data. Standard deviation or quartiles are often used to detect outliers. Using this starter notebook, code two Python functions:

* One that uses standard deviation to identify anomalies for any cardholder.

* Another that uses interquartile range to identify anomalies for any cardholder.

## Identifying Outliers Using Standard Deviation

In [None]:
# Initial imports
import pandas as pd
import numpy as np
import random
from sqlalchemy import create_engine
import psycopg2


In [None]:
# Connection URL for the PostgreSQL database on localhost that this notebook queries
database_url = "postgresql://postgres:postgres@localhost:5432/fraud_detection"
engine = create_engine(database_url)


In [None]:
# Write a function that locates outliers using standard deviation

def identify_outliers_std(data, num_std=3):
    """
    Identify outliers using standard deviation.

    A value is reported as an outlier when its absolute distance from the
    mean is strictly greater than ``num_std`` standard deviations. The
    standard deviation is computed with ``np.std`` (population form).

    Args:
    - data: A list or array of numerical data.
    - num_std: How many standard deviations from the mean a value may lie
      before it is flagged. Defaults to 3.

    Returns:
    - outliers: A list of outlier values, in the order they appear in data.
      Empty input yields an empty list.
    """
    # Nothing to measure: return early instead of letting np.mean/np.std
    # emit RuntimeWarnings and produce NaN on an empty sequence.
    if len(data) == 0:
        return []

    # Calculate mean and the distance beyond which a value counts as an outlier
    mean = np.mean(data)
    threshold = num_std * np.std(data)

    # Keep the original values (not a converted copy) so callers get back
    # exactly what they passed in.
    return [value for value in data if abs(value - mean) > threshold]


In [None]:
# Find anomalous transactions for 3 random card holders
# Importing necessary libraries
import pandas as pd
from sqlalchemy import create_engine

# Create a connection to the database
engine = create_engine("postgresql://username:password@localhost:5432/data")  # Replace "username:password" with your own credentials; access to the "Data" folder is required

# Pull every transaction belonging to 3 randomly chosen cards.
# DISTINCT is applied in a derived table first: PostgreSQL rejects
# "SELECT DISTINCT card_id ... ORDER BY random()" because, with DISTINCT,
# every ORDER BY expression must appear in the select list.
query = """
SELECT *
FROM transaction
WHERE card_id IN (
    SELECT card_id
    FROM (SELECT DISTINCT card_id FROM transaction) AS cards
    ORDER BY random()
    LIMIT 3
);
"""

# Execute the SQL query and load the results into a DataFrame
transactions = pd.read_sql(query, engine)

# Flag each card's transactions against that card's own mean and standard
# deviation, so one card's spending pattern does not mask another's.
flagged = []
for _, card_transactions in transactions.groupby("card_id"):
    outlier_amounts = identify_outliers_std(card_transactions["amount"].tolist())
    flagged.append(card_transactions[card_transactions["amount"].isin(outlier_amounts)])

# pd.concat raises on an empty list, so fall back to an empty frame that
# still carries the transaction columns.
anomalous_transactions = pd.concat(flagged) if flagged else transactions.iloc[0:0]

# Display the DataFrame with anomalous transactions
print(anomalous_transactions)


## Identifying Outliers Using Interquartile Range

In [None]:
# Write a function that locates outliers using interquartile range
def find_outliers_iqr(data, multiplier=1.5):
    """
    Identify outliers using the interquartile range (IQR).

    Values strictly below ``Q1 - multiplier * IQR`` or strictly above
    ``Q3 + multiplier * IQR`` are reported as outliers.

    Args:
    - data: A pandas Series (or DataFrame) of numerical data. Lists and
      arrays are also accepted and are converted to a float Series.
    - multiplier: Width of the fences in units of IQR. Defaults to 1.5.

    Returns:
    - outliers: The subset of data outside the fences, keeping the
      original index labels.
    """
    # Plain lists/arrays have no .quantile(); wrap them so they behave like a Series.
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        data = pd.Series(data, dtype="float64")

    # First and third quartiles and the spread between them
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    iqr = q3 - q1

    # Define lower and upper bounds
    lower_bound = q1 - multiplier * iqr
    upper_bound = q3 + multiplier * iqr

    # Identify outliers
    return data[(data < lower_bound) | (data > upper_bound)]


In [None]:
# Find anomalous transactions for 3 random card holders
import pandas as pd
from sqlalchemy import create_engine

# Define the function to find outliers using IQR
def find_outliers_iqr(data, multiplier=1.5):
    """
    Return the values of data that fall outside the IQR fences.

    The fences are ``Q1 - multiplier * IQR`` and ``Q3 + multiplier * IQR``;
    only values strictly outside them are returned.

    Args:
    - data: A pandas Series (or DataFrame) of numerical data. Lists and
      arrays are converted to a float Series first.
    - multiplier: Fence width in units of IQR. Defaults to 1.5.

    Returns:
    - outliers: The out-of-fence subset of data, with its original index.
    """
    # Accept plain sequences as well as pandas objects.
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        data = pd.Series(data, dtype="float64")

    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    fence = multiplier * (q3 - q1)

    is_outlier = (data < q1 - fence) | (data > q3 + fence)
    return data[is_outlier]

# Create a connection to the database
engine = create_engine("postgresql://username:password@localhost:5432/data")  # Replace "username:password" with your own credentials; access to the "Data" folder is required

# Fetch the transactions of 3 randomly chosen cards in a single query.
# - DISTINCT is applied in a derived table first: PostgreSQL rejects
#   "SELECT DISTINCT card_id ... ORDER BY random()" because, with DISTINCT,
#   every ORDER BY expression must appear in the select list.
# - Doing the card selection inside the query avoids formatting card ids
#   into SQL text on the Python side.
query = """
SELECT card_id, amount
FROM transaction
WHERE card_id IN (
    SELECT card_id
    FROM (SELECT DISTINCT card_id FROM transaction) AS cards
    ORDER BY random()
    LIMIT 3
);
"""
transactions = pd.read_sql(query, engine)

# The cards that were sampled, one row per card
random_card_holders = transactions[["card_id"]].drop_duplicates().reset_index(drop=True)

# Find anomalous transactions for each card holder, judged against that
# card's own quartiles. Rows are collected and concatenated once so each
# outlier keeps its card_id next to the amount.
flagged = []
for _, card_transactions in transactions.groupby("card_id"):
    outliers = find_outliers_iqr(card_transactions["amount"])
    flagged.append(card_transactions.loc[outliers.index])

# pd.concat raises on an empty list, so fall back to an empty frame that
# still carries the card_id/amount columns.
anomalous_transactions = pd.concat(flagged) if flagged else transactions.iloc[0:0]

print(anomalous_transactions)
