# Challenge

Another approach to identifying fraudulent transactions is to look for outliers in the data. Standard deviation or quartiles are often used to detect outliers. Using this starter notebook, code two Python functions:

* One that uses standard deviation to identify anomalies for any cardholder.

* Another that uses interquartile range to identify anomalies for any cardholder.

## Identifying Outliers using Standard Deviation

In [58]:
# Initial imports
import pandas as pd
import numpy as np
import random
from sqlalchemy import create_engine
from numpy import percentile

In [53]:
# Create a connection to the database
engine = create_engine("postgresql://postgres:admin@localhost:5432/fraud_detection")

def get_transaction_by_card_holder(card_holder_number):
    """
    Search transactions by cardholder number
    Attribute:
        card_holder_number: Number of the cardholder
    Return:
        Dataframe: Transactions data for de cardholder informed
    """
    # Create query
    query = f"""
        select t.*
        from transaction t 
        join credit_card c on t.card = c.card
        where c.cardholder_id = {card_holder_number};
        """
    
    # execute query
    data_frame = pd.read_sql(query, engine)
    return data_frame

In [67]:
# Write function that locates outliers using standard deviation

def standard_deviation_outliers(card_holder_number):
    print(f'Card Holder {card_holder_number}')
    # Search transactions by card holder number
    data_frame = get_transaction_by_card_holder(card_holder_number)
    # calculate summary statistics
    amount_mean, amount_std = data_frame['amount'].mean(), transactions_df['amount'].std()
    # identify outliers
    cut_off = amount_std * 3
    lower, upper = amount_mean - cut_off, amount_mean + cut_off
    # identify outliers
    outliers = [x for x in data_frame['amount'] if x < lower or x > upper]
    print('Identified outliers: %d' % len(outliers))
    # remove outliers
    outliers_removed = [x for x in data_frame['amount'] if x >= lower and x <= upper]
    print('Non-outlier observations: %d' % len(outliers_removed))
    print('-----')

In [68]:
# Find anomalous transactions for 3 random card holders
standard_deviation_outliers(2)
standard_deviation_outliers(18)
standard_deviation_outliers(25)

Card Holder 2
Identified outliers: 0
Non-outlier observations: 99
-----
Card Holder 18
Identified outliers: 7
Non-outlier observations: 126
-----
Card Holder 25
Identified outliers: 9
Non-outlier observations: 115
-----


## Identifying Outliers Using Interquartile Range

In [69]:
# Write a function that locates outliers using interquartile range

def interquartile_range_outliers(card_holder_number):
    print(f'Card Holder {card_holder_number}')
    # Search transactions by card holder number
    data_frame = get_transaction_by_card_holder(card_holder_number)
    # calculate interquartile range
    q25, q75 = percentile(data_frame['amount'], 25), percentile(data_frame['amount'], 75)
    iqr = q75 - q25
    print('Percentiles: 25th=%.3f, 75th=%.3f, IQR=%.3f' % (q25, q75, iqr))
    # calculate the outlier cutoff
    cut_off = iqr * 1.5
    lower, upper = q25 - cut_off, q75 + cut_off
    # identify outliers
    outliers = [x for x in data_frame['amount'] if x < lower or x > upper]
    print('Identified outliers: %d' % len(outliers))
    # remove outliers
    outliers_removed = [x for x in data_frame['amount'] if x >= lower and x <= upper]
    print('Non-outlier observations: %d' % len(outliers_removed))
    print('-----')

In [70]:
# Find anomalous transactions for 3 random card holders
interquartile_range_outliers(2)
interquartile_range_outliers(18)
interquartile_range_outliers(25)

Card Holder 2
Percentiles: 25th=4.140, 75th=13.295, IQR=9.155
Identified outliers: 0
Non-outlier observations: 99
-----
Card Holder 18
Percentiles: 25th=3.460, 75th=16.160, IQR=12.700
Identified outliers: 12
Non-outlier observations: 121
-----
Card Holder 25
Percentiles: 25th=2.998, 75th=14.430, IQR=11.432
Identified outliers: 12
Non-outlier observations: 112
-----
