  # Challenge

  ## Identifying Outliers using Standard Deviation

In [1]:
# Import Dependencies
import os
import pandas as pd
import numpy as np
import random
from sqlalchemy import create_engine
from dotenv import load_dotenv
import statistics
import random
import datetime as dt

In [2]:
# Load ENV
load_dotenv()

# Save Database Password
password = os.getenv("password")

# Create Engine
engine = create_engine(f"postgresql://postgres:{password}@localhost:5432/fraud_detection")

In [3]:
# Function to Identify Outliers Based on Standard Deviation
def checkOutliersSTD(dummy):
    
    # Save Mean
    dummy_mean = dummy.describe().loc['mean'].item()
    
    # Save Standard Deviation
    dummy_std = dummy.describe().loc['mean'].item()
    
    # Save Initial Number of Outliers
    outliers = 0
    
    # For Loop to Check for Outliers
    for x in range(len(dummy)):
        
        # Assign Current Amount
        current = dummy["amount"][x]
        
        # Print Outlier and Increase Outlier Count if Current is an Upper Outlier
        if current > dummy_mean + dummy_std:
            print(f"{current} is an upper outlier")
            outliers += 1
            
        # Print Outlier and Increase Outlier Count if Current is a Lower Outlier
        elif current < dummy_mean - dummy_std:
            print(f"{current} is a lower outlier")
            outliers += 1
            
    # Print No Outliers Found if None Found
    if outliers < 1:
        print("No Outliers Found.")

In [4]:
# Function to Create DataFrames
def createDF(card_holder_num):
    
    # Initial Query
    query = f"""
    SELECT t.date, t.amount
    FROM transaction t
    JOIN credit_card c
    ON t.card = c.card
    WHERE id_card_holder = {int(card_holder_num)}
    ORDER BY t.date
    """
    
    # Read Query
    df = pd.read_sql(query, engine)
    
    # Return Created DataFrame
    return df

In [5]:
# Assign Empty List
random_3 = []

# Fill List with 3 Random Numbers 1-25
for x in range(3):
    random_3.append(random.randint(1,26))

# Create DataFrame from First Random Number
rando_1 = createDF(random_3[0])

# Create DataFrame from Second Random Number
rando_2 = createDF(random_3[1])

# Create DataFrame from Third Random Number
rando_3 = createDF(random_3[2])

In [6]:
# Check Outliers for First Random Number
checkOutliersSTD(rando_1)

1685.0000000000002 is an upper outlier
445.0 is an upper outlier
1072.0 is an upper outlier
543.0 is an upper outlier
1086.0 is an upper outlier
233.0 is an upper outlier
1449.0 is an upper outlier
2249.0 is an upper outlier
1296.0 is an upper outlier


In [7]:
# Check Outliers for Second Random Number
checkOutliersSTD(rando_2)

19.5 is an upper outlier
18.98 is an upper outlier
19.67 is an upper outlier
18.44 is an upper outlier
19.69 is an upper outlier
18.88 is an upper outlier
18.27 is an upper outlier
20.13 is an upper outlier
20.53 is an upper outlier
18.7 is an upper outlier
18.67 is an upper outlier


In [8]:
# Check Outliers for Third Random Number
checkOutliersSTD(rando_3)

1011.0 is an upper outlier
525.0 is an upper outlier
1901.0 is an upper outlier
258.0 is an upper outlier
291.0 is an upper outlier
466.0 is an upper outlier
1301.0 is an upper outlier
1035.0 is an upper outlier


  ## Identifying Outliers Using Interquartile Range

In [9]:
# Function to Identify Outliers Based on Interquartile Range
def checkOutliersIQR(dummy):
    
    # Save Median
    statistics.median(dummy["amount"])

    # Save Lower Quartile
    lower_q = dummy.describe().loc['25%']

    # Save Upper Quartile
    upper_q = dummy.describe().loc['75%']

    # Save IQR
    iqr = upper_q - lower_q
    
    # Function to Find Fences
    def findFence(num):
        
        # Find Fence Limit
        find_fence = iqr * num

        # Assign Lower Fence
        lower_fence = lower_q - find_fence

        # Assign Upper Fence
        upper_fence = upper_q + find_fence
        
        # Return Fences
        return (lower_fence,upper_fence)

    # Find Inner Fences
    inner_fence = findFence(1.5)
    
    # Find Outer Fences
    outer_fence = findFence(3)
    
    # Save Initial Number of Outliers
    outliers = 0
    
    # For Loop to Check for Outliers
    for x in range(len(dummy)):
        
        # Assign Current Amount
        current = dummy["amount"][x]
        
        # Print Outlier and Increase Outlier Count if Current is a Major Upper Outlier
        if current > outer_fence[1].item():
            print(f"{current} is a major upper outlier")
            outliers += 1
            
        # Print Outlier and Increase Outlier Count if Current is a Minor Upper Outlier
        elif current > inner_fence[1].item():
            print(f"{current} is a minor upper outlier")
            outliers += 1
            
        # Print Outlier and Increase Outlier Count if Current is a Major Lower Outlier
        elif current < outer_fence[0].item():
            print(f"{current} is a major lower outlier")
            outliers += 1
            
        # Print Outlier and Increase Outlier Count if Current is a Minor Lower Outlier
        elif current < inner_fence[0].item():
            print(f"{current} is a minor lower outlier")
            outliers += 1

    # Print No Outliers Found if None Found
    if outliers < 1:
        print("No Outliers Found.")

In [14]:
# Check Outliers for First Random Number
checkOutliersIQR(rando_1)

1685.0000000000002 is a major upper outlier
445.0 is a major upper outlier
1072.0 is a major upper outlier
543.0 is a major upper outlier
1086.0 is a major upper outlier
160.0 is a major upper outlier
233.0 is a major upper outlier
1449.0 is a major upper outlier
2249.0 is a major upper outlier
1296.0 is a major upper outlier


In [15]:
# Check Outliers for Second Random Number
checkOutliersIQR(rando_3)

1011.0 is a major upper outlier
525.0 is a major upper outlier
1901.0 is a major upper outlier
258.0 is a major upper outlier
291.0 is a major upper outlier
466.0 is a major upper outlier
1301.0 is a major upper outlier
1035.0 is a major upper outlier


In [16]:
# Check Outliers for Third Random Number
checkOutliersIQR(rando_2)

No outliers found.
