# In [23]:
import pandas as pd
import numpy as np

# Sample distance data (in km) from Ogun State.
# Every value is measured from the same reference point (Ogun), so the table
# does not hold city-to-city distances. calculate_location_risk compares two
# entries of this table and divides by the largest value stored here.
city_distances = {
    "Lagos": 50,
    "Abuja": 760,
    "Kano": 985,
    "Port Harcourt": 580,
    "Enugu": 450,
    "Jigawa": 1020,
    "Kaduna": 850,
    "Ogun": 0  # Reference point: all other distances are measured from here
}

# Sample transaction data: one row per transaction.
#   account_type          - 0 is treated as a business account by
#                           calculate_amount_risk; any other value gets the
#                           higher amount weight
#   device_type           - 0/1 in this sample; meaning of each value is not
#                           defined here (TODO confirm)
#   amount                - raw transaction amount (currency not stated here)
#   customer_history      - larger values lower the history risk (unit not
#                           stated here; TODO confirm)
#   time_of_day           - compared against preferred_times; presumably the
#                           hour of day, 0-23 (TODO confirm)
#   transaction_frequency - larger values raise the frequency risk
#   sign_up_city,
#   transaction_city      - city names looked up in city_distances
transactions = pd.DataFrame({
    'account_type': [1, 0, 0, 1, 1],
    'device_type': [0, 1, 1, 1, 1],
    'amount': [100, 5000, 250, 10000, 750],
    'customer_history': [50, 3, 27, 1, 12],
    'time_of_day': [14, 3, 16, 2, 13],
    'transaction_frequency': [0.2, 0.4, 0.3, 0.2, 0.8],
    'sign_up_city': ["Lagos", "Jigawa", "Kaduna", "Lagos", "Abuja"],
    'transaction_city': ["Lagos", "Kano", "Kaduna", "Port Harcourt", "Jigawa"],
})

# Risk weights, keyed by the transaction column they apply to.
# Each value is the largest contribution that feature can add to total_risk.
# The amount and location components are not listed here: their weights are
# set inside calculate_amount_risk and calculate_location_risk.
weights = {
    'account_type': 0.3,
    'device_type': 0.5,
    'customer_history': 0.3,
    'time_of_day': 0.2,
    'transaction_frequency': 0.3
}

# Preferred time ranges as (start, end) pairs; in_preferred_time treats both
# ends as inclusive. Values are compared directly with 'time_of_day'.
preferred_times = [(8, 11), (13, 14), (18, 22)]

def in_preferred_time(transaction_time):
    """Tell whether *transaction_time* lies inside a preferred window.

    Each entry of ``preferred_times`` is a ``(start, end)`` pair and both
    bounds count as inside the window.

    Returns:
        True if at least one window contains the time, otherwise False.
    """
    for window_start, window_end in preferred_times:
        if window_start <= transaction_time <= window_end:
            return True
    return False

def calculate_time_risk(row):
    """Return the time-of-day risk component for one transaction row.

    Args:
        row: Mapping-like row exposing a ``'time_of_day'`` entry.

    Returns:
        0 when the transaction time falls in a preferred window, otherwise
        the full ``weights['time_of_day']`` penalty.
    """
    if in_preferred_time(row['time_of_day']):
        return 0
    # Read the penalty from the shared weight table instead of repeating the
    # literal, so tuning weights['time_of_day'] actually changes the score.
    return weights['time_of_day']

# Amount risk: weight the normalized amount according to account type

# Normalize function
def normalize(series):
    """Min-max scale a 1-D numeric series onto the range [0, 1].

    Args:
        series: 1-D pandas Series (or NumPy array) of numbers.

    Returns:
        ``(series - min) / (max - min)`` with the same shape as the input.
        When every value is identical the range is zero; in that case all
        zeros are returned instead of the NaN that 0/0 would produce.
    """
    lowest = series.min()
    value_range = series.max() - lowest
    shifted = series - lowest
    if value_range == 0:
        # Constant input: avoid 0/0. Multiplying by 0.0 yields float zeros and
        # keeps any missing values in the input as missing.
        return shifted * 0.0
    return shifted / value_range

# Min-max scale the raw amounts across this batch of transactions.
# calculate_amount_risk reads the resulting 'normalized_amount' column.
transactions['normalized_amount'] = normalize(transactions['amount'])

# Amount risk: the weight applied depends on the account type
def calculate_amount_risk(row):
    """Return the amount risk component for one transaction row.

    Args:
        row: Mapping-like row exposing ``'account_type'`` and
            ``'normalized_amount'``.

    Returns:
        The normalized amount times 0.1 for business accounts
        (``account_type == 0``), or times 0.4 for every other account type.
    """
    is_business = row['account_type'] == 0
    # Business accounts get the low weight, everything else the higher one.
    amount_weight = 0.1 if is_business else 0.4
    return row['normalized_amount'] * amount_weight

# Compute the amount risk row by row (axis=1 hands each transaction to
# calculate_amount_risk) and store it in the new 'amount_risk' column.
transactions['amount_risk'] = transactions.apply(calculate_amount_risk, axis=1)

# Location risk: how far apart the sign-up and transaction cities appear to be
def calculate_location_risk(row, weight=0.4):
    """Return the location risk component for one transaction row.

    Both cities are looked up in ``city_distances``; a city missing from the
    table is treated as distance 0. The risk is the absolute gap between the
    two looked-up distances (not a true city-to-city distance), divided by
    the largest distance in the table and then scaled by *weight*.

    Args:
        row: Mapping-like row exposing ``'sign_up_city'`` and
            ``'transaction_city'``.
        weight: Multiplier applied to the normalized gap (default 0.4).

    Returns:
        The weighted, normalized distance gap.
    """
    home_km, spend_km = (
        city_distances.get(row[column], 0)
        for column in ('sign_up_city', 'transaction_city')
    )
    gap_km = abs(home_km - spend_km)

    # Largest distance in the table is the normalization denominator.
    farthest_km = max(city_distances.values())
    return gap_km / farthest_km * weight

# Score each transaction's location. apply() with axis=1 passes one row at a
# time, so calculate_location_risk runs with its default weight.
transactions['location_risk'] = transactions.apply(calculate_location_risk, axis=1)

# Same row-wise pass for the time-of-day component.
transactions['time_risk'] = transactions.apply(calculate_time_risk, axis=1)

# Normalize features
# NOTE: this repeats the normalize defined earlier in this file; from this
# point on, this later definition is the one in effect.
def normalize(series):
    """Min-max scale a 1-D numeric series onto the range [0, 1].

    Args:
        series: 1-D pandas Series (or NumPy array) of numbers.

    Returns:
        ``(series - min) / (max - min)`` with the same shape as the input.
        When every value is identical the range is zero; in that case all
        zeros are returned instead of the NaN that 0/0 would produce.
    """
    lowest = series.min()
    value_range = series.max() - lowest
    shifted = series - lowest
    if value_range == 0:
        # Constant input (e.g. every row has the same account_type): avoid
        # 0/0 so the derived risk columns and total_risk do not become NaN.
        return shifted * 0.0
    return shifted / value_range

# A long customer history lowers risk, so the scaled value is inverted before
# the weight is applied.
transactions['history_risk'] = weights['customer_history'] * (1 - normalize(transactions['customer_history']))

# These features raise risk as they grow: scale, then apply the weight.
transactions['account_type_risk'] = weights['account_type'] * normalize(transactions['account_type'])
transactions['device_type_risk'] = weights['device_type'] * normalize(transactions['device_type'])
transactions['transaction_frequency_risk'] = weights['transaction_frequency'] * normalize(transactions['transaction_frequency'])

# Overall score: start from the amount component and add the remaining
# components one by one, in this order.
transactions['total_risk'] = sum(
    (
        transactions[component]
        for component in (
            'history_risk',
            'time_risk',
            'location_risk',
            'account_type_risk',
            'device_type_risk',
            'transaction_frequency_risk',
        )
    ),
    transactions['amount_risk'],
)

# Show the raw amount next to the combined score.
print(transactions[['amount', 'total_risk']])


# Output:
#    amount  total_risk
# 0     100    0.300000
# 1    5000    1.150976
# 2     250    0.892331
# 3   10000    1.907843
# 4     750    1.460876
