# NUPAT AI Fellowship – Stage Two
## Case Study Assessment

This notebook contains the complete analysis, modeling, and recommendations based on the provided datasets.

## 1. Import Libraries and Load Data

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Input files are resolved relative to the notebook's working directory.
TRADES_CSV = "trades.csv"
ACTIVITY_CSV = "user_activity.csv"

# Load the raw trade records and the raw user-activity records.
trades = pd.read_csv(TRADES_CSV)
activity = pd.read_csv(ACTIVITY_CSV)

# Preview the first rows of both frames as a sanity check on the load.
(trades.head(), activity.head())


## 2. Data Preparation

In [None]:

# Parse the raw timestamp column of both frames into pandas datetimes so the
# `.dt` accessors below (and in later cells) are available.
for frame in (trades, activity):
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])

# Calendar features used by the deposit-timing analysis.
activity_ts = activity['timestamp']
activity['hour'] = activity_ts.dt.hour
activity['day_of_week'] = activity_ts.dt.day_name()

# Calendar date of each trade, used as the daily grouping key.
trades['date'] = trades['timestamp'].dt.date


## Part 1: Market Dynamics

In [None]:

# Divisor used to express trade value in USD.
# NOTE(review): presumably the NGN-per-USD rate -- TODO confirm the value and
# its date. It is applied to every pair, so pairs quoted in any other currency
# would be mis-scaled; confirm all pairs share one quote currency.
QUOTE_PER_USD = 1500

# How many of the largest pairs to report.
TOP_N_PAIRS = 3

# Notional value of each trade in the pair's quote currency.
trades['trade_value'] = trades['price'] * trades['volume']
trades['usd_value'] = trades['trade_value'] / QUOTE_PER_USD

# Total USD value per pair, largest first, limited to the top N.
top_pairs = (
    trades.groupby('pair')['usd_value']
    .sum()
    .nlargest(TOP_N_PAIRS)
)

top_pairs


## Visualization

In [None]:

# Bar chart of the pairs held in `top_pairs` (summed usd_value per pair).
fig, ax = plt.subplots()
top_pairs.plot(kind='bar', ax=ax)
ax.set_title("Top 3 Trading Pairs by USD Volume")
ax.set_ylabel("USD Volume")
plt.show()


## Volatility Analysis – BTCNGN

In [None]:

# Restrict to the BTCNGN pair.
btc = trades.loc[trades['pair'].eq("BTCNGN")]

# Daily volatility proxy: the intraday price range (highest minus lowest
# trade price) for each calendar date.
price_by_date = btc.groupby('date')['price']
daily_volatility = price_by_date.max() - price_by_date.min()

# Smooth with a 7-observation rolling mean; the first six values are NaN.
# NOTE(review): the window counts rows (dates that have trades), not calendar
# days -- confirm there are no missing dates if a true 7-day window is meant.
rolling_volatility = daily_volatility.rolling(7).mean()

fig, ax = plt.subplots()
rolling_volatility.plot(ax=ax)
ax.set_title("7-Day Rolling Average Volatility (BTCNGN)")
ax.set_ylabel("Volatility")
plt.show()


## User Behavior – Deposit Timing

In [None]:

# Keep only deposit events.
deposit_data = activity[activity['activity_type'] == 'deposit']

# groupby sorts its keys, which puts weekday names in alphabetical order
# (Friday, Monday, ...). Reindex to calendar order so the chart reads
# naturally; days/hours with no deposits are shown as 0 rather than dropped.
DAY_ORDER = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]
HOURS_IN_DAY = range(24)

deposit_by_day = (
    deposit_data.groupby('day_of_week')['amount']
    .sum()
    .reindex(DAY_ORDER, fill_value=0)
)
deposit_by_hour = (
    deposit_data.groupby('hour')['amount']
    .sum()
    .reindex(HOURS_IN_DAY, fill_value=0)
)

# Total deposited amount per weekday.
fig, ax = plt.subplots()
deposit_by_day.plot(kind='bar', ax=ax)
ax.set_title("Deposits by Day of Week")
ax.set_xlabel("Day of week")
ax.set_ylabel("Total deposit amount")
plt.show()

# Total deposited amount per hour of day (0-23).
fig, ax = plt.subplots()
deposit_by_hour.plot(kind='bar', ax=ax)
ax.set_title("Deposits by Hour")
ax.set_xlabel("Hour of day")
ax.set_ylabel("Total deposit amount")
plt.show()


## Part 2: Fraud Detection

In [None]:

# Thresholds of the rule-based "suspicious" label.
MAX_HOURS_TO_WITHDRAW = 24            # first withdrawal within this many hours of first deposit
MAX_TRADE_TO_DEPOSIT_RATIO = 0.2      # traded less than this fraction of deposits
MIN_WITHDRAW_TO_DEPOSIT_RATIO = 0.8   # withdrew at least this fraction of deposits
NO_WITHDRAWAL_SENTINEL = -1           # time_to_withdraw value when it is undefined

# --- Per-user activity features --------------------------------------------
# Row-level masks let amounts and timestamps be aggregated per activity type
# with plain vectorised reductions instead of per-group lambdas.
is_deposit = activity['activity_type'].eq('deposit')
is_withdrawal = activity['activity_type'].eq('withdrawal')

activity_marked = activity.assign(
    _is_deposit=is_deposit,
    _is_withdrawal=is_withdrawal,
    _deposit_amount=activity['amount'].where(is_deposit, 0),
    _withdrawal_amount=activity['amount'].where(is_withdrawal, 0),
    # Timestamps of other activity types become NaT and are skipped by min().
    _deposit_time=activity['timestamp'].where(is_deposit),
    _withdrawal_time=activity['timestamp'].where(is_withdrawal),
)

activity_features = activity_marked.groupby('user_id').agg(
    deposits=('_is_deposit', 'sum'),
    withdrawals=('_is_withdrawal', 'sum'),
    total_deposited=('_deposit_amount', 'sum'),
    total_withdrawn=('_withdrawal_amount', 'sum'),
    # Earliest deposit / earliest withdrawal. Taking min/max over *all*
    # activity rows would measure the user's whole activity span instead.
    first_deposit_time=('_deposit_time', 'min'),
    first_withdrawal_time=('_withdrawal_time', 'min'),
).reset_index()

# Hours from first deposit to first withdrawal; NaN when the user has no
# deposit or no withdrawal, negative when the withdrawal came first.
activity_features['time_to_withdraw'] = (
    activity_features['first_withdrawal_time'] - activity_features['first_deposit_time']
).dt.total_seconds() / 3600

# --- Per-user trading features ---------------------------------------------
trade_features = trades.groupby('user_id').agg(
    trade_volume=('volume', 'sum'),
    trade_count=('pair', 'count'),
    unique_pairs=('pair', 'nunique')
).reset_index()

user_features = activity_features.merge(trade_features, on='user_id', how='left')

# Users with no trades get zeros for the trade columns only. A blanket
# fillna(0) would also turn "never withdrew" into "withdrew after 0 hours".
trade_columns = ['trade_volume', 'trade_count', 'unique_pairs']
user_features[trade_columns] = user_features[trade_columns].fillna(0)

# --- Rule-based label --------------------------------------------------------
# Suspicious = withdrew quickly after depositing, barely traded, and pulled
# most of the deposit back out. `between` is False for NaN, so users without
# both a deposit and a withdrawal are never flagged.
# NOTE(review): trade_volume sums the raw `volume` column while
# total_deposited sums `amount`; confirm the two share units before relying
# on the trade-to-deposit ratio.
quick_withdrawal = user_features['time_to_withdraw'].between(0, MAX_HOURS_TO_WITHDRAW)
barely_traded = (
    user_features['trade_volume']
    < MAX_TRADE_TO_DEPOSIT_RATIO * user_features['total_deposited']
)
withdrew_most = (
    user_features['total_withdrawn']
    >= MIN_WITHDRAW_TO_DEPOSIT_RATIO * user_features['total_deposited']
)
user_features['suspicious'] = (quick_withdrawal & barely_traded & withdrew_most).astype(int)

# Downstream modelling needs a numeric value in every row, so undefined
# durations are replaced with a sentinel only after the label is computed.
user_features['time_to_withdraw'] = (
    user_features['time_to_withdraw'].fillna(NO_WITHDRAWAL_SENTINEL)
)


## Model Training and Evaluation

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, recall_score

# Per-user inputs to the classifier.
features = [
    'deposits','withdrawals','total_deposited','total_withdrawn',
    'trade_volume','trade_count','unique_pairs','time_to_withdraw'
]

X = user_features[features]
y = user_features['suspicious']

# NOTE(review): `suspicious` is produced by a fixed rule over columns that are
# also in `features`, so the scores below measure how well the forest
# reproduces that rule, not how well it finds independently confirmed fraud.

# Stratified splitting raises ValueError when any class has fewer than two
# members; fall back to a plain random split in that case.
can_stratify = y.value_counts().min() >= 2

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42,
    stratify=y if can_stratify else None
)

# class_weight='balanced' reweights samples inversely to class frequency so
# the rare positive class is not drowned out by the majority class.
model = RandomForestClassifier(random_state=42, class_weight='balanced')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# zero_division=0 reports 0 (instead of warning) for metrics that are
# undefined because a class has no true or predicted samples in the test split.
print(classification_report(y_test, y_pred, zero_division=0))
print("Recall:", recall_score(y_test, y_pred, zero_division=0))


## Part 3: Strategic Recommendation

Target low-volume Kenyan traders, identified by three signals: low trade frequency, small average trade size, and a preference for KES-based pairs.