In [None]:
from datetime import datetime, timedelta
from db import get_user_data  # Your function to query DB

# In-memory cache keyed by user_id. Each value is a dict with the computed
# "profile" and the "updated_at" datetime at which it was stored.
user_profiles = {}

def is_outdated(user_profile):
    """Tell whether a cached profile entry is stale.

    Args:
        user_profile: Cache entry dict holding an 'updated_at' datetime.

    Returns:
        True when more than one minute has passed since 'updated_at'.
    """
    max_age = timedelta(minutes=1)
    age = datetime.now() - user_profile['updated_at']
    return age > max_age

def compute_profile(df):
    """Summarise a user's history into a location/time profile.

    Args:
        df: DataFrame with 'latitude', 'longitude', 'hour' and 'day' columns.

    Returns:
        Dict with the mean coordinates ('avg_lat', 'avg_lon') and the modal
        values of 'hour' and 'day' ('common_hours', 'common_days'). The modes
        are lists because a column can have several equally frequent values.
    """
    mean_lat = df['latitude'].mean()
    mean_lon = df['longitude'].mean()
    top_hours = df['hour'].mode().tolist()
    top_days = df['day'].mode().tolist()
    return {
        "avg_lat": mean_lat,
        "avg_lon": mean_lon,
        "common_hours": top_hours,
        "common_days": top_days,
    }

def get_user_profile_cached(user_id):
    """Look up a user's profile, refreshing the cache entry when needed.

    A fresh entry is served straight from `user_profiles`. A missing or stale
    entry triggers a reload through `get_user_data` and a recomputation.

    Args:
        user_id: Key identifying the user in the cache and the database.

    Returns:
        The profile dict, or None when `get_user_data` returns no rows (the
        cache is left untouched in that case).
    """
    cache_hit = user_id in user_profiles and not is_outdated(user_profiles[user_id])
    if cache_hit:
        return user_profiles[user_id]['profile']

    history = get_user_data(user_id)
    if history.empty:
        return None

    fresh_profile = compute_profile(history)
    user_profiles[user_id] = {
        "profile": fresh_profile,
        "updated_at": datetime.now(),
    }
    return fresh_profile


from user_behavior import get_user_profile_cached
from geopy.distance import geodesic

def is_anomalous(lat, lon, timestamp, user_id):
    """Check an observation against the user's cached behaviour profile.

    Args:
        lat: Latitude of the observation.
        lon: Longitude of the observation.
        timestamp: Object exposing `.hour` and `.weekday()`.
        user_id: User whose profile is looked up.

    Returns:
        False when no profile is available. Otherwise True if the hour is not
        one of the profile's common hours or the point lies more than 1 km
        (geodesic) from the profile's average location.
    """
    profile = get_user_profile_cached(user_id)
    if not profile:
        # No data to compare against
        return False

    hour = timestamp.hour
    day = timestamp.weekday()  # computed but not used by the checks below

    # Time anomaly
    odd_hour = hour not in profile['common_hours']
    if odd_hour:
        print("Unusual hour")

    # Location anomaly
    usual_spot = (profile['avg_lat'], profile['avg_lon'])
    km_from_usual = geodesic(usual_spot, (lat, lon)).km
    far_away = km_from_usual > 1.0
    if far_away:
        print("Unusual location")

    return odd_hour or far_away

# Main scoring
# NOTE(review): predict_risk, lat, lon, timestamp and user_id are not defined
# in this cell — presumably provided by an earlier cell; confirm.
# predict_risk returns a 3-tuple; the middle element is discarded here.
risky_score, _, risky_type = predict_risk(lat, lon, method="dbscan")
# True when the hour or the location deviates from the user's cached profile.
is_behavior_anomaly = is_anomalous(lat, lon, timestamp, user_id)

# Simple fusion logic
# NOTE(review): final_score is not clamped after the boost — confirm whether
# downstream code expects an upper bound.
final_score = risky_score
if is_behavior_anomaly:
    final_score += 0.3  # Boost if behavior looks weird


In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import OPTICS
from sklearn.preprocessing import StandardScaler
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Configuration
# Fixed, naive reference timestamp.
# NOTE(review): not referenced by any function visible in this cell — confirm it is still needed.
current_date = datetime(2025, 6, 28, 16, 13)  # 4:13 PM CET, Saturday, June 28, 2025
distance_threshold = 0.05  # in latitude/longitude degrees; ~5.5 km for unusual location
prob_threshold = 0.04  # 4% relative frequency; anything rarer counts as an unusual time
late_night_hours = list(range(22, 24)) + list(range(0, 5))  # hours 22-23 and 0-4 (10 PM up to 5 AM)
min_data_points = 10  # Minimum data points for reliable user profile
db_path = 'user_data.db'  # SQLite database file

# Generate synthetic user data (fallback)
def generate_synthetic_data(user_id, n_points=50):
    """Create random location history for a single user.

    'user1' gets two groups of points (home and work in Tunis), 'user2' is
    centred on Gabes, and any other id is centred on Kairouan. Coordinates
    are drawn from normal distributions with a 0.01 standard deviation.

    Args:
        user_id: Identifier written into every row.
        n_points: Number of rows to generate.

    Returns:
        DataFrame with columns user_id, latitude, longitude, timestamp
        (hourly from 2025-05-01), hour, weekday and month.
    """
    if user_id == 'user1':
        half = n_points // 2
        rest = n_points - half
        # Draw order (home lat, home lon, work lat, work lon) is kept so a
        # seeded generator yields the same sequence as before.
        home_lat = np.random.normal(36.8065, 0.01, half)  # Tunis (home)
        home_lon = np.random.normal(10.1815, 0.01, half)
        work_lat = np.random.normal(36.8000, 0.01, rest)  # Nearby (work)
        work_lon = np.random.normal(10.1700, 0.01, rest)
        lat = np.concatenate([home_lat, work_lat])
        lon = np.concatenate([home_lon, work_lon])
    else:
        if user_id == 'user2':
            centre_lat, centre_lon = 33.8815, 10.0982  # Gabes
        else:
            centre_lat, centre_lon = 35.6754, 10.1033  # Kairouan (default for new users)
        lat = np.random.normal(centre_lat, 0.01, n_points)
        lon = np.random.normal(centre_lon, 0.01, n_points)

    hourly = pd.date_range(start='2025-05-01', periods=n_points, freq='h')
    frame = pd.DataFrame({
        'user_id': [user_id] * n_points,
        'latitude': lat,
        'longitude': lon,
        'timestamp': hourly,
    })
    stamps = frame['timestamp'].dt
    frame['hour'] = stamps.hour
    frame['weekday'] = stamps.dayofweek
    frame['month'] = stamps.month
    return frame

# Database setup and data retrieval
def setup_database(data=None):
    """Open the SQLite database at `db_path`, optionally loading data into it.

    Args:
        data: Optional DataFrame. When given, it replaces the whole
            `user_locations` table.

    Returns:
        The open sqlite3 connection; the caller is responsible for closing it.
    """
    connection = sqlite3.connect(db_path)
    if data is None:
        return connection
    data.to_sql('user_locations', connection, if_exists='replace', index=False)
    return connection

def load_user_data(user_id, conn):
    """Fetch every stored location row for one user.

    The user id is bound as a query parameter instead of being interpolated
    into the SQL text, so ids containing quotes are matched literally and
    cannot alter the statement.

    Args:
        user_id: Identifier matched against the `user_id` column.
        conn: Open sqlite3 connection.

    Returns:
        DataFrame of matching rows, or an empty DataFrame when the query
        fails (for example because `user_locations` does not exist yet).
    """
    query = "SELECT * FROM user_locations WHERE user_id = ?"
    try:
        return pd.read_sql(query, conn, params=(user_id,))
    except (sqlite3.Error, pd.errors.DatabaseError):
        # Best-effort read: callers treat "no table" the same as "no rows".
        return pd.DataFrame()

# Build user profile
def build_user_profile(user_id, conn, fallback_data=None):
    """Build a location/time profile for one user from stored data.

    When the database holds fewer than `min_data_points` rows for the user,
    the stored rows are replaced by fallback rows for profiling, and those
    fallback rows are appended to `user_locations`. The fallback is either
    the matching rows of `fallback_data` or freshly generated synthetic data.

    Args:
        user_id: User to profile.
        conn: Open sqlite3 connection.
        fallback_data: Optional DataFrame with a `user_id` column to draw
            fallback rows from instead of generating synthetic ones.

    Returns:
        Tuple (frequent_zones, usual_hours, usual_weekdays, usual_months):
        a DataFrame of cluster centroids indexed by cluster label and three
        Series of relative frequencies. Returns (None, None, None, None)
        when fewer than five location points are available.
    """
    user_df = load_user_data(user_id, conn)

    # Fallback to synthetic data if insufficient
    if user_df.empty or len(user_df) < min_data_points:
        print(f"Insufficient data for {user_id} (found {len(user_df)} points). Using synthetic data.")
        if fallback_data is None:
            user_df = generate_synthetic_data(user_id)
        else:
            # Boolean mask instead of a query string: it cannot be broken by
            # quotes in user_id, and .copy() gives an independent frame that
            # is safe to add columns to below.
            user_df = fallback_data[fallback_data['user_id'] == user_id].copy()
        # Update database with the fallback data
        user_df.to_sql('user_locations', conn, if_exists='append', index=False)

    # Ensure timestamp is datetime (unparseable values become NaT)
    user_df['timestamp'] = pd.to_datetime(user_df['timestamp'], errors='coerce')
    user_df['hour'] = user_df['timestamp'].dt.hour
    user_df['weekday'] = user_df['timestamp'].dt.dayofweek
    user_df['month'] = user_df['timestamp'].dt.month

    # Cluster user locations
    user_locations = user_df[['latitude', 'longitude']].values
    if len(user_locations) < 5:  # OPTICS below is configured with min_samples=5
        print(f"Not enough location points for {user_id} to cluster.")
        return None, None, None, None

    user_scaler = StandardScaler()
    user_locations_scaled = user_scaler.fit_transform(user_locations)
    user_optics = OPTICS(min_samples=5, xi=0.1, metric='euclidean')
    user_df['user_cluster'] = user_optics.fit_predict(user_locations_scaled)

    # Frequent zones (centroids of non-noise clusters; label -1 marks noise)
    clustered = user_df[user_df['user_cluster'] != -1]
    frequent_zones = clustered.groupby('user_cluster')[['latitude', 'longitude']].mean()

    # Usual activity times, as relative frequencies
    usual_hours = user_df['hour'].value_counts(normalize=True)
    usual_weekdays = user_df['weekday'].value_counts(normalize=True)
    usual_months = user_df['month'].value_counts(normalize=True)

    return frequent_zones, usual_hours, usual_weekdays, usual_months



# Detect anomalies
def detect_user_anomalies(lat, lon, hour, weekday, month, user_id, conn):
    """Score how unusual a location/time observation is for a user.

    Location: the observation is flagged (1.0) when its distance to the
    nearest frequent zone exceeds the 95th percentile of the distances
    between the user's stored points and their own nearest zone. Distances
    are Euclidean in raw latitude/longitude degrees.

    Time: 0.5 for a rare hour, 0.3 for a rare weekday, 0.15 for a rare month
    (rare = relative frequency below `prob_threshold`; values never seen are
    treated as 0.01), plus 0.5 for a late-night hour, capped at 1.0.

    Args:
        lat: Latitude of the observation.
        lon: Longitude of the observation.
        hour: Hour of day of the observation.
        weekday: Day of week of the observation.
        month: Month of the observation.
        user_id: User whose profile is used.
        conn: Open sqlite3 connection.

    Returns:
        Tuple (location_anomaly, time_anomaly); (0.0, 0.0) when no profile
        could be built.
    """
    frequent_zones, usual_hours, usual_weekdays, usual_months = build_user_profile(user_id, conn)

    if frequent_zones is None:
        print(f"No profile for {user_id}. Assuming no anomalies.")
        return 0.0, 0.0

    # Unusual location
    location_anomaly = 0.0
    min_distance = np.inf
    if not frequent_zones.empty:
        zones = frequent_zones[['latitude', 'longitude']].to_numpy(dtype=float)
        query_point = np.array([lat, lon], dtype=float)
        min_distance = np.sqrt(((zones - query_point) ** 2).sum(axis=1)).min()

        # The threshold must not depend on the query point: measuring history
        # against the query makes the threshold grow with the query's own
        # distance, so far-away points would never be flagged. Measure how
        # far the user's stored points sit from their nearest zone instead.
        user_df = load_user_data(user_id, conn)
        if user_df.empty:
            user_threshold = 0.05  # Default if no data
        else:
            history = user_df[['latitude', 'longitude']].to_numpy(dtype=float)
            offsets = history[:, None, :] - zones[None, :, :]
            spread = np.sqrt((offsets ** 2).sum(axis=2)).min(axis=1)
            user_threshold = np.percentile(spread, 95)

        if min_distance > user_threshold:
            location_anomaly = 1.0
    print(f"Location anomaly score: {location_anomaly:.2f} (min distance: {min_distance:.2f})")

    # Unusual time
    time_anomaly = 0.0
    hour_prob = usual_hours.get(hour, 0.01)
    weekday_prob = usual_weekdays.get(weekday, 0.01)
    month_prob = usual_months.get(month, 0.01)
    if hour_prob < prob_threshold:
        time_anomaly += 0.5
    if weekday_prob < prob_threshold:
        time_anomaly += 0.3
    if month_prob < prob_threshold:
        time_anomaly += 0.15
    if hour in late_night_hours:
        time_anomaly += 0.5
    time_anomaly = min(time_anomaly, 1.0)
    print(f"Time anomaly score: {time_anomaly:.2f} (hour prob: {hour_prob:.2f}, weekday prob: {weekday_prob:.2f}, month prob: {month_prob:.2f})")

    return location_anomaly, time_anomaly


# New function to update database and profile
def update_user_profile(user_id, conn, new_data):
    """Append new observations to the database and rebuild the user's profile.

    Args:
        user_id: User the rows belong to.
        conn: Open sqlite3 connection.
        new_data: DataFrame of new rows with at least a `timestamp` column.

    Returns:
        None. Nothing is written or rebuilt when `new_data` has no rows.
    """
    if new_data.empty:
        # Nothing to store, and there is no first timestamp to report.
        return

    # Append new data to the database
    new_data.to_sql('user_locations', conn, if_exists='append', index=False)
    print(f"Updated database with new data for {user_id} at {new_data['timestamp'].iloc[0]}")

    # Rebuild the profile immediately. build_user_profile returns a 4-tuple
    # that is not needed here, so it is not unpacked.
    build_user_profile(user_id, conn)

# Capture and store new data
def capture_and_store(user_id, latitude, longitude, conn):
    """Record a location fix stamped with the current local time.

    Builds a one-row DataFrame (including derived hour, weekday and month)
    and hands it to `update_user_profile`.

    Args:
        user_id: User the fix belongs to.
        latitude: Latitude of the fix.
        longitude: Longitude of the fix.
        conn: Open sqlite3 connection.
    """
    captured_at = datetime.now()
    record = {
        'user_id': user_id,
        'latitude': latitude,
        'longitude': longitude,
        'timestamp': captured_at,
        'hour': captured_at.hour,
        'weekday': captured_at.weekday(),
        'month': captured_at.month,
    }
    update_user_profile(user_id, conn, pd.DataFrame([record]))

# API endpoint (assuming FastAPI)
from fastapi import FastAPI

# Application object on which the route decorators below register handlers.
app = FastAPI()

@app.post("/capture")
def capture(user_id: str, lat: float, lon: float):
    """Store one location fix for a user.

    A dedicated SQLite connection is opened for the request and always closed
    afterwards. Failures are reported in the response body rather than
    raised.

    Returns:
        {"status": "success"} or {"status": "error", "message": <text>}.
    """
    conn = sqlite3.connect('user_data.db')
    try:
        capture_and_store(user_id, lat, lon, conn)
    except Exception as e:
        outcome = {"status": "error", "message": str(e)}
    else:
        outcome = {"status": "success"}
    finally:
        conn.close()
    return outcome

# Example periodic profile update (using a simple loop for demo)
import time

def periodic_profile_update():
    """Rebuild the profiles of a fixed set of users once an hour, forever.

    Opens its own SQLite connection and never returns; intended to run in a
    background thread.
    """
    conn = sqlite3.connect('user_data.db')
    tracked_users = ['user1', 'user2', 'user3']  # Add dynamic user list if needed
    pause_seconds = 3600  # Update every hour
    while True:
        for user_id in tracked_users:
            build_user_profile(user_id, conn)
        print(f"Profiles updated at {datetime.now()}")
        time.sleep(pause_seconds)


def process_new_user_data(user_id, new_data, conn):
    """Store new observations, rebuild the profile and score each new row.

    Args:
        user_id: User the rows belong to.
        new_data: DataFrame with `latitude`, `longitude` and `timestamp`.
        conn: Open sqlite3 connection.

    Returns:
        List with one dict per row of `new_data` (user_id, timestamp,
        location_anomaly, time_anomaly, total_risk_score), or None when no
        profile could be built.
    """
    # Step 1: Store the new data
    new_data.to_sql('user_locations', conn, if_exists='append', index=False)
    print(f"✅ New data stored for user: {user_id}")

    # Step 2: Rebuild profile (frequent zones + usual hours)
    profile = build_user_profile(user_id, conn)
    # build_user_profile signals failure with a tuple of Nones rather than a
    # bare None, so the first element has to be inspected as well.
    if profile is None or profile[0] is None:
        print("⚠️ Could not build profile.")
        return None

    # Step 3: Detect anomalies for the new data points
    results = []
    for _, row in new_data.iterrows():
        timestamp = pd.to_datetime(row['timestamp'])
        loc_anomaly, time_anomaly = detect_user_anomalies(
            row['latitude'], row['longitude'],
            timestamp.hour, timestamp.dayofweek, timestamp.month,
            user_id, conn
        )
        results.append({
            'user_id': user_id,
            'timestamp': timestamp,
            'location_anomaly': loc_anomaly,
            'time_anomaly': time_anomaly,
            # Mean of the two scores, rounded to two decimals
            'total_risk_score': round((loc_anomaly + time_anomaly) / 2, 2)
        })

    return results



# Main execution
if __name__ == "__main__":
    # Initialize database
    conn = setup_database()  # SQLite connection

    # Seed the database with synthetic data when the table is missing or
    # empty. A missing table (first run) surfaces as a query error.
    try:
        test_df = pd.read_sql("SELECT * FROM user_locations LIMIT 1", conn)
        needs_seed = test_df.empty
    except (sqlite3.Error, pd.errors.DatabaseError):
        needs_seed = True
    if needs_seed:
        # generate_synthetic_data requires a user id, so seed the two demo
        # users explicitly in both the "empty" and the "missing table" case.
        synthetic_data = pd.concat([generate_synthetic_data('user1'), generate_synthetic_data('user2')])
        synthetic_data.to_sql('user_locations', conn, if_exists='append', index=False)

    # Test anomaly detection: (user_id, lat, lon, hour, weekday, month)
    test_points = [
        ('user1', 36.8065, 10.1815, 16, 5, 6),  # User1, Tunis (home), 4 PM, Saturday, June
        ('user1', 33.8815, 10.0982, 23, 5, 6),  # User1, Gabes (unusual), 11 PM (unusual), Saturday, June
        ('user2', 33.8815, 10.0982, 16, 5, 6),  # User2, Gabes (home), 4 PM, Saturday, June
        ('user2', 36.8065, 10.1815, 2, 5, 2),   # User2, Tunis (unusual), 2 AM (unusual), February
        ('user3', 35.6754, 10.1033, 3, 4, 2)    # User3, Kairouan (no data), 3 AM, Friday, February
    ]

    for user_id, lat, lon, hour, weekday, month in test_points:
        loc_anomaly, time_anomaly = detect_user_anomalies(lat, lon, hour, weekday, month, user_id, conn)
        print(f"Anomaly for {user_id} at ({lat}, {lon}, {hour}:00, Weekday: {weekday}, Month: {month}): "
            f"Location Anomaly: {loc_anomaly:.2f}, Time Anomaly: {time_anomaly:.2f}")

    # Visualize user locations
    user_data = pd.read_sql("SELECT * FROM user_locations", conn)
    plt.figure(figsize=(10, 6))
    sns.scatterplot(data=user_data, x='longitude', y='latitude', hue='user_id', size=1, alpha=0.5)
    plt.title('User Locations from Database')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.legend()
    plt.show()

    # Start periodic update in background (for demo; use a proper scheduler in production)
    import threading
    update_thread = threading.Thread(target=periodic_profile_update, daemon=True)
    update_thread.start()

    # Run the FastAPI app (for testing; use uvicorn in production)
    # NOTE: uvicorn.run blocks until the server is stopped, so everything
    # below only executes after shutdown.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

    # Example new user capture (e.g. from frontend)
    new_data = pd.DataFrame([{
        'user_id': 'user1',
        'latitude': 36.8211,
        'longitude': 10.2044,
        'timestamp': datetime.now()
    }])
    alerts = process_new_user_data('user1', new_data, conn)

    for alert in alerts:
        print(alert)

    # Close database connection
    conn.close()

ModuleNotFoundError: No module named 'fastapi'

In [None]:
def predict_risk_with_anomalies(lat, lon, hour, weekday, month, user_id, conn, model, scaler, cluster_weights, cluster_fatalities, cluster_event_types, cluster_temporal_density, X_scaled, low_risk_clusters, beta=2.0):
    """Combine a cluster-based spatial risk score with user behaviour anomalies.

    The query point is scaled with `scaler` and matched to the nearest core
    point of `model`. The nearest cluster's weight, adjusted by its fatality
    and temporal-density values, gives a base score. That score is used as-is
    within `eps` (95th percentile of finite core distances) and decays
    exponentially with `beta` beyond it, never dropping below 20% of the
    base. The result is normalised by the largest adjusted weight and then
    boosted by the location/time anomaly scores, capped at 1.0.

    Args:
        lat, lon: Coordinates of the query point.
        hour, weekday, month: Time components passed to
            `detect_user_anomalies`.
        user_id, conn: Passed to `detect_user_anomalies`.
        model: Fitted clustering model exposing `core_distances_` and
            `labels_`.
        scaler: Fitted transformer with a `transform` method.
        cluster_weights, cluster_fatalities, cluster_temporal_density:
            Per-cluster values supporting `.get`, `.max` and arithmetic —
            presumably pandas Series indexed by cluster label; confirm.
        cluster_event_types: Mapping from cluster label to event type.
        X_scaled: Scaled training points, aligned with `model.labels_`.
        low_risk_clusters: Cluster labels for which the location anomaly is
            ignored.
        beta: Decay rate applied beyond `eps`.

    Returns:
        Tuple (combined_score, normalized_distance, event_type, loc_anomaly,
        time_anomaly); (0.0, 0.0, 'None', loc_anomaly, time_anomaly) when the
        model has no core points.
    """
    loc_anomaly, time_anomaly = detect_user_anomalies(lat, lon, hour, weekday, month, user_id, conn)

    query_scaled = scaler.transform(np.array([[lat, lon]], dtype=np.float32))

    # Core points are those with a finite core distance
    core_indices = np.where(model.core_distances_ != np.inf)[0]
    if len(core_indices) == 0:
        return 0.0, 0.0, 'None', loc_anomaly, time_anomaly

    core_labels = model.labels_[core_indices]
    gaps = np.sqrt(((query_scaled - X_scaled[core_indices]) ** 2).sum(axis=1))
    nearest_idx = np.argmin(gaps)
    nearest_gap = gaps[nearest_idx]
    nearest_cluster = core_labels[nearest_idx]

    # Normalise by the largest distance of any point from the data centroid
    data_radius = np.sqrt(((X_scaled - X_scaled.mean(axis=0)) ** 2).sum(axis=1)).max()
    normalized_distance = nearest_gap / data_radius
    event_type = cluster_event_types.get(nearest_cluster, 'Unknown')

    # core_indices is non-empty here, so the percentile is always defined
    eps = np.percentile(model.core_distances_[core_indices], 95)

    # Base score shared by the "inside eps" and "outside eps" cases
    weight_cap = cluster_weights.max() if not cluster_weights.empty else 1
    base_score = cluster_weights.get(nearest_cluster, 0) / weight_cap
    base_score *= (1 + 0.2 * cluster_fatalities.get(nearest_cluster, 0) + 0.3 * cluster_temporal_density.get(nearest_cluster, 0))

    if nearest_gap <= eps:
        spatial_score = base_score
    else:
        decayed = base_score * np.exp(-beta * (nearest_gap - eps))
        spatial_score = max(decayed, 0.2 * base_score)
    spatial_score /= (cluster_weights / cluster_weights.max() * (1 + 0.2 * cluster_fatalities + 0.3 * cluster_temporal_density)).max()

    if nearest_cluster in low_risk_clusters:
        loc_anomaly = 0.0

    boosted = spatial_score * (1 + 0.5 * loc_anomaly + 0.5 * time_anomaly)
    combined_score = min(1.0, boosted)
    return combined_score, normalized_distance, event_type, loc_anomaly, time_anomaly

In [None]:
from pymongo import MongoClient
from datetime import datetime
import pandas as pd
from fastapi import FastAPI
from apscheduler.schedulers.background import BackgroundScheduler

app = FastAPI()
client = MongoClient('mongodb://localhost:27017/')
db = client['hydatis']
collection = db['user_locations']
# Documents store latitude and longitude as plain numbers, which a 2dsphere
# index cannot hold (it requires GeoJSON objects or legacy coordinate pairs),
# so inserts would be rejected. Index the fields this module actually filters
# on instead.
# NOTE: a 2dsphere index already created by an earlier run has to be dropped
# manually.
collection.create_index([("user_id", 1), ("timestamp", 1)])

def detect_user_anomalies(lat, lon, hour, weekday, month, user_id, collection):
    """Placeholder: always report no location and no time anomaly.

    Returns:
        Tuple (location_anomaly, time_anomaly), both 0.0.
    """
    # Placeholder; implement full logic
    no_anomaly = 0.0
    return no_anomaly, no_anomaly

def build_user_profile(user_id, collection):
    """Placeholder: always return an empty five-element profile.

    Returns:
        Tuple of five None values.
    """
    # Placeholder; implement full logic
    return (None,) * 5

def capture_and_store_mongodb(user_id, latitude, longitude, collection):
    """Insert one location fix, stamped with the current local time.

    The record goes through a one-row DataFrame before insertion, so the
    stored values are whatever `DataFrame.to_dict('records')` produces.

    Args:
        user_id: User the fix belongs to.
        latitude: Latitude of the fix.
        longitude: Longitude of the fix.
        collection: Object with an `insert_many` method.
    """
    now = datetime.now()
    record = {
        'user_id': user_id,
        'latitude': latitude,
        'longitude': longitude,
        'timestamp': now,
        'hour': now.hour,
        'weekday': now.weekday(),
        'month': now.month,
    }
    documents = pd.DataFrame([record]).to_dict('records')
    collection.insert_many(documents)
    print(f"Captured data for {user_id} at {now}")

def trigger_alert(user_id, lat, lon, loc_anomaly, time_anomaly):
    """Print an alert line with the position and both anomaly scores.

    Args:
        user_id: User the alert concerns.
        lat: Latitude of the observation.
        lon: Longitude of the observation.
        loc_anomaly: Location anomaly score, printed with two decimals.
        time_anomaly: Time anomaly score, printed with two decimals.
    """
    position = f"({lat}, {lon})"
    scores = f"Location Anomaly {loc_anomaly:.2f}, Time Anomaly {time_anomaly:.2f}"
    print(f"ALERT for {user_id} at {position}: {scores}")
    # Integrate with emergency services API here

def sync_offline_data(user_id, collection, local_data):
    """Bulk-insert locally buffered rows; do nothing when there are none.

    Args:
        user_id: User named in the confirmation message.
        collection: Object with an `insert_many` method.
        local_data: DataFrame of buffered rows.
    """
    if local_data.empty:
        return
    documents = local_data.to_dict('records')
    collection.insert_many(documents)
    print(f"Synced offline data for {user_id}")

@app.post("/capture")
def capture(user_id: str, lat: float, lon: float, emergency: bool = False):
    """Store a location fix and raise an alert when it looks anomalous.

    An alert is triggered when either anomaly score exceeds 0.5 or when the
    client sets `emergency`. Failures are reported in the response body
    rather than raised.

    Returns:
        {"status": "success"} or {"status": "error", "message": <text>}.
    """
    try:
        capture_and_store_mongodb(user_id, lat, lon, collection)
        # Read the clock once so hour, weekday and month describe the same
        # instant even when the request straddles a boundary.
        now = datetime.now()
        loc_anomaly, time_anomaly = detect_user_anomalies(
            lat, lon, now.hour, now.weekday(), now.month, user_id, collection
        )
        if (loc_anomaly > 0.5 or time_anomaly > 0.5) or emergency:
            trigger_alert(user_id, lat, lon, loc_anomaly, time_anomaly)
        return {"status": "success"}
    except Exception as e:
        return {"status": "error", "message": str(e)}

@app.post("/sync")
def sync(user_id: str, local_data: list):  # Expect JSON array from client
    """Store a batch of records buffered on the client.

    Failures are reported in the response body rather than raised.

    Returns:
        {"status": "success"} or {"status": "error", "message": <text>}.
    """
    try:
        sync_offline_data(user_id, collection, pd.DataFrame(local_data))
    except Exception as e:
        return {"status": "error", "message": str(e)}
    return {"status": "success"}

def periodic_update():
    """Rebuild every known user's profile and purge their old records.

    For each distinct user id in the collection, the profile is rebuilt and
    the user's records with a timestamp more than 30 days in the past are
    deleted.
    """
    retention = pd.Timedelta(days=30)
    for user_id in collection.distinct('user_id'):
        build_user_profile(user_id, collection)
        cutoff = datetime.now() - retention
        stale_filter = {'timestamp': {'$lt': cutoff}, 'user_id': user_id}
        collection.delete_many(stale_filter)
    print(f"Profiles updated at {datetime.now()}")

# Run periodic_update every 30 minutes on a background scheduler.
# NOTE: the scheduler is started at module level, i.e. on import as well as
# when the file is run as a script.
scheduler = BackgroundScheduler()
scheduler.add_job(periodic_update, 'interval', minutes=30)
scheduler.start()

# Serve the API on all interfaces, port 8000, when executed directly.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

In [None]:
from pymongo import MongoClient
from datetime import datetime
import pandas as pd
import numpy as np
from sklearn.cluster import OPTICS
from sklearn.preprocessing import StandardScaler
from fastapi import FastAPI
from apscheduler.schedulers.background import BackgroundScheduler

app = FastAPI()
client = MongoClient('mongodb://localhost:27017/')
db = client['hydatis']
collection = db['user_locations']
# Documents store latitude and longitude as plain numbers, which a 2dsphere
# index cannot hold (it requires GeoJSON objects or legacy coordinate pairs),
# so inserts would be rejected. Index the fields this module actually filters
# on instead.
# NOTE: a 2dsphere index already created by an earlier run has to be dropped
# manually.
collection.create_index([("user_id", 1), ("timestamp", 1)])

# Constants
# Maximum distance to a cluster centroid, in raw latitude/longitude degrees,
# for a point to count as being at that zone.
distance_threshold = 0.05
# Relative-frequency cutoff below which an hour/weekday/month counts as unusual.
prob_threshold = 0.05
# Hours 22-23 and 0-4.
late_night_hours = list(range(22, 24)) + list(range(0, 5))

# Build profile per user
def build_user_profile(user_id, collection):
    """Build a location/time profile from a user's stored documents.

    Args:
        user_id: User to profile.
        collection: Object whose `find` accepts a filter dict and yields
            documents with `latitude`, `longitude` and `timestamp`.

    Returns:
        Tuple (centroids, hour_freq, weekday_freq, month_freq, scaler): a
        DataFrame of cluster centroids indexed by cluster label, three Series
        of relative frequencies and the fitted StandardScaler. Returns five
        None values when fewer than 10 documents exist for the user.
    """
    documents = list(collection.find({'user_id': user_id}))
    df = pd.DataFrame(documents)
    if df.empty or len(df) < 10:
        return (None,) * 5

    df['timestamp'] = pd.to_datetime(df['timestamp'])
    stamps = df['timestamp'].dt
    df['hour'] = stamps.hour
    df['weekday'] = stamps.dayofweek
    df['month'] = stamps.month

    # Cluster locations on standardised coordinates
    coords = df[['latitude', 'longitude']].values
    scaler = StandardScaler()
    scaled = scaler.fit_transform(coords)
    df['cluster'] = OPTICS(min_samples=5, xi=0.1).fit_predict(scaled)

    # Label -1 marks noise points; they do not contribute to any centroid
    clustered = df[df['cluster'] != -1]
    centroids = clustered.groupby('cluster')[['latitude', 'longitude']].mean()

    def _share(column):
        return df[column].value_counts(normalize=True)

    return centroids, _share('hour'), _share('weekday'), _share('month'), scaler

# Detect anomalies

def detect_user_anomalies(lat, lon, hour, weekday, month, user_id, collection):
    """Score how unusual a location/time observation is for a user.

    Location: 1.0 when the user has at least one frequent zone and none of
    them lies within `distance_threshold` (Euclidean, raw degrees) of the
    point. With no frequent zones there is nothing to compare against, so the
    score stays 0.0 rather than flagging every point.

    Time: 0.5 for a rare hour, 0.3 for a rare weekday, 0.2 for a rare month
    (rare = relative frequency below `prob_threshold`; values never seen are
    treated as 0.01), plus 0.5 for a late-night hour, capped at 1.0.

    Args:
        lat: Latitude of the observation.
        lon: Longitude of the observation.
        hour: Hour of day of the observation.
        weekday: Day of week of the observation.
        month: Month of the observation.
        user_id: User whose profile is used.
        collection: Collection passed through to `build_user_profile`.

    Returns:
        Tuple (loc_anomaly, time_anomaly); (0.0, 0.0) when no profile could
        be built.
    """
    profile = build_user_profile(user_id, collection)
    if profile[0] is None:
        return 0.0, 0.0

    # The fitted scaler is part of the profile but is not needed here.
    centroids, hour_freq, weekday_freq, month_freq, _scaler = profile

    # Location anomaly
    loc_anomaly = 0.0
    if not centroids.empty:
        zone_dists = np.sqrt((centroids['latitude'] - lat) ** 2 + (centroids['longitude'] - lon) ** 2)
        if not (zone_dists < distance_threshold).any():
            loc_anomaly = 1.0

    # Time anomaly
    time_anomaly = 0.0
    hour_prob = hour_freq.get(hour, 0.01)
    weekday_prob = weekday_freq.get(weekday, 0.01)
    month_prob = month_freq.get(month, 0.01)

    if hour_prob < prob_threshold:
        time_anomaly += 0.5
    if weekday_prob < prob_threshold:
        time_anomaly += 0.3
    if month_prob < prob_threshold:
        time_anomaly += 0.2
    if hour in late_night_hours:
        time_anomaly += 0.5

    time_anomaly = min(time_anomaly, 1.0)
    return loc_anomaly, time_anomaly

# Store new point
def capture_and_store(user_id, latitude, longitude):
    """Insert one location fix, stamped with the current local time.

    The document is written to the module-level `collection` and includes
    the derived hour, weekday and month.

    Args:
        user_id: User the fix belongs to.
        latitude: Latitude of the fix.
        longitude: Longitude of the fix.
    """
    captured_at = datetime.now()
    collection.insert_one({
        'user_id': user_id,
        'latitude': latitude,
        'longitude': longitude,
        'timestamp': captured_at,
        'hour': captured_at.hour,
        'weekday': captured_at.weekday(),
        'month': captured_at.month,
    })

# Trigger

def trigger_alert(user_id, lat, lon, loc_a, time_a):
    """Print a two-line alert surrounded by blank lines.

    Args:
        user_id: User the alert concerns.
        lat: Latitude of the observation.
        lon: Longitude of the observation.
        loc_a: Location anomaly score, printed with two decimals.
        time_a: Time anomaly score, printed with two decimals.
    """
    headline = f"ALERT: {user_id} at ({lat}, {lon})"
    details = f"Location anomaly: {loc_a:.2f}, Time anomaly: {time_a:.2f}"
    print(f"\n{headline}\n{details}\n")

# FastAPI endpoint
@app.post("/capture")
def capture(user_id: str, lat: float, lon: float, emergency: bool = False):
    """Store a location fix, score it and alert when it looks anomalous.

    An alert is triggered when either anomaly score exceeds 0.5 or when the
    client sets `emergency`.

    Returns:
        Dict with "status", "loc_anomaly" and "time_anomaly".
    """
    capture_and_store(user_id, lat, lon)
    # Read the clock once so hour, weekday and month describe the same
    # instant even when the request straddles a boundary.
    now = datetime.now()
    loc_a, time_a = detect_user_anomalies(
        lat, lon, now.hour, now.weekday(), now.month, user_id, collection
    )
    if (loc_a > 0.5 or time_a > 0.5) or emergency:
        trigger_alert(user_id, lat, lon, loc_a, time_a)
    return {"status": "success", "loc_anomaly": loc_a, "time_anomaly": time_a}

@app.post("/sync")
def sync(user_id: str, local_data: list):
    """Store a batch of client-buffered records under the given user id.

    Every record's `user_id` is overwritten with the `user_id` argument
    before insertion. An empty batch is accepted and stores nothing.

    Returns:
        {"status": "synced"}.
    """
    if not local_data:
        # insert_many rejects an empty document list, so there is nothing to do.
        return {"status": "synced"}
    df = pd.DataFrame(local_data)
    df['user_id'] = user_id
    collection.insert_many(df.to_dict('records'))
    return {"status": "synced"}

# Background profile update

def periodic_update():
    """Rebuild every known user's profile and purge their old records.

    For each distinct user id in the collection, the profile is rebuilt and
    the user's records with a timestamp more than 30 days in the past are
    deleted.
    """
    retention = pd.Timedelta(days=30)
    for uid in collection.distinct("user_id"):
        build_user_profile(uid, collection)
        cutoff = datetime.now() - retention
        stale_filter = {"timestamp": {"$lt": cutoff}, "user_id": uid}
        collection.delete_many(stale_filter)
    print("[INFO] Profiles updated.")

# Run periodic_update every 30 minutes on a background scheduler.
# NOTE: the scheduler is started at module level, i.e. on import as well as
# when the file is run as a script.
scheduler = BackgroundScheduler()
scheduler.add_job(periodic_update, 'interval', minutes=30)
scheduler.start()

# Serve the API on all interfaces, port 8000, when executed directly.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)