In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from geopy.distance import geodesic
from imblearn.over_sampling import SMOTE
from sklearn.cluster import DBSCAN
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Sequential

# Load the raw location log. Later steps read its user_id, timestamp,
# latitude and longitude columns.
df = pd.read_csv("stalker_detection.csv")


def preprocess_data(df):
    """Return a cleaned copy of *df*, ordered by user and then by time.

    Steps, in order:

    1. Drop every row that contains a missing value in any column.
    2. Parse ``timestamp`` to datetimes; values that cannot be parsed become
       ``NaT`` (``errors="coerce"``).
    3. Drop the rows whose timestamp could not be parsed.
    4. Sort by ``user_id`` and then ``timestamp``.

    The caller's frame is never modified: each step produces a new frame
    instead of using ``inplace=True``, so the function is safe to call on a
    frame that is still needed in its raw form (or on a slice of another
    frame). Original index labels are preserved; the index is not reset.

    Args:
        df: DataFrame with at least ``user_id`` and ``timestamp`` columns.

    Returns:
        A new, cleaned and sorted DataFrame.
    """
    cleaned = df.dropna()
    cleaned = cleaned.assign(
        timestamp=pd.to_datetime(cleaned["timestamp"], errors="coerce")
    )
    cleaned = cleaned.dropna(subset=["timestamp"])
    return cleaned.sort_values(by=["user_id", "timestamp"])

# Clean the raw log and order it by user and time so the per-user
# differences computed below compare consecutive fixes.
df = preprocess_data(df)

# Seconds elapsed since the same user's previous row (0 for a user's first row).
df["time_diff"] = df.groupby("user_id")["timestamp"].diff().dt.total_seconds().fillna(0)

# Metres travelled since the same user's previous row (0 for a user's first row).
# The previous position comes from a per-user shift, so the values line up with
# df row-for-row without depending on group iteration order, the column is a
# plain float column, and groupby.apply is not run over the grouping column.
prev_lat = df.groupby("user_id")["latitude"].shift()
prev_lon = df.groupby("user_id")["longitude"].shift()
df["distance_diff"] = [
    0.0
    if pd.isna(p_lat) or pd.isna(p_lon)
    else geodesic((p_lat, p_lon), (lat, lon)).meters
    for p_lat, p_lon, lat, lon in zip(
        prev_lat, prev_lon, df["latitude"], df["longitude"]
    )
]

# Standardize the four model features.
# NOTE: this overwrites the raw latitude/longitude/time_diff/distance_diff
# columns of df with their standardized values.
feature_columns = ["latitude", "longitude", "time_diff", "distance_diff"]
scaler = StandardScaler()
df[feature_columns] = scaler.fit_transform(df[feature_columns])

X = df[feature_columns]

# Density clustering: DBSCAN assigns the label -1 to noise points.
dbscan = DBSCAN(eps=0.5, min_samples=10).fit(X)
df["dbscan_cluster"] = dbscan.labels_

# fit_predict returns -1 for outliers and 1 for inliers, so despite its name
# the lof_score column holds labels, not continuous scores.
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
df["lof_score"] = lof.fit_predict(X)

# Pseudo-label: a row is flagged (1) when either detector marks it as an outlier.
df["stalker_label"] = np.where((df["dbscan_cluster"] == -1) | (df["lof_score"] == -1), 1, 0)

# Oversample the minority class; seeded so reruns give the same training set,
# matching the seeded forest below.
X_resampled, y_resampled = SMOTE(random_state=42).fit_resample(X, df["stalker_label"])

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_resampled, y_resampled)

def create_sequences(data, seq_length=10):
    """Slice *data* into overlapping windows, each paired with the next element.

    Window ``i`` is ``data[i:i + seq_length]`` and its label is
    ``data[i + seq_length]``. ``len(data) - seq_length`` pairs are produced,
    and none when *data* is not longer than *seq_length*.

    Args:
        data: Indexable, sliceable sequence of samples.
        seq_length: Number of consecutive samples in each window.

    Returns:
        A ``(sequences, labels)`` tuple of NumPy arrays.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    targets = [data[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)

seq_length = 10
# NOTE(review): X is ordered by user and then by time, so windows built over
# the whole array can straddle two users' tracks — confirm this is intended.
X_seq, y_seq = create_sequences(X.values, seq_length)

# Next-step regressor: given seq_length consecutive feature rows, predict the
# following row. The input shape is declared with an explicit Input layer
# rather than an input_shape= argument on the first LSTM, which Keras warns
# against for Sequential models.
lstm_model = Sequential([
    Input(shape=(seq_length, X.shape[1])),
    LSTM(50, activation="relu", return_sequences=True),
    LSTM(50, activation="relu"),
    Dense(X.shape[1]),
])

lstm_model.compile(optimizer="adam", loss="mse")

lstm_model.fit(X_seq, y_seq, epochs=10, batch_size=32, verbose=1, validation_split=0.2)


# Reconstruction error over every window (training and validation rows alike).
y_pred = lstm_model.predict(X_seq)
mse = np.mean(np.square(y_seq - y_pred))
print(f"Model MSE: {mse}")

# Score the random forest on the original (non-resampled) rows, flagging a row
# only when the predicted probability of class 1 reaches the threshold.
# NOTE(review): these rows were also the basis of the forest's training set and
# stalker_label is derived from the same features, so the metrics below are not
# a held-out estimate.
y_proba = rf_model.predict_proba(X)[:, 1]
threshold = 0.7
df["final_prediction"] = (y_proba >= threshold).astype(int)

precision = precision_score(df["stalker_label"], df["final_prediction"])
recall = recall_score(df["stalker_label"], df["final_prediction"])
f1 = f1_score(df["stalker_label"], df["final_prediction"])
conf_matrix = confusion_matrix(df["stalker_label"], df["final_prediction"])
roc_auc = roc_auc_score(df["stalker_label"], y_proba)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Confusion Matrix:\n {conf_matrix}")
print(f"ROC AUC Score: {roc_auc}")

def check_stalker(lat, lon, time_diff, distance_diff):
    """Classify one movement sample with the trained forest and print the verdict.

    The raw sample is standardized with the module-level ``scaler`` and then
    classified by the module-level ``rf_model``. Both were fitted on DataFrames
    with named columns, so the sample is wrapped in DataFrames carrying the
    same column names; passing a bare list makes scikit-learn warn about
    missing feature names.

    Args:
        lat: Latitude of the sample (raw, not standardized).
        lon: Longitude of the sample (raw, not standardized).
        time_diff: Seconds since the previous position.
        distance_diff: Metres travelled since the previous position.

    Returns:
        None. The verdict is printed to stdout.
    """
    feature_names = ["latitude", "longitude", "time_diff", "distance_diff"]
    sample = pd.DataFrame([[lat, lon, time_diff, distance_diff]], columns=feature_names)
    user_data = pd.DataFrame(scaler.transform(sample), columns=feature_names)
    # NOTE(review): predict() applies the forest's default decision rule, not
    # the 0.7 `threshold` used for df["final_prediction"] — confirm which one
    # the interactive check is meant to use.
    prediction = rf_model.predict(user_data)[0]
    if prediction == 1:
        print("⚠️ Warning: This location shows unusual movement patterns!")
    else:
        print("✅ No suspicious activity detected.")

# Read one raw sample from the console, in the argument order that
# check_stalker takes, then report the verdict for it.
user_lat, user_lon, user_time_diff, user_distance_diff = [
    float(input(prompt))
    for prompt in (
        "Enter latitude: ",
        "Enter longitude: ",
        "Enter time interval in seconds: ",
        "Enter distance covered in meters: ",
    )
]

check_stalker(user_lat, user_lon, user_time_diff, user_distance_diff)


  df["distance_diff"] = df.groupby("user_id").apply(


Epoch 1/10


  super().__init__(**kwargs)


[1m856/856[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 13ms/step - loss: 1.0039 - val_loss: 0.9736
Epoch 2/10
[1m856/856[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 13ms/step - loss: 0.9629 - val_loss: 0.9678
Epoch 3/10
[1m856/856[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 0.9601 - val_loss: 0.9607
Epoch 4/10
[1m856/856[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - loss: 0.9534 - val_loss: 0.9545
Epoch 5/10
[1m856/856[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 14ms/step - loss: 0.9432 - val_loss: 0.9500
Epoch 6/10
[1m856/856[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 13ms/step - loss: 0.9417 - val_loss: 0.9502
Epoch 7/10
[1m856/856[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 13ms/step - loss: 0.9364 - val_loss: 0.9495
Epoch 8/10
[1m856/856[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - loss: 0.9366 - val_loss: 0.9484
Epoch 9/10
[1m856/856[0m [32m━━━

