# GPS Spoofing Detection - Quick Start

This notebook demonstrates the basic usage of the GPS spoofing detection pipeline with synthetic signals.

In [None]:
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Add the parent directory to the import path so the `src` imports below resolve
sys.path.insert(0, '..')

from src.preprocessing.signal_io import generate_synthetic_signal
from src.preprocessing.pipeline import preprocess_signal
from src.features.pipeline import extract_features_from_segment
from src.models.training import train_model, create_train_test_split
from src.models.evaluation import evaluate_model, get_feature_importance
from src.utils.plots import (
    plot_confusion_matrix,
    plot_feature_distributions,
    plot_feature_importance,
)

## 1. Generate Synthetic GPS Signals

We'll create one authentic and one spoofed synthetic signal to walk through the pipeline step by step; a larger labelled dataset is built in Section 4.

In [None]:
# Reproducibility: seed NumPy's legacy global RNG so that a fresh
# Restart & Run All repeats the same random draws for any code that samples from it.
# NOTE(review): this only takes effect if the project helpers draw from
# np.random's global state — confirm against their implementation.
random_seed = 42
np.random.seed(random_seed)

# Parameters (reused by the later cells)
fs = 5e6  # 5 MHz sampling rate
prn = 1   # PRN satellite 1
segment_duration = 0.5  # seconds
num_samples = int(fs * segment_duration)

# Generate authentic signal
signal_authentic = generate_synthetic_signal(
    num_samples=num_samples,
    fs=fs,
    snr_db=10.0,
    prn=prn,
    add_spoofing=False
)

# Generate spoofed signal
signal_spoofed = generate_synthetic_signal(
    num_samples=num_samples,
    fs=fs,
    snr_db=15.0,  # Higher SNR for spoofing signal
    prn=prn,
    add_spoofing=True
)

# Sanity check: report the sample count and the mean power of each raw signal
print(f"Generated signals: {num_samples} samples each")
print(f"Authentic signal power: {np.mean(np.abs(signal_authentic)**2):.2f}")
print(f"Spoofed signal power: {np.mean(np.abs(signal_spoofed)**2):.2f}")

## 2. Preprocess Signals

Apply preprocessing pipeline to clean and normalize the signals.

In [None]:
# Run each raw signal through the preprocessing pipeline (authentic first, then spoofed)
signal_authentic_processed, signal_spoofed_processed = (
    preprocess_signal(raw_signal, fs)
    for raw_signal in (signal_authentic, signal_spoofed)
)

print("Signals preprocessed successfully")

# Report the mean power of each processed signal
for description, processed in (
    ("Processed authentic power", signal_authentic_processed),
    ("Processed spoofed power", signal_spoofed_processed),
):
    print(f"{description}: {np.mean(np.abs(processed)**2):.3f}")

## 3. Extract Features

Extract correlation-based and statistical features from the signals.

In [None]:
# Both signals are processed with identical extractor settings
extractor_options = dict(fs=fs, prn=prn, include_statistical=True)

features_authentic = extract_features_from_segment(
    signal_authentic_processed, **extractor_options
)
features_spoofed = extract_features_from_segment(
    signal_spoofed_processed, **extractor_options
)

print(f"Extracted {len(features_authentic)} features")
print("\nKey feature comparison:")

# (heading, feature key, format spec, unit suffix) for each feature shown side by side
comparison_rows = [
    ("Peak-to-Secondary Ratio", 'peak_to_secondary', ".2f", ""),
    ("C/N0 Estimate", 'cn0_estimate', ".2f", " dB-Hz"),
    ("Asymmetry", 'asymmetry', ".4f", ""),
]
for heading, feature_key, spec, unit in comparison_rows:
    print(f"  {heading}:")
    print(f"    Authentic: {format(features_authentic[feature_key], spec)}{unit}")
    print(f"    Spoofed:   {format(features_spoofed[feature_key], spec)}{unit}")

## 4. Create Dataset

Generate a larger dataset with multiple segments for training.

In [None]:
# Build a labelled feature table: the first half of the segments is authentic
# (label 0), the second half is spoofed (label 1).
# NOTE(review): SNR differs by class (10 dB authentic vs 15 dB spoofed), so
# SNR-sensitive features may separate the classes on their own — consider
# drawing the SNR from the same range for both classes.
n_segments = 100
features_list = []

for i in range(n_segments):
    # Half authentic, half spoofed
    is_spoofed = i >= n_segments // 2

    signal = generate_synthetic_signal(
        num_samples=num_samples,
        fs=fs,
        snr_db=15.0 if is_spoofed else 10.0,
        prn=prn,
        add_spoofing=is_spoofed
    )

    signal_processed = preprocess_signal(signal, fs)
    # Request the same feature set as the single-segment example in Section 3,
    # explicitly rather than relying on the extractor's default.
    features = extract_features_from_segment(
        signal_processed,
        fs=fs,
        prn=prn,
        include_statistical=True
    )
    features['label'] = 1 if is_spoofed else 0
    features_list.append(features)

# One row per segment; built once from the list of feature dicts
df = pd.DataFrame(features_list)
print(f"\nDataset created: {len(df)} segments")
print(f"  Authentic: {(df['label'] == 0).sum()}")
print(f"  Spoofed:   {(df['label'] == 1).sum()}")
print(f"\nFeature columns: {len(df.columns)}")

## 5. Visualize Features

Compare feature distributions between authentic and spoofed signals.

In [None]:
# Features whose per-class distributions are plotted
key_features = [
    'peak_to_secondary',
    'asymmetry',
    'cn0_estimate',
    'fwhm',
]

plot_feature_distributions(df, key_features)

## 6. Train Model

Train a Random Forest classifier to detect spoofing.

In [None]:
# Feature matrix uses every column except the target; the target is the 'label' column
feature_cols = df.columns[df.columns != 'label'].tolist()
X = df[feature_cols].to_numpy()
y = df['label'].to_numpy()

# Split data (test_size=0.3)
X_train, X_test, y_train, y_test = create_train_test_split(X, y, test_size=0.3)

for split_name, split_X in (("Training set", X_train), ("Test set", X_test)):
    print(f"{split_name}: {len(split_X)} samples")

# Train model
training_options = {
    'model_name': 'random_forest',
    'balance_method': 'class_weight',
    'verbose': True,
}
model, cv_results = train_model(X_train, y_train, **training_options)

## 7. Evaluate Model

Evaluate the trained model on the test set.

In [None]:
# Score the trained model on the held-out split
results = evaluate_model(
    model,
    X_test,
    y_test,
    verbose=True,
)

# Plot the confusion matrix returned with the evaluation results
confusion = results['confusion_matrix']
plot_confusion_matrix(confusion)

## 8. Feature Importance

Analyze which features are most important for spoofing detection.

In [None]:
# get_feature_importance and plot_feature_importance are imported in the
# notebook's top import cell, keeping every dependency in one place.

# Retrieve the top-15 feature importances for the trained model
importance_df = get_feature_importance(model, feature_cols, top_n=15)

print("Top 10 Most Important Features:")
print(importance_df.head(10).to_string(index=False))

# Plot
plot_feature_importance(importance_df, top_n=15)

## Conclusion

This notebook demonstrated the complete pipeline:
1. Signal generation (synthetic)
2. Preprocessing
3. Feature extraction
4. Dataset creation
5. Feature visualization
6. Model training
7. Evaluation
8. Feature importance analysis

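On this synthetic dataset, the Random Forest classifier is trained to distinguish authentic from spoofed GPS signals using correlation-based features and statistical metrics; see the evaluation output in Section 7 for the actual performance. Note that the spoofed signals are generated at a higher SNR than the authentic ones (15 dB vs. 10 dB), so results on synthetic data may not carry over to real recordings.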

**Next steps**:
- Try the pipeline on real data from the TEXBAT or FGI datasets
- Compare different models (SVM, MLP)
- Tune hyperparameters
- Analyze failure cases