In [1]:
import pandas as pd
import numpy as np

# Simulate synthetic traffic data
data = {
    'camera_id': np.random.choice(['camera_1', 'camera_2', 'camera_3'], size=1000),
    'average_speed': np.random.normal(60, 10, size=1000),  # Average speed around 60 mph
    'datetime': pd.date_range(start='2024-01-01', periods=1000, freq='H')
}

df = pd.DataFrame(data)

# Extract useful time-based features
df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.dayofweek
df['month'] = df['datetime'].dt.month

# Normalize the data (excluding camera_id and datetime)
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df[['average_speed', 'hour', 'day_of_week', 'month']] = scaler.fit_transform(df[['average_speed', 'hour', 'day_of_week', 'month']])

print(df.head())

  'datetime': pd.date_range(start='2024-01-01', periods=1000, freq='H')


  camera_id  average_speed            datetime      hour  day_of_week  \
0  camera_2      -0.223551 2024-01-01 00:00:00 -1.655100    -1.495605   
1  camera_1       0.172198 2024-01-01 01:00:00 -1.510373    -1.495605   
2  camera_2      -1.074852 2024-01-01 02:00:00 -1.365646    -1.495605   
3  camera_3       1.060477 2024-01-01 03:00:00 -1.220919    -1.495605   
4  camera_2      -0.550459 2024-01-01 04:00:00 -1.076191    -1.495605   

      month  
0 -0.586588  
1 -0.586588  
2 -0.586588  
3 -0.586588  
4 -0.586588  


In [5]:
from pyod.models.rcforest import RCForest
from pyod.utils.data import generate_data
import matplotlib.pyplot as plt

# Extract the feature columns (we don't need camera_id or datetime for the model)
X = df[['average_speed', 'hour', 'day_of_week', 'month']].values

# Train a Random Cut Forest model
rcf_model = RCForest(n_estimators=100, max_samples=256, contamination=0.05, random_state=42)
rcf_model.fit(X)

# Get the anomaly scores for each data point
anomaly_scores = rcf_model.decision_function(X)
df['anomaly_score'] = anomaly_scores

# Identify anomalies (for visualization purposes)
df['is_anomaly'] = rcf_model.labels_  # 1 for anomalies, 0 for normal

# Plot the average speed and anomaly scores
plt.figure(figsize=(12, 6))
plt.plot(df['average_speed'], label='Average Speed')
plt.plot(df['anomaly_score'], label='Anomaly Score', color='red')
plt.title('Anomaly Detection in Traffic Data with RCF')
plt.legend()
plt.show()

ModuleNotFoundError: No module named 'pyod.models.rcforest'

In [None]:
import joblib

# Save the model
joblib.dump(rcf_model, 'rcf_model.pkl')

# Load the model
loaded_model = joblib.load('rcf_model.pkl')