In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import os

print("\n=== Starting Tracking Data Analysis ===\n")

# Location of the combined tracking export. Later cells derive the plot output
# directory from this path, so it stays a plain string.
csv_path = "data/combined_tracking_data.csv"

# Read the CSV file.
# The file carries an unnamed leading column (pandas labels it "Unnamed: 0"),
# most likely a row index written by an earlier export. It is not a tracking
# field, so skip any such column at parse time instead of loading it as data.
# Using a `usecols` filter (rather than `index_col=0`) keeps the default
# 0..n-1 row index exactly as before.
df = pd.read_csv(
    csv_path,
    usecols=lambda column: not column.startswith("Unnamed"),
)



=== Starting Tracking Data Analysis ===



In [5]:
# Preview the leading rows to confirm the columns parsed as expected.
df.head(n=5)

Unnamed: 0,frame_number,timestamp,tracker_id,x,y,x1,y1,x2,y2,prediction_type,sequence_index
0,-1,3.192178,1,232.092422,756.052856,211.405563,729.062744,252.779282,783.042969,actual,0
1,0,3.192182,1,232.132416,756.033752,211.468933,729.09845,252.795914,782.969055,actual,1
2,1,3.192183,1,232.23938,756.038696,211.631836,729.065491,252.846909,783.011963,actual,2
3,2,3.192185,1,232.200165,756.00293,211.573608,729.05658,252.826736,782.949219,actual,3
4,3,3.192186,1,232.156525,755.950806,211.584686,728.973389,252.728348,782.928223,actual,4


In [7]:
# Headline figures for the loaded tracking frame.
print("Basic Statistics:")

# Row count, then distinct tracker ids and distinct frame numbers.
row_total = len(df)
print(f"Total number of data points: {row_total}")

vehicle_total = df['tracker_id'].nunique()
print(f"Number of unique vehicles (tracker_ids): {vehicle_total}")

frame_total = df['frame_number'].nunique()
print(f"Number of frames: {frame_total}")

# Distance between the smallest and largest value of the timestamp column.
timestamps = df['timestamp']
elapsed = timestamps.max() - timestamps.min()
print(f"Time span: {elapsed:.2f} seconds")

Basic Statistics:
Total number of data points: 13647040
Number of unique vehicles (tracker_ids): 1051
Number of frames: 9035
Time span: 761.50 seconds


In [6]:
# --- Rows per prediction type -------------------------------------------------
print("\nPrediction Types Distribution:")
prediction_counts = df['prediction_type'].value_counts()
print(prediction_counts)

# --- Summary of the bounding-box corner coordinates ---------------------------
print("\nBounding Box Statistics:")
corner_columns = ['x1', 'y1', 'x2', 'y2']
bbox_stats = df[corner_columns].describe()
print(bbox_stats)

# --- Box extents --------------------------------------------------------------
# Each extent is the far corner minus the near corner along one axis; the
# results are stored on the frame itself because later cells read them.
extent_sources = {
    'width': ('x2', 'x1'),
    'height': ('y2', 'y1'),
}
for extent_name, (far_column, near_column) in extent_sources.items():
    df[extent_name] = df[far_column] - df[near_column]

print("\nVehicle Size Statistics:")
size_stats = df[list(extent_sources)].describe()
print(size_stats)


Prediction Types Distribution:
prediction_type
actual    6823520
lstm      3411760
kalman    3411760
Name: count, dtype: int64

Bounding Box Statistics:
                 x1            y1            x2            y2
count  1.364704e+07  1.364704e+07  1.364704e+07  1.364704e+07
mean   1.281589e+03  6.166598e+02  1.328966e+03  6.626872e+02
std    1.406851e+03  2.139489e+02  1.410044e+03  2.166691e+02
min   -1.435941e+02 -8.947049e+00 -8.149813e+01  1.711926e+01
25%    6.472178e+02  4.446933e+02  7.042646e+02  4.871025e+02
50%    1.076685e+03  6.581343e+02  1.116409e+03  6.881245e+02
75%    1.604709e+03  7.375295e+02  1.647032e+03  7.852560e+02
max    5.342136e+04  3.343549e+03  5.356501e+04  3.380763e+03

Vehicle Size Statistics:
              width        height
count  1.364704e+07  1.364704e+07
mean   4.737770e+01  4.602745e+01
std    1.546831e+01  1.410569e+01
min    1.640936e+01  1.357483e+01
25%    3.526550e+01  3.132769e+01
50%    4.411121e+01  4.846133e+01
75%    5.817482e+01  5.6

In [8]:

    
# Create visualizations
# Every artefact of this cell is written to an "analysis_plots" folder that
# sits next to the input CSV; the folder is created on demand.
output_dir = os.path.dirname(csv_path)
plots_dir = os.path.join(output_dir, 'analysis_plots')
os.makedirs(plots_dir, exist_ok=True)


def _save_and_close(fig, filename):
    """Write ``fig`` into ``plots_dir`` as ``filename``, then close it.

    Closing the figure releases its memory and stops the notebook from also
    rendering it inline.
    """
    fig.savefig(os.path.join(plots_dir, filename))
    plt.close(fig)


# 1. Vehicle Trajectories
fig, ax = plt.subplots(figsize=(12, 8))
sample_ids = df['tracker_id'].unique()[:5]  # Plot first 5 vehicles
# The frame also holds 'lstm' and 'kalman' rows for the same tracker ids.
# Drawing every row as one line would join observed and predicted positions
# in file order, so keep only the 'actual' rows and order them by frame.
# Selecting all sampled vehicles in one pass also avoids re-scanning the full
# frame once per vehicle.
observed = df.loc[
    df['tracker_id'].isin(sample_ids) & df['prediction_type'].eq('actual'),
    ['tracker_id', 'frame_number', 'x', 'y'],
].sort_values('frame_number', kind='stable')
for tracker_id in sample_ids:
    vehicle_data = observed[observed['tracker_id'] == tracker_id]
    ax.plot(vehicle_data['x'], vehicle_data['y'], label=f'Vehicle {tracker_id}')
ax.set_title('Vehicle Trajectories')
ax.set_xlabel('X Position')
ax.set_ylabel('Y Position')
ax.legend()
_save_and_close(fig, 'vehicle_trajectories.png')

# 2. Prediction Types Distribution
fig, ax = plt.subplots(figsize=(8, 6))
prediction_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of Prediction Types')
ax.set_xlabel('Prediction Type')
ax.set_ylabel('Count')
_save_and_close(fig, 'prediction_types.png')

# 3. Vehicle Size Distribution
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df[['width', 'height']], ax=ax)
ax.set_title('Vehicle Size Distribution')
ax.set_ylabel('Pixels')
_save_and_close(fig, 'vehicle_sizes.png')

# 4. Time Series of Vehicle Counts
# Distinct tracker ids per frame number, plotted against the frame number.
fig, ax = plt.subplots(figsize=(12, 6))
vehicle_counts = df.groupby('frame_number')['tracker_id'].nunique()
ax.plot(vehicle_counts.index, vehicle_counts.values)
ax.set_title('Number of Vehicles Over Time')
ax.set_xlabel('Frame Number')
ax.set_ylabel('Number of Vehicles')
_save_and_close(fig, 'vehicle_counts.png')

# Save summary statistics to a text file
# An explicit encoding keeps the file identical regardless of the platform's
# default locale.
summary_path = os.path.join(plots_dir, 'summary_statistics.txt')
with open(summary_path, 'w', encoding='utf-8') as f:
    f.write("=== Tracking Data Analysis Summary ===\n\n")
    f.write(f"Total data points: {len(df)}\n")
    f.write(f"Unique vehicles: {df['tracker_id'].nunique()}\n")
    f.write(f"Total frames: {df['frame_number'].nunique()}\n")
    f.write(f"Time span: {df['timestamp'].max() - df['timestamp'].min():.2f} seconds\n\n")
    f.write("Prediction Types Distribution:\n")
    f.write(prediction_counts.to_string())
    f.write("\n\nBounding Box Statistics:\n")
    f.write(bbox_stats.to_string())
    f.write("\n\nVehicle Size Statistics:\n")
    f.write(size_stats.to_string())

print(f"\nAnalysis complete! Plots and statistics saved to: {plots_dir}")
print("Generated files:")
print("1. vehicle_trajectories.png - Visualization of vehicle paths")
print("2. prediction_types.png - Distribution of prediction types")
print("3. vehicle_sizes.png - Distribution of vehicle sizes")
print("4. vehicle_counts.png - Number of vehicles over time")
print("5. summary_statistics.txt - Detailed numerical statistics")



Analysis complete! Plots and statistics saved to: data/analysis_plots
Generated files:
1. vehicle_trajectories.png - Visualization of vehicle paths
2. prediction_types.png - Distribution of prediction types
3. vehicle_sizes.png - Distribution of vehicle sizes
4. vehicle_counts.png - Number of vehicles over time
5. summary_statistics.txt - Detailed numerical statistics
