# SCATS Data Preprocessing
This notebook handles the loading, cleaning, reshaping, and visualizing of SCATS traffic data.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the raw SCATS export into memory.
# The path is relative to the notebook's working directory; edit it here if the file lives elsewhere.
SCATS_CSV_PATH = "Scats Data October 2006.csv"
df = pd.read_csv(filepath_or_buffer=SCATS_CSV_PATH)
df.head(n=5)

In [None]:
# Reshape wide -> long: every interval column of every input row becomes its
# own row, with the column name in 'Interval' and the cell value in 'Traffic_flow'.
# NOTE(review): assumes the CSV's interval columns are literally named V01..V96 — confirm against the file header.
time_cols = ['V%02d' % i for i in range(1, 97)]
id_cols = ['SCATS Number', 'Location', 'Date']
df_melted = pd.melt(
    df,
    id_vars=id_cols,
    value_vars=time_cols,
    var_name='Interval',
    value_name='Traffic_flow',
)
df_melted.head()

In [None]:
# Build one timestamp per reading: calendar date + 15-minute interval offset.
# Interval label Vnn maps to an offset of 15 * (nn - 1) minutes, so V01 is
# 00:00 and V96 is 23:45.
interval_time_map = {
    f'V{i:02d}': pd.Timedelta(minutes=15 * (i - 1))
    for i in range(1, 97)
}

# NOTE(review): pd.to_datetime infers the date format here. If the CSV stores
# dates day-first (e.g. 1/10/2006 meaning 1 October), pass dayfirst=True or an
# explicit format= — confirm against the raw file.
#
# The whole transformation is a single expression, so df_melted is only rebound
# once every step has succeeded; a failure part-way through cannot leave the
# frame half-converted. The helper 'Date' and 'Interval' columns are dropped
# once 'Datetime' has been derived from them.
df_melted = (
    df_melted
    .assign(
        Datetime=lambda frame: pd.to_datetime(frame['Date'])
        + frame['Interval'].map(interval_time_map)
    )
    .drop(columns=['Date', 'Interval'])
    .sort_values(by=['SCATS Number', 'Datetime'])
    .reset_index(drop=True)
)
df_melted.head()

In [None]:
# Derive calendar features from each reading's timestamp.
timestamps = df_melted['Datetime']
df_melted['Hour'] = timestamps.dt.hour  # hour of the day
df_melted['Day'] = timestamps.dt.day    # day of the month

In [None]:
# Persist the long-format table; index=False keeps the row index out of the file.
cleaned_csv_path = "Cleaned_dataset.csv"
df_melted.to_csv(path_or_buf=cleaned_csv_path, index=False)
print("✅ Saved Cleaned_dataset.csv")

In [None]:
# Sample site visualization: plot the first SCATS site that appears in the data.
sample_site = df_melted['SCATS Number'].unique()[0]
site_df = df_melted[df_melted['SCATS Number'] == sample_site]

fig, ax = plt.subplots(figsize=(12, 4))

# Draw one line per Location. If the site has more than one Location, a single
# line over all rows would join points from different locations at the same
# timestamp and render as a zig-zag. dropna=False keeps rows whose Location is
# missing, so no reading silently disappears from the figure.
location_groups = site_df.groupby('Location', sort=False, dropna=False)
for location, location_df in location_groups:
    ordered = location_df.sort_values('Datetime')
    ax.plot(ordered['Datetime'], ordered['Traffic_flow'], label=str(location))

ax.set_title(f"Traffic Flow Over Time for SCATS Site {sample_site}")
ax.set_xlabel("Datetime")
ax.set_ylabel("Traffic Flow")
ax.grid(True)
# A legend only adds information when there is more than one line to tell apart.
if location_groups.ngroups > 1:
    ax.legend(title="Location", fontsize="small")
fig.tight_layout()
plt.show()