In [4]:
import pandas as pd
import numpy as np

# Motorised count columns that are summed into ``total_vehicles``
# (``pedal_cycles`` is deliberately kept separate as ``total_cycles``).
MOTOR_VEHICLE_COLUMNS = [
    'two_wheeled_motor_vehicles',
    'cars_and_taxis',
    'buses_and_coaches',
    'lgvs',
    'all_hgvs',
]


def add_traffic_features(data):
    """Return a copy of *data* with derived traffic features appended.

    The input frame must contain the columns ``count_date``, ``hour``,
    ``pedal_cycles``, ``link_length_km``, ``region_name``,
    ``direction_of_travel``, ``latitude``, ``longitude`` and every column
    named in ``MOTOR_VEHICLE_COLUMNS``.  The caller's frame is not modified.

    Columns added (in this order): ``day_of_week``, ``month``,
    ``is_weekend``, ``hour_bin``, ``total_vehicles``, ``total_cycles``,
    ``vehicle_density_km``, ``hgv_dominant``,
    ``mean_vehicles_by_direction``, ``lat_lon_delta``.
    ``count_date`` is converted to datetime in the returned frame.
    """
    data = data.copy()
    data['count_date'] = pd.to_datetime(data['count_date'])

    # Time-based features (dayofweek: Monday=0 ... Sunday=6).
    data['day_of_week'] = data['count_date'].dt.dayofweek
    data['month'] = data['count_date'].dt.month
    data['is_weekend'] = data['day_of_week'].isin([5, 6]).astype(int)  # 1 if weekend, else 0
    # right=False makes the bins [0, 6), [6, 12), [12, 18), [18, 24);
    # any hour outside 0-23 therefore ends up as NaN.
    data['hour_bin'] = pd.cut(
        data['hour'],
        bins=[0, 6, 12, 18, 24],
        labels=['Night', 'Morning', 'Afternoon', 'Evening'],
        right=False,
    )

    # Total vehicles and total cycles
    data['total_vehicles'] = data[MOTOR_VEHICLE_COLUMNS].sum(axis=1)
    data['total_cycles'] = data['pedal_cycles']

    # Vehicle density (vehicles per km).  A zero link length would produce
    # +/-inf, which then leaks into the CSV and any later aggregation, so
    # zero lengths are treated as missing and yield NaN instead.
    link_length_km = data['link_length_km'].replace(0, np.nan)
    data['vehicle_density_km'] = data['total_vehicles'] / link_length_km

    # Flag for heavy goods vehicle (HGV) dominance.  When total_vehicles is
    # 0 the ratio is NaN, the comparison is False and the flag is 0.
    data['hgv_dominant'] = (data['all_hgvs'] / data['total_vehicles'] > 0.5).astype(int)

    # Mean total vehicles for each (region, direction of travel) pair ...
    region_group = (
        data.groupby(['region_name', 'direction_of_travel'])['total_vehicles']
        .mean()
        .reset_index(name='mean_vehicles_by_direction')
    )
    # ... attached back to every row; how='left' keeps all original rows.
    data = pd.merge(data, region_group, on=['region_name', 'direction_of_travel'], how='left')

    # Straight-line distance, in degrees, of each point from the dataset's
    # mean latitude/longitude.  This is not a geodesic distance.
    data['lat_lon_delta'] = np.sqrt(
        (data['latitude'] - data['latitude'].mean()) ** 2
        + (data['longitude'] - data['longitude'].mean()) ** 2
    )

    return data


# ``__name__`` is "__main__" both in a notebook cell and when run as a script,
# so the pipeline below still executes there and ``data`` remains a global;
# the guard only keeps the file I/O from running on a plain import.
if __name__ == "__main__":
    data = pd.read_csv("/content/data.csv")
    data = add_traffic_features(data)

    data.to_csv("transformed_data.csv", index=False)

    print(data.head())

   count_point_id direction_of_travel  year count_date  hour  region_id  \
0              52                   E  2019 2019-09-06    17          1   
1              52                   E  2019 2019-09-06    12          1   
2              52                   E  2019 2019-09-06    14          1   
3              52                   W  2019 2019-09-06    14          1   
4              52                   W  2019 2019-09-06    12          1   

  region_name  local_authority_id local_authority_name road_name  ...  \
0  South West                   1      Isles of Scilly     A3112  ...   
1  South West                   1      Isles of Scilly     A3112  ...   
2  South West                   1      Isles of Scilly     A3112  ...   
3  South West                   1      Isles of Scilly     A3112  ...   
4  South West                   1      Isles of Scilly     A3112  ...   

  day_of_week month is_weekend   hour_bin  total_vehicles  total_cycles  \
0           4     9          0  Aft