In [8]:
# =======================================
# MERGING MEMBER 1 + MEMBER 2 DATASETS
# =======================================

import pandas as pd
from pathlib import Path

# Folder where your cleaned datasets are stored
base_path = Path(r"../cleaned dataset")

# Load cleaned datasets
traffic = pd.read_csv(base_path / "traffic_daily_cleaned_member1_v2.csv")
weather = pd.read_csv(base_path / "weather_daily_cleaned_member2.csv")

# Convert date column to datetime (VERY IMPORTANT)
traffic["date"] = pd.to_datetime(traffic["date"], errors="coerce")
weather["date"] = pd.to_datetime(weather["date"], errors="coerce")

# Sort just for neatness
traffic = traffic.sort_values("date")
weather = weather.sort_values("date")

# =======================
# MERGE ON DATE COLUMN
# =======================
merged = traffic.merge(weather, on="date", how="inner")

print("Merged dataset shape:", merged.shape)
print("\nPreview of merged dataset:")
display(merged.head(10))

# Save merged result
out_path = base_path / "merged_traffic_weather_main_dataset.csv"
merged.to_csv(out_path, index=False)

print("\nSaved merged dataset to:", out_path)


Merged dataset shape: (177, 13)

Preview of merged dataset:


Unnamed: 0,date,daily_flow_total,daily_flow_mean,daily_cong_mean,daily_dsat_mean,num_records,rain_mm,tmax_c,tmin_c,tmean,wind_speed_knots,sunshine_hours,soil_temp_c
0,2023-01-03,649194,153.764567,0.207958,27.206774,4222,1.5,13.3,5.6,9.45,12.5,0.0,6.125
1,2023-01-04,551602,90.189993,0.173479,21.30739,6116,0.1,12.6,8.8,10.7,13.7,0.5,8.2
2,2023-01-05,464146,75.902862,0.309076,24.752412,6115,2.7,12.6,6.7,9.65,10.3,0.0,8.025
3,2023-01-06,860701,140.936794,0.490093,25.563124,6107,2.3,11.5,5.6,8.55,9.7,0.2,6.325
4,2023-01-07,713575,116.388028,0.294569,21.666939,6131,1.9,10.7,6.4,8.55,12.1,5.2,6.675
5,2023-01-08,582720,95.044854,0.157397,17.114011,6131,0.0,7.8,4.4,6.1,11.1,3.2,5.125
6,2023-01-09,817644,133.798724,0.446572,23.800033,6111,0.5,7.9,3.7,5.8,12.8,4.8,4.475
7,2023-01-10,848945,138.966279,0.809625,24.910624,6109,11.2,13.6,5.5,9.55,13.4,0.1,6.725
8,2023-01-11,885515,144.786625,0.642413,26.035154,6116,6.2,11.8,5.5,8.65,13.4,1.2,5.6
9,2023-01-12,887731,145.07779,0.660239,26.148717,6119,2.8,11.8,5.3,8.55,17.0,2.7,6.875



Saved merged dataset to: ..\cleaned dataset\merged_traffic_weather_main_dataset.csv


In [9]:
import pandas as pd
from pathlib import Path

# Base folder
base_path = Path(r"../cleaned dataset")

# 1) Load the cleaned daily files
traffic = pd.read_csv(base_path / "traffic_daily_cleaned_member1_v2.csv")
weather = pd.read_csv(base_path / "weather_daily_cleaned_member2.csv")

print("Traffic shape:", traffic.shape)
print("Weather shape:", weather.shape)

# 2) Convert date columns to proper datetime (VERY IMPORTANT)
#    Your dates are in format dd-mm-YYYY â†’ use dayfirst=True
traffic["date"] = pd.to_datetime(traffic["date"], dayfirst=True, errors="coerce")
weather["date"] = pd.to_datetime(weather["date"], dayfirst=True, errors="coerce")

# 3) Sort by date (for neatness)
traffic = traffic.sort_values("date")
weather = weather.sort_values("date")

# 4) Merge on date with INNER JOIN
merged = traffic.merge(weather, on="date", how="inner")

print("\nMerged shape:", merged.shape)
print("Unique dates:", merged["date"].nunique())
print("Date range:", merged["date"].min(), "to", merged["date"].max())

# Quick preview
display(merged.head(10))

# 5) Save the correct merged file
out_path = base_path / "merged_traffic_weather_main_dataset_clean.csv"
merged.to_csv(out_path, index=False)

print("\nSaved CLEAN merged dataset to:")
print(out_path)


Traffic shape: (177, 6)
Weather shape: (181, 8)

Merged shape: (11949, 13)
Unique dates: 68
Date range: 2023-01-02 00:00:00 to 2023-12-06 00:00:00


Unnamed: 0,date,daily_flow_total,daily_flow_mean,daily_cong_mean,daily_dsat_mean,num_records,rain_mm,tmax_c,tmin_c,tmean,wind_speed_knots,sunshine_hours,soil_temp_c
0,2023-01-02,911876,149.194372,0.852421,27.116656,6112,0.0,9.8,5.6,7.7,12.9,1.0,5.625
1,2023-01-03,8119,21.309711,0.493438,4.275591,381,0.0,8.8,4.7,6.75,11.9,3.8,6.575
2,2023-01-04,797405,132.064425,0.355747,23.707188,6038,12.2,11.4,7.2,9.3,8.6,0.4,10.45
3,2023-01-05,617175,102.384705,0.142668,17.991871,6028,0.3,17.0,4.5,10.75,7.6,4.3,14.25
4,2023-01-06,876155,145.540698,0.813455,26.909967,6020,0.0,15.9,6.7,11.3,8.9,10.1,18.275
5,2023-02-02,924939,151.109132,0.743506,27.627185,6121,0.2,10.5,8.2,9.35,12.0,0.3,7.225
6,2023-02-03,688300,181.370224,0.670883,30.680369,3795,1.1,8.0,3.4,5.7,7.6,1.3,6.725
7,2023-02-04,646470,107.031457,0.178311,19.701987,6040,0.8,11.1,6.0,8.55,6.8,6.2,10.325
8,2023-02-05,892960,148.504906,0.504573,26.431898,6013,0.0,17.2,6.6,11.9,7.2,3.9,13.975
9,2023-02-06,859986,142.807373,0.81169,26.801893,6022,0.0,14.9,8.9,11.9,8.5,8.6,18.45



Saved CLEAN merged dataset to:
..\cleaned dataset\merged_traffic_weather_main_dataset_clean.csv
