In [1]:
# Smoke test: confirms the kernel executes code.
greeting = "Hello World!"
print(greeting)

Hello World!


In [3]:
import pandas as pd
import numpy as np

# ------------------ USER PARAMETERS ------------------
baseline_file = "6- Final clean baseline data\\Road1 - 26th july corridor.xlsx"  # baseline Excel file (28 rows x 24 columns, no header)
output_file = "Synthetic_datasets\\26 july - synthetic dataset.xlsx"
noise_std = 0.03  # standard deviation of the Gaussian noise added to each 5-min period
window_size = 12  # number of past periods used as inputs (per1..per12)
# NOTE(review): every dataset cell in this notebook reseeds with 42, so all
# roads receive the identical noise sequence -- confirm that is intended.
np.random.seed(42)
# -----------------------------------------------------
# NOTE(review): another cell in this notebook writes to the same output_file
# (with extra time_sin/time_cos columns); whichever cell runs last wins.

# Layout of the baseline sheet and of the generated series.
seasons = ['winter', 'spring', 'summer', 'autumn']
days_of_week = ['Saturday','Sunday','Monday','Tuesday','Wednesday','Thursday','Friday']
hours_per_day = 24
periods_per_hour = 12  # 5-minute periods
periods_per_day = hours_per_day * periods_per_hour  # 288
n_days = len(seasons) * len(days_of_week)  # 28 rows: 7 consecutive days per season

# 1) Read the baseline and validate its shape before indexing into it.
baseline = pd.read_excel(baseline_file, header=None).to_numpy(dtype=float)
if baseline.shape != (n_days, hours_per_day):
    raise ValueError(
        f"Expected a {n_days} x {hours_per_day} baseline in {baseline_file!r}, "
        f"got {baseline.shape}"
    )

# 2) Expand every hourly profile into 288 periods.
# Each hourly value is anchored at the centre of its hour and the periods in
# between are linearly interpolated, which keeps the series continuous across
# hour boundaries. (Ramping prev -> curr and curr -> next inside one hour
# traverses every ramp twice and jumps back to `curr` when the next hour
# starts.) Outside the first/last hour centre np.interp holds the edge value.
period_index = np.arange(periods_per_day)
hour_centres = np.arange(hours_per_day) * periods_per_hour + periods_per_hour // 2

all_days_arrays = []  # one array of 288 noisy values per baseline row
for row_idx in range(n_days):
    smooth_day = np.interp(period_index, hour_centres, baseline[row_idx])
    noise = np.random.normal(0, noise_std, periods_per_day)
    # Keep the noisy values inside the [0, 1] range.
    all_days_arrays.append(np.clip(smooth_day + noise, 0, 1))

# 3) Build the sliding-window dataset: window_size consecutive periods are the
# inputs and the period that follows them is the target. Windows never cross a
# day boundary, so each day contributes periods_per_day - window_size rows.
records = []
for season_idx, season in enumerate(seasons):
    for day_idx, day_name in enumerate(days_of_week):
        day_array = all_days_arrays[season_idx * len(days_of_week) + day_idx]
        for i in range(window_size, len(day_array)):
            record = {
                "season": season,
                "day": day_name,
                "time": i,  # index of the target period in the day: window_size..287
            }
            for j, value in enumerate(day_array[i - window_size:i], start=1):
                record[f"per{j}"] = value
            record["output"] = day_array[i]
            records.append(record)

# 4) Convert to a DataFrame, sanity-check the shape and save to Excel.
df = pd.DataFrame(records)
# Columns: season, day, time + window_size inputs + output.
expected_shape = (n_days * (periods_per_day - window_size), window_size + 4)
assert df.shape == expected_shape, f"unexpected dataset shape {df.shape}"
df.to_excel(output_file, index=False)
print(f"Synthetic dataset saved to: {output_file}")
print(f"Shape of dataset: {df.shape} (should be {expected_shape[0]} x {expected_shape[1]})")

Synthetic dataset saved to: Synthetic_datasets\26 july - synthetic dataset.xlsx
Shape of dataset: (7728, 16) (should be 8064 x 15)


In [4]:
import pandas as pd
import numpy as np

# ------------------ USER PARAMETERS ------------------
baseline_file = "6- Final clean baseline data\\Road2 - Ring road (el-maryotiya).xlsx"  # baseline Excel file (28 rows x 24 columns, no header)
output_file = "Synthetic_datasets\\ring road (MR) - synthetic dataset.xlsx"
noise_std = 0.03  # standard deviation of the Gaussian noise added to each 5-min period
window_size = 12  # number of past periods used as inputs (per1..per12)
# NOTE(review): every dataset cell in this notebook reseeds with 42, so all
# roads receive the identical noise sequence -- confirm that is intended.
np.random.seed(42)
# -----------------------------------------------------
# NOTE(review): another cell in this notebook writes to the same output_file
# (with extra time_sin/time_cos columns); whichever cell runs last wins.

# Layout of the baseline sheet and of the generated series.
seasons = ['winter', 'spring', 'summer', 'autumn']
days_of_week = ['Saturday','Sunday','Monday','Tuesday','Wednesday','Thursday','Friday']
hours_per_day = 24
periods_per_hour = 12  # 5-minute periods
periods_per_day = hours_per_day * periods_per_hour  # 288
n_days = len(seasons) * len(days_of_week)  # 28 rows: 7 consecutive days per season

# 1) Read the baseline and validate its shape before indexing into it.
baseline = pd.read_excel(baseline_file, header=None).to_numpy(dtype=float)
if baseline.shape != (n_days, hours_per_day):
    raise ValueError(
        f"Expected a {n_days} x {hours_per_day} baseline in {baseline_file!r}, "
        f"got {baseline.shape}"
    )

# 2) Expand every hourly profile into 288 periods.
# Each hourly value is anchored at the centre of its hour and the periods in
# between are linearly interpolated, which keeps the series continuous across
# hour boundaries. (Ramping prev -> curr and curr -> next inside one hour
# traverses every ramp twice and jumps back to `curr` when the next hour
# starts.) Outside the first/last hour centre np.interp holds the edge value.
period_index = np.arange(periods_per_day)
hour_centres = np.arange(hours_per_day) * periods_per_hour + periods_per_hour // 2

all_days_arrays = []  # one array of 288 noisy values per baseline row
for row_idx in range(n_days):
    smooth_day = np.interp(period_index, hour_centres, baseline[row_idx])
    noise = np.random.normal(0, noise_std, periods_per_day)
    # Keep the noisy values inside the [0, 1] range.
    all_days_arrays.append(np.clip(smooth_day + noise, 0, 1))

# 3) Build the sliding-window dataset: window_size consecutive periods are the
# inputs and the period that follows them is the target. Windows never cross a
# day boundary, so each day contributes periods_per_day - window_size rows.
records = []
for season_idx, season in enumerate(seasons):
    for day_idx, day_name in enumerate(days_of_week):
        day_array = all_days_arrays[season_idx * len(days_of_week) + day_idx]
        for i in range(window_size, len(day_array)):
            record = {
                "season": season,
                "day": day_name,
                "time": i,  # index of the target period in the day: window_size..287
            }
            for j, value in enumerate(day_array[i - window_size:i], start=1):
                record[f"per{j}"] = value
            record["output"] = day_array[i]
            records.append(record)

# 4) Convert to a DataFrame, sanity-check the shape and save to Excel.
df = pd.DataFrame(records)
# Columns: season, day, time + window_size inputs + output.
expected_shape = (n_days * (periods_per_day - window_size), window_size + 4)
assert df.shape == expected_shape, f"unexpected dataset shape {df.shape}"
df.to_excel(output_file, index=False)
print(f"Synthetic dataset saved to: {output_file}")
print(f"Shape of dataset: {df.shape} (should be {expected_shape[0]} x {expected_shape[1]})")

Synthetic dataset saved to: Synthetic_datasets\ring road (MR) - synthetic dataset.xlsx
Shape of dataset: (7728, 16) (should be 7728 x 16)


In [5]:
import pandas as pd
import numpy as np

# ------------------ USER PARAMETERS ------------------
baseline_file = "6- Final clean baseline data\\Road3 - Ring road (AlQasr eleeiny).xlsx"  # baseline Excel file (28 rows x 24 columns, no header)
output_file = "Synthetic_datasets\\ring road (QA) - synthetic dataset.xlsx"
noise_std = 0.03  # standard deviation of the Gaussian noise added to each 5-min period
window_size = 12  # number of past periods used as inputs (per1..per12)
# NOTE(review): every dataset cell in this notebook reseeds with 42, so all
# roads receive the identical noise sequence -- confirm that is intended.
np.random.seed(42)
# -----------------------------------------------------
# NOTE(review): another cell in this notebook writes to the same output_file
# (with extra time_sin/time_cos columns); whichever cell runs last wins.

# Layout of the baseline sheet and of the generated series.
seasons = ['winter', 'spring', 'summer', 'autumn']
days_of_week = ['Saturday','Sunday','Monday','Tuesday','Wednesday','Thursday','Friday']
hours_per_day = 24
periods_per_hour = 12  # 5-minute periods
periods_per_day = hours_per_day * periods_per_hour  # 288
n_days = len(seasons) * len(days_of_week)  # 28 rows: 7 consecutive days per season

# 1) Read the baseline and validate its shape before indexing into it.
baseline = pd.read_excel(baseline_file, header=None).to_numpy(dtype=float)
if baseline.shape != (n_days, hours_per_day):
    raise ValueError(
        f"Expected a {n_days} x {hours_per_day} baseline in {baseline_file!r}, "
        f"got {baseline.shape}"
    )

# 2) Expand every hourly profile into 288 periods.
# Each hourly value is anchored at the centre of its hour and the periods in
# between are linearly interpolated, which keeps the series continuous across
# hour boundaries. (Ramping prev -> curr and curr -> next inside one hour
# traverses every ramp twice and jumps back to `curr` when the next hour
# starts.) Outside the first/last hour centre np.interp holds the edge value.
period_index = np.arange(periods_per_day)
hour_centres = np.arange(hours_per_day) * periods_per_hour + periods_per_hour // 2

all_days_arrays = []  # one array of 288 noisy values per baseline row
for row_idx in range(n_days):
    smooth_day = np.interp(period_index, hour_centres, baseline[row_idx])
    noise = np.random.normal(0, noise_std, periods_per_day)
    # Keep the noisy values inside the [0, 1] range.
    all_days_arrays.append(np.clip(smooth_day + noise, 0, 1))

# 3) Build the sliding-window dataset: window_size consecutive periods are the
# inputs and the period that follows them is the target. Windows never cross a
# day boundary, so each day contributes periods_per_day - window_size rows.
records = []
for season_idx, season in enumerate(seasons):
    for day_idx, day_name in enumerate(days_of_week):
        day_array = all_days_arrays[season_idx * len(days_of_week) + day_idx]
        for i in range(window_size, len(day_array)):
            record = {
                "season": season,
                "day": day_name,
                "time": i,  # index of the target period in the day: window_size..287
            }
            for j, value in enumerate(day_array[i - window_size:i], start=1):
                record[f"per{j}"] = value
            record["output"] = day_array[i]
            records.append(record)

# 4) Convert to a DataFrame, sanity-check the shape and save to Excel.
df = pd.DataFrame(records)
# Columns: season, day, time + window_size inputs + output.
expected_shape = (n_days * (periods_per_day - window_size), window_size + 4)
assert df.shape == expected_shape, f"unexpected dataset shape {df.shape}"
df.to_excel(output_file, index=False)
print(f"Synthetic dataset saved to: {output_file}")
print(f"Shape of dataset: {df.shape} (should be {expected_shape[0]} x {expected_shape[1]})")

Synthetic dataset saved to: Synthetic_datasets\ring road (QA) - synthetic dataset.xlsx
Shape of dataset: (7728, 16) (should be 7728 x 16)


In [2]:
import pandas as pd
import numpy as np

# ------------------ USER PARAMETERS ------------------
baseline_file = "6- Final clean baseline data\\Road1 - 26th july corridor.xlsx"  # baseline Excel file (28 rows x 24 columns, no header)
output_file = "Synthetic_datasets\\26 july - synthetic dataset.xlsx"
noise_std = 0.03  # standard deviation of the Gaussian noise added to each 5-min period
window_size = 12  # number of past periods used as inputs (per1..per12)
# NOTE(review): every dataset cell in this notebook reseeds with 42, so all
# roads receive the identical noise sequence -- confirm that is intended.
np.random.seed(42)
# -----------------------------------------------------
# NOTE(review): another cell in this notebook writes to the same output_file
# (without the time_sin/time_cos columns); whichever cell runs last wins.

# Layout of the baseline sheet and of the generated series.
seasons = ['winter', 'spring', 'summer', 'autumn']
days_of_week = ['Saturday','Sunday','Monday','Tuesday','Wednesday','Thursday','Friday']
hours_per_day = 24
periods_per_hour = 12  # 5-minute periods
periods_per_day = hours_per_day * periods_per_hour  # 288
n_days = len(seasons) * len(days_of_week)  # 28 rows: 7 consecutive days per season

# 1) Read the baseline and validate its shape before indexing into it.
baseline = pd.read_excel(baseline_file, header=None).to_numpy(dtype=float)
if baseline.shape != (n_days, hours_per_day):
    raise ValueError(
        f"Expected a {n_days} x {hours_per_day} baseline in {baseline_file!r}, "
        f"got {baseline.shape}"
    )

# 2) Expand every hourly profile into 288 periods.
# Each hourly value is anchored at the centre of its hour and the periods in
# between are linearly interpolated, which keeps the series continuous across
# hour boundaries. (Ramping prev -> curr and curr -> next inside one hour
# traverses every ramp twice and jumps back to `curr` when the next hour
# starts.) Outside the first/last hour centre np.interp holds the edge value.
period_index = np.arange(periods_per_day)
hour_centres = np.arange(hours_per_day) * periods_per_hour + periods_per_hour // 2

all_days_arrays = []  # one array of 288 noisy values per baseline row
for row_idx in range(n_days):
    smooth_day = np.interp(period_index, hour_centres, baseline[row_idx])
    noise = np.random.normal(0, noise_std, periods_per_day)
    # Keep the noisy values inside the [0, 1] range.
    all_days_arrays.append(np.clip(smooth_day + noise, 0, 1))

# 3) Build the sliding-window dataset: window_size consecutive periods are the
# inputs and the period that follows them is the target. Windows never cross a
# day boundary, so each day contributes periods_per_day - window_size rows.
records = []
for season_idx, season in enumerate(seasons):
    for day_idx, day_name in enumerate(days_of_week):
        day_array = all_days_arrays[season_idx * len(days_of_week) + day_idx]
        for i in range(window_size, len(day_array)):
            # Cyclic encoding of the time of day: one full turn per day, so the
            # last period of the day sits next to the first one.
            angle = 2 * np.pi * (i / periods_per_day)

            record = {
                "season": season,
                "day": day_name,
                "time": i,  # raw index of the target period (kept): window_size..287
                "time_sin": np.sin(angle),
                "time_cos": np.cos(angle),
            }
            for j, value in enumerate(day_array[i - window_size:i], start=1):
                record[f"per{j}"] = value
            record["output"] = day_array[i]
            records.append(record)

# 4) Convert to a DataFrame, sanity-check the shape and save to Excel.
df = pd.DataFrame(records)
# Columns: season, day, time, time_sin, time_cos + window_size inputs + output.
expected_shape = (n_days * (periods_per_day - window_size), window_size + 6)
assert df.shape == expected_shape, f"unexpected dataset shape {df.shape}"
df.to_excel(output_file, index=False)
print(f"Synthetic dataset saved to: {output_file}")
print(f"Shape of dataset: {df.shape}")


Synthetic dataset saved to: Synthetic_datasets\26 july - synthetic dataset.xlsx
Shape of dataset: (7728, 18)


In [3]:
# ------------------ USER PARAMETERS ------------------
baseline_file = "6- Final clean baseline data\\Road2 - Ring road (el-maryotiya).xlsx"  # baseline Excel file (28 rows x 24 columns, no header)
output_file = "Synthetic_datasets\\ring road (MR) - synthetic dataset.xlsx"
noise_std = 0.03  # standard deviation of the Gaussian noise added to each 5-min period
window_size = 12  # number of past periods used as inputs (per1..per12)
# NOTE(review): every dataset cell in this notebook reseeds with 42, so all
# roads receive the identical noise sequence -- confirm that is intended.
np.random.seed(42)
# -----------------------------------------------------
# NOTE(review): another cell in this notebook writes to the same output_file
# (without the time_sin/time_cos columns); whichever cell runs last wins.

# Layout of the baseline sheet and of the generated series.
seasons = ['winter', 'spring', 'summer', 'autumn']
days_of_week = ['Saturday','Sunday','Monday','Tuesday','Wednesday','Thursday','Friday']
hours_per_day = 24
periods_per_hour = 12  # 5-minute periods
periods_per_day = hours_per_day * periods_per_hour  # 288
n_days = len(seasons) * len(days_of_week)  # 28 rows: 7 consecutive days per season

# 1) Read the baseline and validate its shape before indexing into it.
baseline = pd.read_excel(baseline_file, header=None).to_numpy(dtype=float)
if baseline.shape != (n_days, hours_per_day):
    raise ValueError(
        f"Expected a {n_days} x {hours_per_day} baseline in {baseline_file!r}, "
        f"got {baseline.shape}"
    )

# 2) Expand every hourly profile into 288 periods.
# Each hourly value is anchored at the centre of its hour and the periods in
# between are linearly interpolated, which keeps the series continuous across
# hour boundaries. (Ramping prev -> curr and curr -> next inside one hour
# traverses every ramp twice and jumps back to `curr` when the next hour
# starts.) Outside the first/last hour centre np.interp holds the edge value.
period_index = np.arange(periods_per_day)
hour_centres = np.arange(hours_per_day) * periods_per_hour + periods_per_hour // 2

all_days_arrays = []  # one array of 288 noisy values per baseline row
for row_idx in range(n_days):
    smooth_day = np.interp(period_index, hour_centres, baseline[row_idx])
    noise = np.random.normal(0, noise_std, periods_per_day)
    # Keep the noisy values inside the [0, 1] range.
    all_days_arrays.append(np.clip(smooth_day + noise, 0, 1))

# 3) Build the sliding-window dataset: window_size consecutive periods are the
# inputs and the period that follows them is the target. Windows never cross a
# day boundary, so each day contributes periods_per_day - window_size rows.
records = []
for season_idx, season in enumerate(seasons):
    for day_idx, day_name in enumerate(days_of_week):
        day_array = all_days_arrays[season_idx * len(days_of_week) + day_idx]
        for i in range(window_size, len(day_array)):
            # Cyclic encoding of the time of day: one full turn per day, so the
            # last period of the day sits next to the first one.
            angle = 2 * np.pi * (i / periods_per_day)

            record = {
                "season": season,
                "day": day_name,
                "time": i,  # raw index of the target period (kept): window_size..287
                "time_sin": np.sin(angle),
                "time_cos": np.cos(angle),
            }
            for j, value in enumerate(day_array[i - window_size:i], start=1):
                record[f"per{j}"] = value
            record["output"] = day_array[i]
            records.append(record)

# 4) Convert to a DataFrame, sanity-check the shape and save to Excel.
df = pd.DataFrame(records)
# Columns: season, day, time, time_sin, time_cos + window_size inputs + output.
expected_shape = (n_days * (periods_per_day - window_size), window_size + 6)
assert df.shape == expected_shape, f"unexpected dataset shape {df.shape}"
df.to_excel(output_file, index=False)
print(f"Synthetic dataset saved to: {output_file}")
print(f"Shape of dataset: {df.shape}")


Synthetic dataset saved to: Synthetic_datasets\ring road (MR) - synthetic dataset.xlsx
Shape of dataset: (7728, 18)


In [4]:
# ------------------ USER PARAMETERS ------------------
baseline_file = "6- Final clean baseline data\\Road3 - Ring road (AlQasr eleeiny).xlsx"  # baseline Excel file (28 rows x 24 columns, no header)
output_file = "Synthetic_datasets\\ring road (QA) - synthetic dataset.xlsx"
noise_std = 0.03  # standard deviation of the Gaussian noise added to each 5-min period
window_size = 12  # number of past periods used as inputs (per1..per12)
# NOTE(review): every dataset cell in this notebook reseeds with 42, so all
# roads receive the identical noise sequence -- confirm that is intended.
np.random.seed(42)
# -----------------------------------------------------
# NOTE(review): another cell in this notebook writes to the same output_file
# (without the time_sin/time_cos columns); whichever cell runs last wins.

# Layout of the baseline sheet and of the generated series.
seasons = ['winter', 'spring', 'summer', 'autumn']
days_of_week = ['Saturday','Sunday','Monday','Tuesday','Wednesday','Thursday','Friday']
hours_per_day = 24
periods_per_hour = 12  # 5-minute periods
periods_per_day = hours_per_day * periods_per_hour  # 288
n_days = len(seasons) * len(days_of_week)  # 28 rows: 7 consecutive days per season

# 1) Read the baseline and validate its shape before indexing into it.
baseline = pd.read_excel(baseline_file, header=None).to_numpy(dtype=float)
if baseline.shape != (n_days, hours_per_day):
    raise ValueError(
        f"Expected a {n_days} x {hours_per_day} baseline in {baseline_file!r}, "
        f"got {baseline.shape}"
    )

# 2) Expand every hourly profile into 288 periods.
# Each hourly value is anchored at the centre of its hour and the periods in
# between are linearly interpolated, which keeps the series continuous across
# hour boundaries. (Ramping prev -> curr and curr -> next inside one hour
# traverses every ramp twice and jumps back to `curr` when the next hour
# starts.) Outside the first/last hour centre np.interp holds the edge value.
period_index = np.arange(periods_per_day)
hour_centres = np.arange(hours_per_day) * periods_per_hour + periods_per_hour // 2

all_days_arrays = []  # one array of 288 noisy values per baseline row
for row_idx in range(n_days):
    smooth_day = np.interp(period_index, hour_centres, baseline[row_idx])
    noise = np.random.normal(0, noise_std, periods_per_day)
    # Keep the noisy values inside the [0, 1] range.
    all_days_arrays.append(np.clip(smooth_day + noise, 0, 1))

# 3) Build the sliding-window dataset: window_size consecutive periods are the
# inputs and the period that follows them is the target. Windows never cross a
# day boundary, so each day contributes periods_per_day - window_size rows.
records = []
for season_idx, season in enumerate(seasons):
    for day_idx, day_name in enumerate(days_of_week):
        day_array = all_days_arrays[season_idx * len(days_of_week) + day_idx]
        for i in range(window_size, len(day_array)):
            # Cyclic encoding of the time of day: one full turn per day, so the
            # last period of the day sits next to the first one.
            angle = 2 * np.pi * (i / periods_per_day)

            record = {
                "season": season,
                "day": day_name,
                "time": i,  # raw index of the target period (kept): window_size..287
                "time_sin": np.sin(angle),
                "time_cos": np.cos(angle),
            }
            for j, value in enumerate(day_array[i - window_size:i], start=1):
                record[f"per{j}"] = value
            record["output"] = day_array[i]
            records.append(record)

# 4) Convert to a DataFrame, sanity-check the shape and save to Excel.
df = pd.DataFrame(records)
# Columns: season, day, time, time_sin, time_cos + window_size inputs + output.
expected_shape = (n_days * (periods_per_day - window_size), window_size + 6)
assert df.shape == expected_shape, f"unexpected dataset shape {df.shape}"
df.to_excel(output_file, index=False)
print(f"Synthetic dataset saved to: {output_file}")
print(f"Shape of dataset: {df.shape}")

Synthetic dataset saved to: Synthetic_datasets\ring road (QA) - synthetic dataset.xlsx
Shape of dataset: (7728, 18)
