In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("default")

## Timeline creation (September, hourly)

In [4]:
start_date = "2026-09-01 00:00"
end_date = "2026-09-30 23:00"

datetime_index = pd.date_range(
    start = start_date,
    end = end_date,
    freq = "h"
)

len(datetime_index)

720

## DataFrame creation

In [6]:
df = pd.DataFrame({"datetime":datetime_index})
df["hour"] = df["datetime"].dt.hour
df["day"]= df["datetime"].dt.day
df.head()

Unnamed: 0,datetime,hour,day
0,2026-09-01 00:00:00,0,1
1,2026-09-01 01:00:00,1,1
2,2026-09-01 02:00:00,2,1
3,2026-09-01 03:00:00,3,1
4,2026-09-01 04:00:00,4,1


## Generate Solar Irradiation

In [72]:
def generate_solar_irradiation (hour, max_irradiation = 712):
    if hour<8 or hour>20:
        return 0
    return max_irradiation * np.sin(
        np.pi * (hour-8)/(20-8)
    )

In [73]:
df["solar_irradiation"] = df["hour"].apply(generate_solar_irradiation)
#Random noise, average = 0, standar deviation = 0, random value for each dataframe row
noise = np.random.normal(0, 50, size=len(df))
#Sum the noise and if negative equals 0
df["solar_irradiation"] = np.maximum(df["solar_irradiation"] + noise, 0)
df.loc[(df["hour"] < 8) | (df["hour"] > 20), "solar_irradiation"] = 0
df[["datetime", "hour", "solar_irradiation"]].head(48)

Unnamed: 0,datetime,hour,solar_irradiation
0,2026-09-01 00:00:00,0,0.0
1,2026-09-01 01:00:00,1,0.0
2,2026-09-01 02:00:00,2,0.0
3,2026-09-01 03:00:00,3,0.0
4,2026-09-01 04:00:00,4,0.0
5,2026-09-01 05:00:00,5,0.0
6,2026-09-01 06:00:00,6,0.0
7,2026-09-01 07:00:00,7,0.0
8,2026-09-01 08:00:00,8,33.624376
9,2026-09-01 09:00:00,9,186.829709


## Temperature Generation

In [81]:
base_temp = 19
df["ambient_temperature"] = (
    base_temp + 5 * np.sin(np.pi * (df["hour"] - 6) / 24) + np.random.normal(0, 1.5, size=len(df))
)

df[["datetime","hour", "ambient_temperature"]].head(48)

Unnamed: 0,datetime,hour,ambient_temperature
0,2026-09-01 00:00:00,0,16.633703
1,2026-09-01 01:00:00,1,19.218144
2,2026-09-01 02:00:00,2,14.052287
3,2026-09-01 03:00:00,3,16.229916
4,2026-09-01 04:00:00,4,17.078144
5,2026-09-01 05:00:00,5,17.009701
6,2026-09-01 06:00:00,6,18.752725
7,2026-09-01 07:00:00,7,20.27904
8,2026-09-01 08:00:00,8,22.298447
9,2026-09-01 09:00:00,9,20.763807


## Generation of Solar Energy Produced 

In [85]:
system_capacity = 5 #kWp
constant = 0.0009 

df["solar_energy_generated"] = (
    df["solar_irradiation"] * system_capacity * constant
)

df[["datetime" ,"solar_irradiation", "solar_energy_generated"]].head(48)

Unnamed: 0,datetime,solar_irradiation,solar_energy_generated
0,2026-09-01 00:00:00,0.0,0.0
1,2026-09-01 01:00:00,0.0,0.0
2,2026-09-01 02:00:00,0.0,0.0
3,2026-09-01 03:00:00,0.0,0.0
4,2026-09-01 04:00:00,0.0,0.0
5,2026-09-01 05:00:00,0.0,0.0
6,2026-09-01 06:00:00,0.0,0.0
7,2026-09-01 07:00:00,0.0,0.0
8,2026-09-01 08:00:00,33.624376,0.15131
9,2026-09-01 09:00:00,186.829709,0.840734


## Generation of energy comsuption

In [86]:
def generate_consumption(hour):
    if 7 <= hour <= 9:
        return np.random.uniform(0.6, 0.9)
    elif 18 <= hour <= 22:
        return np.random.uniform(0.7, 1.0)
    elif 0 <= hour <= 5:
        return np.random.uniform(0.2, 0.4)
    else:
        return np.random.uniform(0.3, 0.6)

df["energy_consumption"] = df["hour"].apply(generate_consumption)

df[["hour", "energy_consumption"]].head(48)

Unnamed: 0,hour,energy_consumption
0,0,0.294577
1,1,0.234276
2,2,0.352419
3,3,0.3322
4,4,0.396713
5,5,0.340713
6,6,0.424321
7,7,0.834096
8,8,0.683769
9,9,0.609497


## Generation of Net Energy

In [88]:
df["net_energy"] = (
    df["solar_energy_generated"] -
    df["energy_consumption"]
)

df.head(48)

Unnamed: 0,datetime,hour,day,solar_irradiation,ambient_temperature,solar_energy_generated,energy_consumption,net_energy
0,2026-09-01 00:00:00,0,1,0.0,16.633703,0.0,0.294577,-0.294577
1,2026-09-01 01:00:00,1,1,0.0,19.218144,0.0,0.234276,-0.234276
2,2026-09-01 02:00:00,2,1,0.0,14.052287,0.0,0.352419,-0.352419
3,2026-09-01 03:00:00,3,1,0.0,16.229916,0.0,0.3322,-0.3322
4,2026-09-01 04:00:00,4,1,0.0,17.078144,0.0,0.396713,-0.396713
5,2026-09-01 05:00:00,5,1,0.0,17.009701,0.0,0.340713,-0.340713
6,2026-09-01 06:00:00,6,1,0.0,18.752725,0.0,0.424321,-0.424321
7,2026-09-01 07:00:00,7,1,0.0,20.27904,0.0,0.834096,-0.834096
8,2026-09-01 08:00:00,8,1,33.624376,22.298447,0.15131,0.683769,-0.53246
9,2026-09-01 09:00:00,9,1,186.829709,20.763807,0.840734,0.609497,0.231237


## Persitence

In [90]:
df.to_csv("synthetic_solar_data_september_bilbao.csv", index=False)
df_loaded = pd.read_csv("synthetic_solar_data_september_bilbao.csv")
df_loaded.head()

Unnamed: 0,datetime,hour,day,solar_irradiation,ambient_temperature,solar_energy_generated,energy_consumption,net_energy
0,2026-09-01 00:00:00,0,1,0.0,16.633703,0.0,0.294577,-0.294577
1,2026-09-01 01:00:00,1,1,0.0,19.218144,0.0,0.234276,-0.234276
2,2026-09-01 02:00:00,2,1,0.0,14.052287,0.0,0.352419,-0.352419
3,2026-09-01 03:00:00,3,1,0.0,16.229916,0.0,0.3322,-0.3322
4,2026-09-01 04:00:00,4,1,0.0,17.078144,0.0,0.396713,-0.396713
