## Electricity Consumption Profile Generation<a class="anchor" id="ele_profile"></a>

This notebook describes the generation of energy consumption data

This notebook builds on top of the notebook presented in [this repository](https://github.com/PeijieZ/Load-profile-generation).

In [45]:
import csv
import random
from pathlib import Path
from typing import List, Dict, Callable

import pandas as pd

### Settings

In [46]:
# Precision of generated data
DECIMAL_PRECISION = 4

# Total number of households to generate data for.
TOTAL_NR_HOUSEHOLDS = 150

# Number of samples to generate
SAMPLES_PER_DAY = 24
DAYS = 31
SAMPLES = DAYS * SAMPLES_PER_DAY 

### Loading data

In [47]:
data_dir = Path("./src")

# Gather data file paths
data_files = []
for f in data_dir.iterdir():
    assert f.exists()
    data_files.append(f)

print(f"Nr. of data file gathered: {len(data_files)}")

Nr. of data file gathered: 10


In [48]:
# Load household data
households: List[Dict] = []
for f in data_files:
    with f.open('r') as f:
        f_csv = csv.reader(f, delimiter=";")

        # Header check
        header = next(f_csv)
        assert header[2] == "Sum [kWh]"

        # Load data
        household = []
        for _, _, consumption in f_csv:
            consumption = round(float(consumption), DECIMAL_PRECISION)
            household.append(consumption)
        households.append(household)

print(f"Loaded data for {len(households)} households.")

Loaded data for 10 households.


### Generating data

In [49]:
Vector = List[float]

def generate_vector(
        input_vectors: List[Vector], 
        randomizer: Callable[[float], float]
    ) -> List[Vector]:
    """
    Generate random vector, using uniform sampling.
    :param input_data: set of vectors to sample from
    :param randomizer: callback used to compute a new value.
    :returns: uniformly random sampled vector
    """
    # 1. Select random existing data vector
    base_vector = random.choice(input_vectors)

    # 2. Generate random vector using uniformly random sampling
    new_vec = []
    for val in base_vector:
        new_val = randomizer(val)
        rounded_new_val = round(new_val, DECIMAL_PRECISION)
        new_vec.append(rounded_new_val)
    
    return new_vec

def extend_vectors(
        vectors: List[Vector], 
        randomizer: Callable[[float], float],
        new_size: int,
        period: float = 24,
    ) -> Vector:
    """
    Randomly extend vectors.
    :param vectors: vectors to extend
    :param randomizer: call back to help randomize values
    :param new_size: desired vector size
    :param period: period used in randomization
    """
    first_vector = vectors[0]
    nr_samples = len(first_vector)
    assert nr_samples < new_size

    # Compute number of periods already in the vector.
    assert nr_samples % period == 0
    nr_periods = nr_samples // period

    # Compute number of periods that need to be added.
    nr_new_samples = new_size - nr_samples
    assert nr_new_samples % period == 0
    nr_new_periods = nr_new_samples // period

    for _ in range(nr_new_periods):

        # Randomly select existing period
        old_period_idx = random.randint(0, nr_periods-1)
        start = old_period_idx * period
        end = start + period

        # Extend every vector with randomized data from the selected period
        for vec in vectors:
            for elt in vec[start:end]:
                new_elt = randomizer(elt)
                vec.append(new_elt)

In [50]:
# To randomize the generated data, each consumption sample is multiplied with a different factor.
# These factors are drawn uniformly at random from this interval.
RANDOMIZATION_INTERVAL = 0.9, 1.1

# Function used to generate new value from old value
def randomizer(val: float) -> float:
    return val * random.uniform(*RANDOMIZATION_INTERVAL)

# Generate data for new house holds
new_households = []
nr_extra_households = TOTAL_NR_HOUSEHOLDS - len(households)
for _ in range(nr_extra_households):
    new_household = generate_vector(households, randomizer)
    new_households.append(new_household)
households.extend(new_households)

# Generate data for the desired timespan
extend_vectors(households, randomizer, SAMPLES)

In [51]:
df = pd.DataFrame(households).T
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,140,141,142,143,144,145,146,147,148,149
0,0.054200,0.081200,0.107100,0.039100,0.078900,0.095000,0.051900,0.060900,0.341800,0.103300,...,0.048400,0.097200,0.112900,0.085800,0.037100,0.063400,0.074300,0.078400,0.053100,0.099200
1,0.057100,0.081300,0.121600,0.039200,0.053100,0.048700,0.049900,0.053200,0.140500,0.108400,...,0.047500,0.121600,0.130300,0.087300,0.039600,0.051100,0.078700,0.050100,0.052600,0.105100
2,0.056800,0.068600,0.123300,0.037800,0.113900,0.047200,0.048900,0.068500,0.188000,0.119700,...,0.045600,0.120900,0.132700,0.061800,0.037700,0.064100,0.066700,0.116900,0.047300,0.112800
3,0.145500,0.076800,0.104400,0.037700,0.089900,0.046800,0.120900,0.045800,0.209700,0.102400,...,0.124800,0.110500,0.104500,0.077900,0.037000,0.045500,0.077400,0.084500,0.117700,0.110600
4,0.064300,0.081200,0.112100,0.040400,0.109600,0.072800,0.096500,0.070000,0.232000,0.103300,...,0.100800,0.110200,0.108000,0.089300,0.038900,0.075900,0.076100,0.108500,0.088400,0.094600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,0.273381,1.124332,1.282179,5.146244,0.751818,0.507709,1.789415,0.688450,3.908970,1.396861,...,1.673437,1.249243,1.313094,1.138186,4.457980,0.591524,1.013133,0.649655,1.717332,1.406918
740,0.214338,0.419267,0.434877,2.060070,0.655079,0.078843,0.946416,1.366636,2.928256,5.610863,...,0.975676,0.414162,0.401814,0.393596,1.950847,1.473652,0.450576,0.752100,0.775211,6.028541
741,0.138138,0.494836,0.444714,0.335288,1.201833,0.064779,0.278660,0.742275,1.540368,2.781415,...,0.293864,0.487005,0.396246,0.556242,0.359143,0.680834,0.514568,1.152892,0.331755,2.714432
742,0.154334,0.321405,0.368903,0.307501,0.407810,0.079409,0.332318,0.353807,0.477428,0.944246,...,0.341323,0.339413,0.317219,0.244685,0.280129,0.387054,0.298294,0.401076,0.322947,0.879847


## Export data

In [57]:
def export_data(nr_days: int):
    samples = nr_days * 24
    selection = df.iloc[:samples]
    selection.to_json(f'out/consumption_{nr_days}_days.json', indent=4)

export_data(1)