# Masterarbeit - Exponential Smoothing: Erste Experimente



Fist experiments for baselines

- Run for 2-3 different datasets 
- Run in 3 different modes: basic, vectorized, parallelized 
- Implementation for Numpy here
- Implementation for R and Julia and SystemDS will follow

In [1]:
import numpy as np
import time
import matplotlib as mpl 
from numba import jit

___
___
## Wind Turbine Dataset: Wind speed

https://www.kaggle.com/datasets/berkerisen/wind-turbine-scada-dataset

In [2]:
# Get the data in an numpy array 
data = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/wind_turbine_scada.csv', delimiter=',', skip_header=1)

# Slice the data to only get the wind speed column
wind_speed = data[:, 2]
wind_speed = wind_speed[~np.isnan(wind_speed)]

# Replication factor
factor = 500
base_wind_speed = wind_speed

for i in range(factor - 1):
    wind_speed = np.concatenate((wind_speed, base_wind_speed))

print(f"Dataframe wind_speed has {wind_speed.size} rows.")

Dataframe wind_speed has 25265000 rows.


___
### #1 Basic For Loop 

In [3]:
# #1
# Wind speed

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Initialize the smoothed value first data point
    smoothed_value = wind_speed[0]   

    # Perform exponential smoothing on the data
    for j in range(1, len(wind_speed)):
        smoothed_value = alpha * wind_speed[j] + (1 - alpha) * smoothed_value    

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset wind_speed")
print("### #1 Basic For Loop \n")
print("The last smoothed value for wind_speed is:", smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset wind_speed
### #1 Basic For Loop 

The last smoothed value for wind_speed is: 9.729056156830051
The function was executed in 7.859822988510132 seconds


___
### #2 Vectorized

In [4]:
# #2
# Wind Speed

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Perform exponential smoothing on the data
    n = wind_speed.shape[0]
    weights = alpha * (1 - alpha) ** np.arange(n)[::-1]
    smoothed = np.cumsum(weights * wind_speed)[::-1]
    smoothed_value = smoothed[0] / weights.sum()

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset wind_speed")
print("### #2 Vectorized \n")
print("The last smoothed value for wind_speed is:", smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset wind_speed
### #2 Vectorized 

The last smoothed value for wind_speed is: 9.729056156830051
The function was executed in 0.6840405464172363 seconds


___
### #3 Parallelized Basic For Loop

In [5]:
# #3
# Wind speed

from numba import jit, prange

@jit(nopython=True)
def smooth_wind_speed(wind_speed, alpha):
    smoothed_value = wind_speed[0]  # Initialize with the first value
    for i in prange(1, len(wind_speed)):
        smoothed_value = alpha * wind_speed[i] + (1 - alpha) * smoothed_value
    return smoothed_value

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Perform exponential smoothing on the data
    last_smoothed_value = smooth_wind_speed(wind_speed, alpha)

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset wind_speed")
print("### #3 Parallelized Basic For Loop \n")
print("The last smoothed value for wind_speed is:", last_smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset wind_speed
### #3 Parallelized Basic For Loop 

The last smoothed value for wind_speed is: 9.729056156830051
The function was executed in 0.05615413188934326 seconds


___
### #4 Parallelized Vectrorized

In [6]:
# #4
# Wind speed

from numba import jit, prange

@jit(nopython=True, parallel=True)
def smooth_wind_speed(wind_speed, alpha):
    n = wind_speed.shape[0]
    weights = alpha * (1 - alpha) ** np.arange(n)[::-1]
    smoothed = np.cumsum(weights * wind_speed)[::-1]
    smoothed_value = smoothed[0] / weights.sum()
    return smoothed_value

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Perform exponential smoothing on the data
    last_smoothed_value = smooth_wind_speed(wind_speed, alpha)

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset wind_speed")
print("### #4 Parallelized Vectrorized \n")
print("The last smoothed value for wind_speed is:", last_smoothed_value)
print(f"The function was executed in {function_time} seconds")

OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


### Dataset wind_speed
### #4 Parallelized Vectrorized 

The last smoothed value for wind_speed is: 9.729056156830051
The function was executed in 0.26382601261138916 seconds


___
## Energy Dataset: Solar energy production

https://www.kaggle.com/datasets/nicholasjhana/energy-consumption-generation-prices-and-weather

In [7]:
# Get the data in an numpy array 
data = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/energy_dataset.csv', delimiter=',', skip_header=1)

# Slice the data to only get generation_solar
column_generation_solar = data[:, 18]
column_generation_solar = column_generation_solar[~np.isnan(column_generation_solar)]

# Replication factor
factor = 1400
base_column_generation_solar = column_generation_solar

for i in range(factor - 1):
    column_generation_solar = np.concatenate((column_generation_solar, base_column_generation_solar))

print(f"Dataframe column_generation_solar has {column_generation_solar.size} rows.")

Dataframe column_generation_solar has 49064400 rows.


___
### #1 - Basic For Loop

In [8]:
# #1
# Energy Generation Solar

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Initialize the smoothed value first data point
    smoothed_value = column_generation_solar[0]   

    # Perform exponential smoothing on the data
    for j in range(1, len(column_generation_solar)):
        smoothed_value = alpha * column_generation_solar[j] + (1 - alpha) * smoothed_value    

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset generation_solar")
print("### #1 Basic For Loop \n")
print("The last smoothed value for generation_solar is:", smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset generation_solar
### #1 Basic For Loop 

The last smoothed value for generation_solar is: 33.09027077319566
The function was executed in 15.070680975914001 seconds


___
### #2: Vectorized 

In [9]:
# #2
# Wind Speed

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Perform exponential smoothing on the data
    n = column_generation_solar.shape[0]
    weights = alpha * (1 - alpha) ** np.arange(n)
    smoothed = np.cumsum(weights * column_generation_solar[::-1])[::-1]
    smoothed_value = smoothed[0] / weights.sum()

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset wind_speed")
print("### #2 Vectorized \n")
print("The last smoothed value for wind_speed is:", smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset wind_speed
### #2 Vectorized 

The last smoothed value for wind_speed is: 33.090270773195684
The function was executed in 1.4763070344924927 seconds


___
### #3 Parallelized Basic For Loop

In [10]:
# #3
# Generation Solar Energy

from numba import jit, prange

@jit(nopython=True)
def smooth_generation_solar(wind_speed, alpha):
    smoothed_value = column_generation_solar[0]  # Initialize with the first value
    for i in prange(1, len(column_generation_solar)):
        smoothed_value = alpha * column_generation_solar[i] + (1 - alpha) * smoothed_value
    return smoothed_value

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Perform exponential smoothing on the data
    last_smoothed_value = smooth_wind_speed(column_generation_solar, alpha)

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset generation_solar")
print("### #3 Parallelized Basic For Loop \n")
print("The last smoothed value for generation_solar is:", last_smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset generation_solar
### #3 Parallelized Basic For Loop 

The last smoothed value for generation_solar is: 33.09027077319566
The function was executed in 0.49577343463897705 seconds


___
### #4 Paralellized Vectorized

In [11]:
# #4
# Solar Energy

from numba import jit, prange

@jit(nopython=True)
def smooth_solar_energy(column_generation_solar, alpha):
    n = column_generation_solar.shape[0]
    weights = alpha * (1 - alpha) ** np.arange(n)[::-1]
    smoothed = np.cumsum(weights * column_generation_solar)[::-1]
    smoothed_value = smoothed[0] / weights.sum()
    return smoothed_value

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for j in range(number_of_executions):
    start_time = time.time()

    # Perform exponential smoothing on the data
    last_smoothed_value = smooth_solar_energy(column_generation_solar, alpha)

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset gerneration_solar")
print("### #4 Parallelized Vectrorized \n")
print("The last smoothed value for generation_solar is:", last_smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset gerneration_solar
### #4 Parallelized Vectrorized 

The last smoothed value for generation_solar is: 33.09027077319566
The function was executed in 1.1732815504074097 seconds


___
___
## HR Dataset: Heart Rate Seconds

https://www.kaggle.com/datasets/arashnic/fitbit

In [12]:
# Get the data in an numpy array 
data = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/heartrate_seconds_merged.csv', delimiter=',', skip_header=1)

# Slice the data to only get HR
heart_rate_data = data[:, 2]
heart_rate_data = heart_rate_data[~np.isnan(heart_rate_data)]

# Replication factor
factor = 90
base_heart_rate_data = heart_rate_data

for i in range(factor - 1):
    heart_rate_data = np.concatenate((heart_rate_data, base_heart_rate_data))

print(f"Dataframe heart_rate_data has {heart_rate_data.size} rows.")

Dataframe heart_rate_data has 103921290 rows.


___
### #1 Basic for Loop

In [13]:
# #1
# Heart Rate 

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Initialize the smoothed value first data point
    smoothed_value = heart_rate_data[0]   

    # Perform exponential smoothing on the data
    for j in range(1, len(heart_rate_data)):
        smoothed_value = alpha * heart_rate_data[j] + (1 - alpha) * smoothed_value    

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset heart_rate_data")
print("### #1 Basic For Loop \n")
print("The last smoothed value for heart_rate_data is:", smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset heart_rate_data
### #1 Basic For Loop 

The last smoothed value for heart_rate_data is: 98.77752694571056
The function was executed in 30.742368817329407 seconds


___
### #2 Vectorized

In [14]:
# #2
# Heart Rate Data 

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Perform exponential smoothing on the data
    n = heart_rate_data.shape[0]
    weights = alpha * (1 - alpha) ** np.arange(n)[::-1]
    smoothed = np.cumsum(weights * heart_rate_data)[::-1]
    smoothed_value = smoothed[0] / weights.sum()

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset heart_rate_data")
print("### #2 Vectorized \n")
print("The last smoothed value for heart_rate_data is:", smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset heart_rate_data
### #2 Vectorized 

The last smoothed value for heart_rate_data is: 98.77752694571055
The function was executed in 3.0845961570739746 seconds


___
### #3 Parallelized Basic For Loop

In [15]:
# #3
# Heart Rate Data 

from numba import jit, prange

@jit(nopython=True)
def smooth_heart_rate(heart_rate_data, alpha):
    smoothed_value = heart_rate_data[0]  # Initialize with the first value
    for i in prange(1, len(heart_rate_data)):
        smoothed_value = alpha * heart_rate_data[i] + (1 - alpha) * smoothed_value
    return smoothed_value

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for j in range(number_of_executions):
    start_time = time.time()

    # Perform exponential smoothing on the data
    last_smoothed_value = smooth_heart_rate(heart_rate_data, alpha)

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset heart_rate_data")
print("### #3 Parallelized Basic For Loop \n")
print("The last smoothed value for heart_rate_data is:", last_smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset heart_rate_data
### #3 Parallelized Basic For Loop 

The last smoothed value for heart_rate_data is: 98.77752694571056
The function was executed in 0.23646986484527588 seconds


___
### #4 Parallelized Vectorized

In [16]:
# #4
# Wind speed

from numba import jit, prange

@jit(nopython=True)
def smooth_heart_rate(heart_rate_data, alpha):
    n = heart_rate_data.shape[0]
    weights = alpha * (1 - alpha) ** np.arange(n)[::-1]
    smoothed = np.cumsum(weights * heart_rate_data)[::-1]
    smoothed_value = smoothed[0] / weights.sum()
    return smoothed_value

# Alpha is the smoothing factor.
alpha = 0.7

# List to store execution times
number_of_executions = 10
execution_times = []

# Perform exponential smoothing on the data
for i in range(number_of_executions):
    start_time = time.time()

    # Perform exponential smoothing on the data
    last_smoothed_value = smooth_heart_rate(heart_rate_data, alpha)

    # Stop the timer
    end_time = time.time()
    execution_times.append(end_time - start_time)

# Calculate the function time
function_time = np.median(execution_times)

# Print the results
print("### Dataset heart_rate_data")
print("### #4 Parallelized Vectrorized \n")
print("The last smoothed value for heart_rate_data is:", last_smoothed_value)
print(f"The function was executed in {function_time} seconds")

### Dataset heart_rate_data
### #4 Parallelized Vectrorized 

The last smoothed value for heart_rate_data is: 98.77752694571055
The function was executed in 2.63571560382843 seconds


___
___