In [None]:
import pandas as pd
import numpy as np
from numba import jit
import os

# Input locations.
# NOTE(review): these are absolute, machine-specific paths; adjust them before
# running the notebook on another machine.
ConsumptionPath = 'C:/Users/Nik/Documents/GitHub/Thesis/CSV/Consumption'
WorkingTables_Path = 'C:/Users/Nik/Documents/GitHub/Thesis/CSV/Working Tables'

# Base consumption curves: the first CSV column becomes the index and is parsed as dates.
# The path is built from ConsumptionPath so the directory is spelled out only once.
df = pd.read_csv(os.path.join(ConsumptionPath, 'switched_data_columns.csv'), index_col=0, parse_dates=True)

# Starting tables for every optimization try; the first CSV column becomes the index.
# NOTE(review): on_bad_lines='skip' drops malformed rows without any message, so
# a damaged file shrinks silently -- consider checking the row counts after loading.
DF_m2_Residential = pd.read_csv(os.path.join(WorkingTables_Path, 'DF_m2_Residential.csv'), sep=",", on_bad_lines='skip', header=0, index_col=0)
DF_m2_Industrial = pd.read_csv(os.path.join(WorkingTables_Path, 'DF_m2_Industrial.csv'), sep=",", on_bad_lines='skip', header=0, index_col=0)
DF_m2_Commercial = pd.read_csv(os.path.join(WorkingTables_Path, 'DF_m2_Commercial.csv'), sep=",", on_bad_lines='skip', header=0, index_col=0)

# Define Fourier Transform variation function without Numba due to unsupported FFT operations
def fourier_transform_variation(series, noise_level=0.1, rng=None):
    """Return a variation of a real-valued signal by perturbing its spectrum.

    Every frequency component is scaled by ``1 + e`` where ``e`` is drawn from
    a normal distribution with mean 0 and standard deviation ``noise_level``.

    The one-sided real FFT is used on purpose: scaling the full two-sided
    spectrum with independent real factors breaks the conjugate symmetry a
    real signal's spectrum has, so the inverse transform becomes complex and
    taking its real part discards part of the requested perturbation.

    Parameters
    ----------
    series : array-like
        One-dimensional sequence of real numbers.
    noise_level : float, default 0.1
        Standard deviation of the relative perturbation of each component.
    rng : object with a ``normal(loc, scale, size)`` method, optional
        Source of randomness, e.g. ``np.random.default_rng(seed)``. When
        omitted, NumPy's global ``np.random`` state is used.

    Returns
    -------
    numpy.ndarray
        Real array with the same length as ``series``.
    """
    values = np.asarray(series, dtype=float)
    n_samples = len(values)
    if n_samples == 0:
        # The FFT routines reject zero-length input; nothing to perturb anyway.
        return values.copy()

    sampler = np.random if rng is None else rng
    spectrum = np.fft.rfft(values)
    noise = sampler.normal(0, noise_level, size=len(spectrum))
    # n must be passed explicitly, otherwise odd-length inputs come back one sample short.
    return np.fft.irfft(spectrum * (1 + noise), n=n_samples)

# Define function to add noise using Numba
@jit(nopython=True)
def add_noise_to_series(series, noise_level=0.05):
    """Scale each sample of ``series`` by an independent random factor near 1.

    The factor for every sample is ``1 + e`` with ``e`` drawn from a normal
    distribution with mean 0 and standard deviation ``noise_level``.

    NOTE(review): this function is compiled by Numba; jitted code presumably
    uses Numba's own random state rather than NumPy's global one, so seeding
    NumPy from regular Python code may not make this step reproducible --
    confirm against the Numba documentation.
    """
    relative_error = np.random.normal(0, noise_level, size=len(series))
    scale = 1 + relative_error
    return series * scale

# Define rolling perturbation function without Numba as it involves Pandas
def rolling_perturbation(series, window_size=24, perturbation_level=0.1, rng=None):
    """Smooth a series with a rolling mean, then apply multiplicative noise.

    Parameters
    ----------
    series : pandas.Series or array-like
        Values to smooth. Anything that is not already a pandas object is
        wrapped in a ``pd.Series`` (with a default integer index) first.
    window_size : int, default 24
        Number of samples in the rolling window. ``min_periods=1`` is used, so
        the first samples are averaged over the shorter window available and
        no leading NaNs are introduced.
    perturbation_level : float, default 0.1
        Standard deviation of the per-sample factor, which is drawn from a
        normal distribution centred on 1.
    rng : object with a ``normal(loc, scale, size)`` method, optional
        Source of randomness, e.g. ``np.random.default_rng(seed)``. When
        omitted, NumPy's global ``np.random`` state is used.

    Returns
    -------
    pandas.Series
        Perturbed rolling mean carrying the index of the input series.
    """
    if not isinstance(series, (pd.Series, pd.DataFrame)):
        series = pd.Series(series)

    sampler = np.random if rng is None else rng
    rolled = series.rolling(window=window_size, min_periods=1).mean()
    perturbation = sampler.normal(1, perturbation_level, size=len(series))
    return rolled * perturbation

# Helper function to apply all methods sequentially for each user
def apply_mixed_methods(df, base_column, users, prefix):
    """Generate one synthetic "mixed" curve per user from existing base curves.

    For each entry of ``users.index`` a source column of ``df`` is picked
    cyclically from the columns whose name starts with ``base_column``. The
    column is passed through ``fourier_transform_variation`` (0.1),
    ``add_noise_to_series`` (0.05) and ``rolling_perturbation`` (window 24,
    level 0.1), in that order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the base curves; its index is reused for the result.
    base_column : str
        Prefix used to select the source columns of ``df``.
    users : pandas.DataFrame
        Only ``users.index`` is read; each entry yields one output column.
    prefix : str
        Prefix of the generated column names ``{prefix}_User_{user}_Mixed``.

    Returns
    -------
    pandas.DataFrame
        One column per user, indexed like ``df``. If a user id occurs more
        than once, the last generated curve for that id is kept.

    Raises
    ------
    ValueError
        If there are users to process but no column of ``df`` starts with
        ``base_column``.
    """
    existing_columns = [col for col in df.columns if col.startswith(base_column)]
    user_indices = list(users.index)

    if user_indices and not existing_columns:
        # Without this guard the modulo below fails with an opaque ZeroDivisionError.
        raise ValueError(f"No column in df starts with {base_column!r}")

    curves = {}
    for i, user in enumerate(user_indices):
        col = existing_columns[i % len(existing_columns)]  # Use available curves cyclically if fewer

        # Start with original curve
        mixed_curve = df[col].to_numpy()

        # Apply Fourier Transform
        mixed_curve = fourier_transform_variation(mixed_curve, 0.1)

        # Apply Noise Addition
        mixed_curve = add_noise_to_series(mixed_curve, 0.05)

        # Apply Rolling Perturbation
        mixed_curve = rolling_perturbation(pd.Series(mixed_curve, index=df.index), 24, 0.1)

        # Store plain values: a Series with a different index would be aligned
        # on labels when placed into the result and come out as all-NaN.
        curves[f'{prefix}_User_{user}_Mixed'] = np.asarray(mixed_curve)

    # Build the frame in one go instead of inserting columns one at a time.
    return pd.DataFrame(curves, index=df.index)

# Apply mixed methods to generate new consumption data based on existing curves
residential_consumption = apply_mixed_methods(df, 'Residential', DF_m2_Residential, 'Residential')
industrial_consumption = apply_mixed_methods(df, 'Industrial', DF_m2_Industrial, 'Industrial')
commercial_consumption = apply_mixed_methods(df, 'Commercial', DF_m2_Commercial, 'Commercial')

# Save the results to CSV files inside ConsumptionPath (defined with the other
# input paths) rather than repeating the absolute directory for every file.
for sector_name, sector_frame in (
    ('Residential', residential_consumption),
    ('Industrial', industrial_consumption),
    ('Commercial', commercial_consumption),
):
    sector_frame.to_csv(os.path.join(ConsumptionPath, f'{sector_name}_Consumption_Optimized_From_Switched.csv'))

print("Optimized consumption files created and saved using the new dataset.")


In [5]:
import pandas as pd
import numpy as np
from numba import jit
import os

# Directories the notebook reads from.
# NOTE(review): absolute, machine-specific locations -- change them when the
# notebook is run from a different machine or checkout.
ConsumptionPath = 'C:/Users/Nik/Documents/GitHub/Thesis/CSV/Consumption'
WorkingTables_Path = 'C:/Users/Nik/Documents/GitHub/Thesis/CSV/Working Tables'

# Load the base curves; column 0 is used as the index and parsed as dates.
# ConsumptionPath is reused here instead of repeating the directory literal.
df = pd.read_csv(os.path.join(ConsumptionPath, 'switched_data_columns.csv'), index_col=0, parse_dates=True)

# Starting tables for every optimization try (column 0 is the index).
# NOTE(review): on_bad_lines='skip' discards malformed rows silently; verify
# the loaded row counts if the source files may be damaged.
DF_m2_Residential = pd.read_csv(os.path.join(WorkingTables_Path, 'DF_m2_Residential.csv'), sep=",", on_bad_lines='skip', header=0, index_col=0)
DF_m2_Industrial = pd.read_csv(os.path.join(WorkingTables_Path, 'DF_m2_Industrial.csv'), sep=",", on_bad_lines='skip', header=0, index_col=0)
DF_m2_Commercial = pd.read_csv(os.path.join(WorkingTables_Path, 'DF_m2_Commercial.csv'), sep=",", on_bad_lines='skip', header=0, index_col=0)

# Define Fourier Transform variation function without Numba due to unsupported FFT operations
def fourier_transform_variation(series, noise_level=0.1, rng=None):
    """Perturb the frequency content of a real signal and transform it back.

    Each component of the one-sided spectrum is multiplied by ``1 + e`` with
    ``e`` drawn from a normal distribution (mean 0, standard deviation
    ``noise_level``).

    The real FFT pair (``rfft``/``irfft``) is used because independent real
    factors applied to the full two-sided spectrum destroy its conjugate
    symmetry; the inverse is then complex and keeping only the real part
    silently drops part of the perturbation.

    Parameters
    ----------
    series : array-like
        One-dimensional sequence of real numbers.
    noise_level : float, default 0.1
        Standard deviation of the relative change of each component.
    rng : object with a ``normal(loc, scale, size)`` method, optional
        Random source such as ``np.random.default_rng(seed)``; NumPy's global
        ``np.random`` state is used when it is not given.

    Returns
    -------
    numpy.ndarray
        Real array of the same length as ``series``.
    """
    signal = np.asarray(series, dtype=float)
    length = len(signal)
    if length == 0:
        # Zero-length input is rejected by the FFT routines; return it unchanged.
        return signal.copy()

    source = rng if rng is not None else np.random
    fft_values = np.fft.rfft(signal)
    noise = source.normal(0, noise_level, size=len(fft_values))
    fft_values = fft_values * (1 + noise)
    # Passing n keeps odd-length signals at their original length.
    transformed_series = np.fft.irfft(fft_values, n=length)
    return transformed_series

# Define function to add noise using Numba
@jit(nopython=True)
def add_noise_to_series(series, noise_level=0.05):
    """Multiply every sample by ``1 + e``, ``e`` ~ Normal(0, ``noise_level``).

    One independent factor is drawn per sample, so the result has the same
    length as ``series``.

    NOTE(review): compiled with Numba; random draws inside jitted code
    presumably come from Numba's own generator state, not NumPy's global one,
    so a NumPy seed set outside may not affect this step -- confirm against
    the Numba documentation.
    """
    jitter = np.random.normal(0, noise_level, size=len(series))
    return series * (1 + jitter)

# Define rolling perturbation function without Numba as it involves Pandas
def rolling_perturbation(series, window_size=24, perturbation_level=0.1, rng=None):
    """Return the rolling mean of ``series`` multiplied by random factors.

    Parameters
    ----------
    series : pandas.Series or array-like
        Input values. Non-pandas input is wrapped in a ``pd.Series`` with a
        default integer index.
    window_size : int, default 24
        Rolling window length in samples; ``min_periods=1`` means the leading
        samples use whatever shorter window is available instead of NaN.
    perturbation_level : float, default 0.1
        Standard deviation of the per-sample multiplier (normal, mean 1).
    rng : object with a ``normal(loc, scale, size)`` method, optional
        Random source such as ``np.random.default_rng(seed)``; NumPy's global
        ``np.random`` state is used when it is not given.

    Returns
    -------
    pandas.Series
        Perturbed rolling mean, indexed like the input series.
    """
    if not isinstance(series, (pd.Series, pd.DataFrame)):
        series = pd.Series(series)

    source = rng if rng is not None else np.random
    rolled = series.rolling(window=window_size, min_periods=1).mean()
    perturbation = source.normal(1, perturbation_level, size=len(series))
    perturbed_series = rolled * perturbation
    return perturbed_series

# Helper function to apply all methods sequentially for each user
def apply_mixed_methods(df, base_column, users, prefix, verbose=True):
    """Generate one synthetic "mixed" curve per user from existing base curves.

    For every entry of ``users.index`` a source column of ``df`` is chosen
    cyclically among the columns whose name starts with ``base_column`` and
    passed through ``fourier_transform_variation`` (0.1),
    ``add_noise_to_series`` (0.05) and ``rolling_perturbation`` (window 24,
    level 0.1), in that order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the base curves; its index is reused for the result.
    base_column : str
        Prefix used to select the source columns of ``df``.
    users : pandas.DataFrame
        Only ``users.index`` is read; each entry yields one output column.
    prefix : str
        Prefix of the generated column names ``{prefix}_User_{user}_Mixed``.
    verbose : bool, default True
        Print the mean and standard deviation of every generated curve. Pass
        ``False`` to avoid one output line per user on large user tables.

    Returns
    -------
    pandas.DataFrame
        One column per user, indexed like ``df``; an empty frame with that
        index when ``users`` has no rows.

    Raises
    ------
    ValueError
        If there are users to process but no column of ``df`` starts with
        ``base_column``.
    """
    # Assume df already has multiple curves; use a subset of them
    existing_columns = [col for col in df.columns if col.startswith(base_column)]
    user_indices = list(users.index)

    if not user_indices:
        # pd.concat refuses an empty list, so handle "no users" explicitly.
        return pd.DataFrame(index=df.index)
    if not existing_columns:
        # Without this guard the modulo below fails with an opaque ZeroDivisionError.
        raise ValueError(f"No column in df starts with {base_column!r}")

    # Generate a single mixed curve for each user
    curve_list = []
    for i, user in enumerate(user_indices):
        col = existing_columns[i % len(existing_columns)]  # Use available curves cyclically if fewer

        # Start with original curve
        mixed_curve = df[col].values

        # Apply Fourier Transform
        mixed_curve = fourier_transform_variation(mixed_curve, 0.1)

        # Apply Noise Addition
        mixed_curve = add_noise_to_series(mixed_curve, 0.05)

        # Apply Rolling Perturbation
        mixed_curve = rolling_perturbation(pd.Series(mixed_curve, index=df.index), 24, 0.1)

        # Work on plain values so the statistics and the stored column do not
        # depend on whatever index the perturbation step returned.
        mixed_curve = mixed_curve.to_numpy()

        curve_list.append(pd.Series(mixed_curve, index=df.index, name=f'{prefix}_User_{user}_Mixed'))

        if verbose:
            print(f'Processed curve for user {user}: mean={np.mean(mixed_curve):.2f}, std={np.std(mixed_curve):.2f}')

    # Combine all columns into a single DataFrame at once
    return pd.concat(curve_list, axis=1)

# Apply mixed methods to generate new consumption data based on existing curves
residential_consumption = apply_mixed_methods(df, 'Residential', DF_m2_Residential, 'Residential')
industrial_consumption = apply_mixed_methods(df, 'Industrial', DF_m2_Industrial, 'Industrial')
commercial_consumption = apply_mixed_methods(df, 'Commercial', DF_m2_Commercial, 'Commercial')

# Save the results to CSV files. The target directory comes from ConsumptionPath
# (defined with the other input paths) so it is not repeated for every file.
for sector_name, sector_frame in (
    ('Residential', residential_consumption),
    ('Industrial', industrial_consumption),
    ('Commercial', commercial_consumption),
):
    sector_frame.to_csv(os.path.join(ConsumptionPath, f'{sector_name}_Consumption_Optimized_From_Switched_2.csv'))

print("Optimized consumption files created and saved using the new dataset.")


Processed curve for user w158853557: mean=1.89, std=0.36
Processed curve for user w158853558: mean=1.70, std=0.34
Processed curve for user w158853559: mean=1.76, std=0.36
Processed curve for user w211715359: mean=1.90, std=0.36
Processed curve for user w246792829: mean=2.17, std=0.37
Processed curve for user w276462198: mean=1.71, std=0.36
Processed curve for user w276462425: mean=1.87, std=0.36
Processed curve for user w276612247: mean=1.86, std=0.36
Processed curve for user w276614920: mean=1.82, std=0.36
Processed curve for user w276734031: mean=1.57, std=0.35
Processed curve for user w298690124: mean=1.75, std=0.36
Processed curve for user w298890320: mean=1.58, std=0.34
Processed curve for user w298890321: mean=1.70, std=0.34
Processed curve for user w298890322: mean=1.93, std=0.35
Processed curve for user w298890323: mean=1.42, std=0.33
Processed curve for user w298890324: mean=1.99, std=0.36
Processed curve for user w298890325: mean=1.64, std=0.34
Processed curve for user w29889

KeyboardInterrupt: 