In [1]:
import pandas as pd
# Path to the raw load data
file_path = '2020_5E_load_cleaned.csv'
# Read the whole CSV into a DataFrame; later cells use its 'datetime' and 'load' columns.
data = pd.read_csv(file_path)

In [2]:
# Parse the timestamp strings so the .dt accessor can be used on the column
data['datetime'] = pd.to_datetime(data['datetime'])

# Tag every row with the name of its weekday
data['day_of_week'] = data['datetime'].dt.day_name()

# Bucket the rows by weekday so that gaps are filled from same-weekday neighbours only
grouped_data = data.groupby('day_of_week')

# For each weekday bucket, linearly interpolate the missing 'load' values and
# write the filled values back to the same rows of the full frame.
# Linear interpolation with default arguments only fills forward, so missing
# values before a bucket's first valid entry are left untouched.
for row_labels in grouped_data.groups.values():
    weekday_load = data.loc[row_labels, 'load']
    data.loc[row_labels, 'load'] = weekday_load.interpolate(method='linear')

In [3]:
# Save the updated dataset to a new CSV file.
# index=False keeps the DataFrame's row index out of the written file.
output_file_path = '2020_5E_load_cleaned_interpolated.csv'
data.to_csv(output_file_path, index=False)

In [4]:
# Re-read the interpolated dataset from disk; this rebinds `data` to the on-disk version.
# NOTE(review): no parse_dates is given, so 'datetime' presumably comes back as plain text — confirm.
data = pd.read_csv('2020_5E_load_cleaned_interpolated.csv')

In [5]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from scipy.ndimage import gaussian_filter1d
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

# Synthetic daily profile generation
def data_gen(seed,sig):
    """Build a smoothed synthetic one-day profile sampled every 300 seconds.

    The day (0..86400 s) is split into fixed segments. For every sample that
    falls into a segment a value is drawn uniformly between that segment's two
    bounds, and the concatenated series is then smoothed with a 1-D Gaussian
    filter.

    Side effect: reseeds NumPy's *global* random generator with ``seed``, so
    any later ``np.random`` draw in the same process is affected as well.

    Args:
        seed: Seed passed to ``np.random.seed``.
        sig: Standard deviation (``sigma``) of the Gaussian smoothing kernel.

    Returns:
        1-D NumPy array with one smoothed value per 300-second sample
        (288 values for the built-in segments).
    """
    np.random.seed(seed)

    # (segment start [s], segment end [s], first bound, second bound)
    # NOTE(review): some segments list the larger bound first; NumPy documents
    # uniform() with high < low as officially undefined — confirm this is intended.
    segments = (
        (0, 10000, 250, 300),
        (10000, 20000, 300, 400),
        (20000, 30000, 450, 700),
        (30000, 40000, 700, 800),
        (40000, 50000, 800, 600),
        (50000, 60000, 600, 400),
        (60000, 70000, 400, 750),
        (70000, 80000, 750, 450),
        (80000, 86400, 450, 350),
    )

    sample_seconds = np.arange(0, 86400, 300)

    # Draw segment by segment, in order, so the random stream is consumed
    # exactly once per sample.
    pieces = []
    for seg_start, seg_end, bound_a, bound_b in segments:
        in_segment = (sample_seconds >= seg_start) & (sample_seconds < seg_end)
        pieces.append(np.random.uniform(bound_a, bound_b, in_segment.sum()))

    raw_profile = np.concatenate(pieces)
    return gaussian_filter1d(raw_profile, sigma=sig)

# Wrap a sequence of values in a two-column (position, value) DataFrame
def load_data(data):
    """Turn a one-column data source into a frame with explicit positions.

    The input is passed to ``pd.DataFrame``; the resulting row index is moved
    into a regular column and the two columns are renamed.

    Args:
        data: Anything ``pd.DataFrame`` accepts that yields exactly one
            column (e.g. a 1-D array or list); otherwise the column rename
            raises ``ValueError``.

    Returns:
        DataFrame with columns ``'Index'`` (the former row index) and
        ``'Power_Usage'`` (the values).
    """
    frame = pd.DataFrame(data).reset_index(drop=False)
    frame.columns = ['Index', 'Power_Usage']
    return frame

# Resample the data to a fixed number of points (default 288 = 5-minute steps over 24 hours)
def normalize_data(data, num_points=288):
    """Linearly resample a profile onto ``num_points`` evenly spaced positions.

    The new positions span the full range of ``data['Index']``, from its
    smallest to its largest value. Starting at the smallest index (rather than
    a fixed 1) keeps the first sample of a 0-based index and avoids an
    out-of-range error from ``interp1d`` when the index starts above 1; for a
    1-based index the result is unchanged.

    Args:
        data: DataFrame with numeric columns ``'Index'`` and ``'Power_Usage'``.
        num_points: Number of evenly spaced positions to produce.

    Returns:
        Tuple ``(new_index, expanded_data)`` of two 1-D arrays of length
        ``num_points``: the resampled positions and the interpolated values.
    """
    positions = data['Index']
    new_index = np.linspace(positions.min(), positions.max(), num_points)
    interp_func = interp1d(positions, data['Power_Usage'], kind='linear')
    expanded_data = interp_func(new_index)
    return new_index, expanded_data

# Sum of all values in the resampled dataset
def calculate_total_usage(expanded_data):
    """Return the sum of ``expanded_data`` as computed by ``np.sum``.

    Args:
        expanded_data: Array-like of numeric values.

    Returns:
        The total of all elements.
    """
    total = np.sum(expanded_data)
    return total

# Scale a normalized pattern up to a given daily total
def generate_power_usage(normalized_pattern, total_daily_usage):
    """Multiply a pattern by a total.

    When the pattern sums to 1, the returned values sum to
    ``total_daily_usage``.

    Args:
        normalized_pattern: Pattern to scale (e.g. a NumPy array of shares).
        total_daily_usage: Factor applied to every element of the pattern.

    Returns:
        ``normalized_pattern * total_daily_usage``.
    """
    scaled_pattern = normalized_pattern * total_daily_usage
    return scaled_pattern

# Draw the original series, the resampled series and (optionally) a prediction in one figure
def plot_data(original_index, original_data, new_index, expanded_data, predicted_usage=None):
    """Plot the original and resampled data, plus an optional predicted curve.

    Args:
        original_index: x values of the original series.
        original_data: y values of the original series.
        new_index: x values of the resampled series (also used for the
            prediction).
        expanded_data: y values of the resampled series.
        predicted_usage: Optional y values of a predicted series; when given,
            its sum is shown in the legend label.

    Returns:
        None. Opens a new 14x7 figure and displays it with ``plt.show()``.
    """
    # (x, y, plot keyword arguments) for every curve, in drawing order
    curves = [
        (original_index, original_data, {'label': 'Original Data'}),
        (new_index, expanded_data, {'label': 'Interpolated Data (288 points)', 'linestyle': '--'}),
    ]
    if predicted_usage is not None:
        prediction_label = f'Predicted Usage (Total = {np.sum(predicted_usage)})'
        curves.append((new_index, predicted_usage, {'label': prediction_label, 'linestyle': '-.'}))

    plt.figure(figsize=(14, 7))
    for x_values, y_values, plot_kwargs in curves:
        plt.plot(x_values, y_values, **plot_kwargs)

    plt.xlabel('Index')
    plt.ylabel('Power Usage')
    plt.legend()
    plt.title('Power Usage Data')
    plt.show()

# Main function: turn a smoothed profile into a noisy daily load curve with a given total
def main(smoothed_data, load, degree=6, noise_level=0.2):
    """Generate a load curve that follows ``smoothed_data`` and sums to ``load``.

    Steps:
      1. Fit a polynomial of the given degree to the profile (value vs.
         position) with ordinary least squares.
      2. Evaluate the fit at the same positions and add Gaussian noise.
      3. Rescale the noisy curve so that its elements sum to ``load``.

    The noise is drawn from NumPy's global random generator, so the result
    depends on that generator's current state (for example on a preceding
    ``np.random.seed`` call).

    The function is self-contained: it no longer looks up the module-level
    name ``total_daily_usage``, which was only needed for an intermediate
    value that was never used or returned.

    Args:
        smoothed_data: 1-D sequence of profile values, one per time step.
        load: Total that the generated curve must add up to.
        degree: Degree of the fitted polynomial.
        noise_level: Standard deviation of the additive Gaussian noise.

    Returns:
        1-D NumPy array with the same number of elements as
        ``smoothed_data`` whose sum equals ``load`` (up to floating-point
        rounding).
    """
    data = load_data(smoothed_data)

    # Step 1: fit a polynomial model of the value as a function of position
    X = data['Index'].values.reshape(-1, 1)
    y = data['Power_Usage'].values
    poly = PolynomialFeatures(degree)
    X_poly = poly.fit_transform(X)
    model = LinearRegression()
    model.fit(X_poly, y)

    # Evaluate the fitted polynomial at the positions it was fitted on
    predicted_values = model.predict(X_poly)

    # Step 2: add noise to the fitted curve
    noise = np.random.normal(0, noise_level, predicted_values.shape)
    noisy_values = predicted_values + noise

    # Step 3: normalize to shares that sum to 1, then scale to the requested total
    normalized_noisy_values = noisy_values / noisy_values.sum()
    generated_load_data = normalized_noisy_values * load
    return generated_load_data

In [12]:
from tqdm import tqdm

# Module-level constant; keep it defined before the loop runs, since helper
# code defined in earlier cells may look this name up as a global.
total_daily_usage = 1000

# Generate one intraday curve per row of `data` and collect the pieces.
# The pieces are joined once after the loop instead of calling np.append on
# every iteration, which would re-copy the whole accumulated array each time.
daily_chunks = []
for i in tqdm(range(len(data))):
    # Each row is unpacked as (date, load, day); only the daily load is used here.
    date, load, day = data.iloc[i]
    # data_gen is called with the same seed on every iteration and reseeds
    # NumPy's global generator each time, so the base profile is identical
    # for every row.
    gen_load = main(data_gen(42,15), load)
    daily_chunks.append(np.asarray(gen_load, dtype=float).ravel())

# Flat 1-D array of all generated values, in row order. With no rows at all,
# fall back to the same empty (0, 1) array the accumulator used to start from.
data_list = np.concatenate(daily_chunks) if daily_chunks else np.empty((0, 1))

100%|██████████| 366/366 [00:00<00:00, 784.00it/s]


In [14]:
# Wrap the generated values in a DataFrame (rebinding `data_list`) and write them to disk.
# NOTE(review): to_csv is called without index=False, so the row index is written as an
# extra leading column — confirm that readers of this file expect it.
data_list = pd.DataFrame(data_list)
data_list.to_csv('2020_load_data.csv')

In [None]:
import pandas as pd
from datetime import datetime, timedelta

# Read the generated load values back from disk
df = pd.read_csv('2020_load_data.csv')  # change this path if a different CSV file should be labelled

# First and last timestamp of the time axis
start = datetime(2020, 1, 1)
end = datetime(2020, 12, 31, 23, 55)

# Build the timestamps at 5-minute spacing. The 'min' alias is used because
# the single-letter 'T' alias is deprecated in recent pandas releases.
times = pd.date_range(start, end, freq='5min')

# Guard: there must not be more rows than timestamps to label them with
if len(df) > len(times):
    raise ValueError("CSV 파일의 행 수가 시간 범위의 길이보다 많습니다.")

# Add the timestamps as a new column; if the file has fewer rows than the
# range has timestamps, only the first len(df) timestamps are used.
df['DateTime'] = times[:len(df)]

# Save the labelled frame to a new CSV file
df.to_csv('updated_file.csv', index=False)
