<a href="https://colab.research.google.com/github/G36maid/electricity_monitor/blob/main/COIL_data_generating.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import random
import pandas as pd

# 台北市の緯度と経度の範囲
latitude_range = (24.960, 25.200)
longitude_range = (121.440, 121.620)

# ランダムなポイントを生成する関数
def generate_random_coordinates(n):
    coordinates = []
    for _ in range(n):
        latitude = round(random.uniform(*latitude_range), 6)
        longitude = round(random.uniform(*longitude_range), 6)
        coordinates.append((latitude, longitude))
    return coordinates

# 30件分のランダムポイントを生成
random_coordinates = generate_random_coordinates(30)

# 緯度経度データをデータフレームに変換
coordinates_df = pd.DataFrame(random_coordinates, columns=["Latitude", "Longitude"])

# データフレームの確認
print(coordinates_df.head())

    Latitude   Longitude
0  25.171506  121.572323
1  25.011450  121.586835
2  25.107502  121.515934
3  25.007797  121.564123
4  24.993073  121.535009


In [17]:
import time
import random
import datetime
import pandas as pd

# Set base consumption levels and peak times for each category
base_consumption = {
    "residential": 2,   # residential: base consumption (kW)
    "commercial": 10,   # commercial; base consumption (kW)
    "industrial": 50    # industrial: base consumption (kW)
}

peak_hours = {
    "residential": [18, 19, 20],  # residential:evening peak hours
    "commercial": [9, 10, 11],    # commercial: morning peak hours
    "industrial": [14, 15, 16]    # industrial: afternoon peak hours
}

# Generate data by residential, commercial and industrial
def generate_hourly_consumption(category, hour):
    """時間ごとの電力消費データを生成する関数"""
    base = base_consumption[category]
    peak_factor = 3 if hour in peak_hours[category] else 1  # Peak hours have higher consumption
    variation = random.uniform(0.8, 1.2)  # add random variation
    return round(base * peak_factor * variation, 2)

# generate real-time power consumption data
def simulate_realtime_consumption(interval=1):
    """simulate real-time power consumption data"""
    try:
        while True:
            current_time = datetime.datetime.now()
            hour = current_time.hour

            # Generate consumption data for each category
            data = {
                "Time": current_time,
                "Residential": generate_hourly_consumption("residential", hour),
                "Commercial": generate_hourly_consumption("commercial", hour),
                "Industrial": generate_hourly_consumption("industrial", hour)
            }

            # Display in DataFrame
            df = pd.DataFrame([data])
            print(df)

            # インターバル
            time.sleep(interval)
    except KeyboardInterrupt:
        print("リアルタイムデータ生成を終了しました。")

# 1秒ごとにデータを生成するシミュレーションを開始
simulate_realtime_consumption(interval=1)


                        Time  Residential  Commercial  Industrial
0 2024-08-14 09:10:55.178602         1.86       27.63       55.36
                        Time  Residential  Commercial  Industrial
0 2024-08-14 09:10:56.183524          1.7       24.58       51.25
                        Time  Residential  Commercial  Industrial
0 2024-08-14 09:10:57.188113         2.29        32.1       55.83
                        Time  Residential  Commercial  Industrial
0 2024-08-14 09:10:58.192279         1.94       29.98       46.16
                        Time  Residential  Commercial  Industrial
0 2024-08-14 09:10:59.196626         2.32       30.39       45.18
                        Time  Residential  Commercial  Industrial
0 2024-08-14 09:11:00.200849         2.22        30.7        52.9
                        Time  Residential  Commercial  Industrial
0 2024-08-14 09:11:01.206749         1.92       30.25       48.55
                        Time  Residential  Commercial  Industrial
0 2024-08-

In [20]:
import random
import pandas as pd
import datetime

# カテゴリごとの基本消費とピーク時間帯を設定
base_consumption = {
    "residential": 2,   # 住宅: 基本消費 (kW)
    "commercial": 10,   # 商業: 基本消費 (kW)
    "industrial": 50    # 産業: 基本消費 (kW)
}

peak_hours = {
    "residential": [18, 19, 20],  # 住宅: 夕方のピーク時間
    "commercial": [9, 10, 11],    # 商業: 午前中のピーク時間
    "industrial": [14, 15, 16]    # 産業: 午後のピーク時間
}

# 平日と休日の消費の違いを反映
weekend_factor = {
    "residential": 1.2,  # 住宅: 休日は消費が増加
    "commercial": 0.7,   # 商業: 休日は消費が減少
    "industrial": 0.5    # 産業: 休日は消費が大幅に減少
}

# 30棟の建物を住宅、商業、産業にランダムに割り当てる
buildings = []
for i in range(1, 31):
    category = random.choice(["residential", "commercial", "industrial"])
    buildings.append({"building_id": f"Building_{i:02d}", "category": category})

# 時間ごとの電力消費データを生成する関数
def generate_hourly_consumption(category, hour, is_weekend):
    base = base_consumption[category]
    peak_factor = 3 if hour in peak_hours[category] else 1  # ピーク時間の消費増加
    variation = random.uniform(0.8, 1.2)  # ランダムな変動を追加
    weekend_multiplier = weekend_factor[category] if is_weekend else 1
    return round(base * peak_factor * variation * weekend_multiplier, 2)

# 時系列データを生成する関数
def generate_timeseries_data(start_date, end_date):
    date_range = pd.date_range(start=start_date, end=end_date, freq='H')
    data = []

    for date_time in date_range:
        hour = date_time.hour
        is_weekend = date_time.weekday() >= 5  # 5(土曜日)以上なら週末
        weekday_or_weekend = "Weekend" if is_weekend else "Weekday"

        for building in buildings:
            consumption = generate_hourly_consumption(building['category'], hour, is_weekend)
            data.append({
                "Datetime": date_time,
                "Building_ID": building["building_id"],
                "Category": building["category"],
                "Weekday_Weekend": weekday_or_weekend,
                "Consumption_kW": consumption
            })

    return pd.DataFrame(data)

# 1週間分のデータを生成する（リアルタイムではなくバッチ生成）
start_date = "2024-08-01"
end_date = "2024-08-07"
consumption_df = generate_timeseries_data(start_date, end_date)

# データフレームの先頭を表示
print(consumption_df.head())

# ピーク需要分析用のデータ集計
peak_df = consumption_df.groupby(["Building_ID", "Category", "Weekday_Weekend", df["Datetime"].dt.hour]).agg({
    "Consumption_kW": ["mean", "max"]
}).reset_index()

# ピーク時の消費が高い順に表示
peak_df.columns = ["Building_ID", "Category", "Weekday_Weekend", "Hour", "Mean_Consumption_kW", "Max_Consumption_kW"]
print(peak_df.sort_values(by="Max_Consumption_kW", ascending=False).head(10))

# 必要に応じてCSVファイルに保存
consumption_df.to_csv("electricity_consumption_data.csv", index=False)


    Datetime  Building_ID    Category Weekday_Weekend  Consumption_kW
0 2024-08-01  Building_01  commercial         Weekday            8.91
1 2024-08-01  Building_02  industrial         Weekday           59.58
2 2024-08-01  Building_03  commercial         Weekday           10.29
3 2024-08-01  Building_04  industrial         Weekday           56.98
4 2024-08-01  Building_05  commercial         Weekday            8.36
      Building_ID    Category Weekday_Weekend  Hour  Mean_Consumption_kW  \
1358  Building_29  industrial         Weekday    14             144.6900   
736   Building_16  industrial         Weekday    16             153.5250   
494   Building_11  industrial         Weekday    14             163.5175   
975   Building_21  industrial         Weekday    15             167.6150   
1360  Building_29  industrial         Weekday    16             149.3550   
1070  Building_23  industrial         Weekday    14             161.2225   
160   Building_04  industrial         Weekday   

In [22]:
# Building IDを生成して緯度経度データに追加
coordinates_df["Building_ID"] = [f"Building_{i+1}" for i in range(30)]

# 緯度経度データと電力消費データを紐づける
combined_consumption_df = pd.merge(coordinates_df, consumption_df, on="Building_ID")

# 結果の確認
print(combined_consumption_df.head())

combined_consumption_df.to_csv("electricity_consumption_data_with_location.csv", index=False)

    Latitude   Longitude  Building_ID            Datetime     Category  \
0  25.158234  121.502811  Building_10 2024-08-01 00:00:00  residential   
1  25.158234  121.502811  Building_10 2024-08-01 01:00:00  residential   
2  25.158234  121.502811  Building_10 2024-08-01 02:00:00  residential   
3  25.158234  121.502811  Building_10 2024-08-01 03:00:00  residential   
4  25.158234  121.502811  Building_10 2024-08-01 04:00:00  residential   

  Weekday_Weekend  Consumption_kW  
0         Weekday            1.86  
1         Weekday            1.63  
2         Weekday            2.28  
3         Weekday            2.20  
4         Weekday            1.76  


In [24]:
import random
import pandas as pd
import datetime

# エネルギー生成カテゴリとその基本出力レベル (MW)
generation_sources = {
    "solar": {"base_output": 10, "variability": 0.2},  # 太陽光: 基本出力と変動率
    "wind": {"base_output": 20, "variability": 0.3},   # 風力: 基本出力と変動率
    "hydropower": {"base_output": 15, "variability": 0.1}, # 水力: 基本出力と変動率
    "fossil_fuel": {"base_output": 100, "variability": 0.05}, # 化石燃料: 基本出力と変動率
    "nuclear": {"base_output": 80, "variability": 0.02}, # 原子力: 基本出力と変動率
    "rooftop_solar": {"base_output": 5, "variability": 0.3}, # 屋上ソーラー: 基本出力と変動率
    "microgrid": {"base_output": 2, "variability": 0.4}, # マイクログリッド: 基本出力と変動率
    "community_energy": {"base_output": 3, "variability": 0.25} # コミュニティエネルギー: 基本出力と変動率
}

# 時間ごとの発電量を生成する関数
def generate_hourly_generation(source, hour, is_sunny, is_windy):
    base_output = generation_sources[source]["base_output"]
    variability = generation_sources[source]["variability"]

    # 太陽光発電は昼間のみに発電し、曇りで変動
    if source == "solar" or source == "rooftop_solar":
        if 6 <= hour <= 18:
            base_output *= 1 if is_sunny else 0.5  # 晴天で増加、曇りで減少
        else:
            return 0  # 夜間は発電しない

    # 風力発電は風が強いと出力が増加
    if source == "wind":
        base_output *= 1 if is_windy else 0.7  # 風が強ければ増加

    # 変動率を適用して発電量を決定
    variation = random.uniform(1 - variability, 1 + variability)
    return round(base_output * variation, 2)

# 時系列データを生成する関数
def generate_generation_timeseries(start_date, end_date):
    date_range = pd.date_range(start=start_date, end=end_date, freq='H')
    data = []

    for date_time in date_range:
        hour = date_time.hour
        is_sunny = random.choice([True, False])  # ランダムな晴天/曇り
        is_windy = random.choice([True, False])  # ランダムな風の強さ

        for source in generation_sources.keys():
            generation = generate_hourly_generation(source, hour, is_sunny, is_windy)
            data.append({
                "Datetime": date_time,
                "Source": source,
                "Generation_MW": generation
            })

    return pd.DataFrame(data)

# 1週間分のデータを生成
start_date = "2024-08-01"
end_date = "2024-08-07"
generation_df = generate_generation_timeseries(start_date, end_date)

# データフレームの先頭を表示
print(generation_df.head())

# 必要に応じてCSVファイルに保存
generation_df.to_csv("power_generation_data.csv", index=False)


    Datetime       Source  Generation_MW
0 2024-08-01        solar           0.00
1 2024-08-01         wind          15.18
2 2024-08-01   hydropower          14.28
3 2024-08-01  fossil_fuel          95.31
4 2024-08-01      nuclear          78.98


In [26]:
import random
import pandas as pd
import datetime

# 夏季における気温、湿度、太陽放射、風速の基本範囲
weather_params = {
    "temperature": {"mean": 30, "stddev": 5, "min": 25, "max": 40},  # 気温 (摂氏)
    "humidity": {"mean": 60, "stddev": 10, "min": 40, "max": 90},    # 湿度 (%)
    "solar_radiation": {"mean": 800, "stddev": 200, "min": 200, "max": 1000},  # 太陽放射 (W/m^2)
    "wind_speed": {"mean": 3, "stddev": 1.5, "min": 0, "max": 10}    # 風速 (m/s)
}

# 時間ごとの天気データを生成する関数
def generate_hourly_weather_data(date_time):
    temperature = round(random.gauss(weather_params["temperature"]["mean"], weather_params["temperature"]["stddev"]), 1)
    temperature = max(min(temperature, weather_params["temperature"]["max"]), weather_params["temperature"]["min"])

    humidity = round(random.gauss(weather_params["humidity"]["mean"], weather_params["humidity"]["stddev"]), 1)
    humidity = max(min(humidity, weather_params["humidity"]["max"]), weather_params["humidity"]["min"])

    # 太陽放射は日中のみ大きく、夜間はゼロに
    if 6 <= date_time.hour <= 18:
        solar_radiation = round(random.gauss(weather_params["solar_radiation"]["mean"], weather_params["solar_radiation"]["stddev"]), 1)
    else:
        solar_radiation = 0
    solar_radiation = max(min(solar_radiation, weather_params["solar_radiation"]["max"]), weather_params["solar_radiation"]["min"])

    wind_speed = round(random.gauss(weather_params["wind_speed"]["mean"], weather_params["wind_speed"]["stddev"]), 1)
    wind_speed = max(min(wind_speed, weather_params["wind_speed"]["max"]), weather_params["wind_speed"]["min"])

    return {
        "Datetime": date_time,
        "Temperature_C": temperature,
        "Humidity_%": humidity,
        "Solar_Radiation_W/m^2": solar_radiation,
        "Wind_Speed_m/s": wind_speed
    }

# 1週間分の天気データを生成する関数
def generate_weather_timeseries(start_date, end_date):
    date_range = pd.date_range(start=start_date, end=end_date, freq='H')
    data = []

    for date_time in date_range:
        weather_data = generate_hourly_weather_data(date_time)
        data.append(weather_data)

    return pd.DataFrame(data)

# 1週間分のデータを生成
start_date = "2024-08-01"
end_date = "2024-08-07"
weather_df = generate_weather_timeseries(start_date, end_date)

# データフレームの先頭を表示
print(weather_df.head())

# 必要に応じてCSVファイルに保存
weather_df.to_csv("weather_data_summer.csv", index=False)


             Datetime  Temperature_C  Humidity_%  Solar_Radiation_W/m^2  \
0 2024-08-01 00:00:00           25.0        64.0                  200.0   
1 2024-08-01 01:00:00           40.0        65.6                  200.0   
2 2024-08-01 02:00:00           25.0        56.1                  200.0   
3 2024-08-01 03:00:00           25.0        48.1                  200.0   
4 2024-08-01 04:00:00           27.0        82.1                  200.0   

   Wind_Speed_m/s  
0             2.4  
1             4.2  
2             2.8  
3             1.0  
4             5.1  
