In [4]:
import pandas as pd
import numpy as np

# Daily timestamps from 2018-01-01 through 2023-12-31 (both ends inclusive).
# NOTE(review): the per-house factor table built later in this cell spans
# 2018-2025; extend `end` here if data through 2025 is actually wanted.
date_range = pd.date_range('2018-01-01', '2023-12-31', freq='D')
num_days = len(date_range)

# Draw 10 distinct house numbers from 10002..100000 (sampling without replacement).
np.random.seed(42)  # fixed seed so every run produces the same houses
house_numbers = np.random.choice(np.arange(10002, 100001), size=10, replace=False)

# Districts a house can be assigned to
district_names = [
    "Boondheere", "Cabdicasiis", "Deyniile",
    "Dharkeenle", "Darusalam", "Hodan",
    "Wadajir", "Warta nabada", "Xmjajab",
    "Xmweyne", "Yaaqshiid", "Howlwadaag",
    "Huriwaa", "Kaxda", "Kaaraan",
    "Shibis", "Shingaani", "Waaberi",
]

# Re-seed, then pick one district per house; the default sampling is with
# replacement, so several houses may share a district.
np.random.seed(42)
assigned_districts = np.random.choice(district_names, size=len(house_numbers))

# Pair every house with its district and shuffle the row order reproducibly
house_district_df = (
    pd.DataFrame({'guri_num': house_numbers, 'degmada': assigned_districts})
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Function to generate consumption data with adjustments for each month and year
def generate_consumption_data(date_range, house_factor, yearly_monthly_factors):
    """Simulate one consumption value per date for a single house.

    For each date the value is
    ``base * house_factor * year_month_factor * seasonal_factor + noise``,
    clipped to the interval [0.5, 8], where

    * ``base`` is drawn from Normal(mean=2, sd=1.6),
    * ``noise`` is drawn from Normal(mean=0, sd=0.2),
    * ``seasonal_factor`` is ``1 + 0.3 * sin(2*pi * day_of_year / 365.25)``.

    Parameters
    ----------
    date_range : pandas.DatetimeIndex or sequence of datetime-like
        Dates to generate values for.
    house_factor : float
        Multiplier applied to every value of this house.
    yearly_monthly_factors : dict
        Maps ``(year, month)`` tuples to a multiplier; must contain an entry
        for every (year, month) that occurs in ``date_range``.

    Returns
    -------
    numpy.ndarray
        Float array with one entry per date, each within [0.5, 8].

    Raises
    ------
    KeyError
        If a (year, month) of ``date_range`` is missing from
        ``yearly_monthly_factors``.
    """
    dates = pd.DatetimeIndex(date_range)
    n_dates = len(dates)
    if n_dates == 0:
        return np.zeros(0)

    # Draw (base, noise) pairs row by row in a single call. The global legacy
    # generator fills the array in C order, i.e. base_0, noise_0, base_1, ...,
    # which is the same order the former per-day scalar calls consumed the
    # stream in, so a given seed keeps producing the same series.
    draws = np.random.normal(loc=[2.0, 0.0], scale=[1.6, 0.2], size=(n_dates, 2))
    base_consumption = draws[:, 0]
    random_noise = draws[:, 1]

    # Per-date lookup of the (year, month) multiplier; plain Python ints are
    # used for the keys so they match the dict exactly.
    year_month_factor = np.array(
        [yearly_monthly_factors[key]
         for key in zip(dates.year.tolist(), dates.month.tolist())],
        dtype=float,
    )

    # Annual sinusoid on the day of the year
    seasonal_factor = 1 + 0.3 * np.sin(2 * np.pi * dates.dayofyear.to_numpy() / 365.25)

    consumption_data = (
        base_consumption * house_factor * year_month_factor * seasonal_factor
        + random_noise
    )

    # Keep every value inside the allowed range
    return np.clip(consumption_data, 0.5, 8)

# Long-format layout: one row per (house, day). The timestamps are tiled (the
# whole range repeated once per house) while house number and district are
# repeated day by day, so position k in each array refers to the same pair.
datetime_array = np.tile(date_range, len(house_district_df))
guri_num_array = np.repeat(house_district_df['guri_num'], num_days)
district_array = np.repeat(house_district_df['degmada'], num_days)

# Extra multipliers for the high-consumption months (February-May);
# every other month is left unchanged (factor 1.0).
peak_month_boost = {2: 1.6, 3: 1.8, 4: 1.7, 5: 1.6}

# Collect each house's series in a list and concatenate once at the end;
# np.append inside the loop re-copies the whole accumulated array each time.
house_series = []

for house_number in house_district_df['guri_num']:
    house_factor = np.random.uniform(0.5, 1.6)  # House-specific factor

    # (year, month) -> multiplier for 2018 through 2025. The random draws stay
    # in the same order (year outer, month inner) so a given seed yields the
    # same factors as before.
    yearly_monthly_factors = {}
    for year in range(2018, 2026):
        yearly_increase = 1 + 0.05 * (year - 2018)  # grows by 0.05 per year after 2018
        for month in range(1, 13):
            monthly_factor = np.random.uniform(0.8, 1.2)  # Monthly variability
            yearly_monthly_factors[(year, month)] = (
                monthly_factor * yearly_increase * peak_month_boost.get(month, 1.0)
            )

    house_consumption = generate_consumption_data(date_range, house_factor, yearly_monthly_factors)
    house_series.append(house_consumption)

# With no houses there is nothing to concatenate; fall back to an empty array
total_consumption_array = np.concatenate(house_series) if house_series else np.array([])

# Assemble the long-format table
df = pd.DataFrame({
    'datetime': datetime_array,
    'guri_num': guri_num_array,
    'degmada': district_array,
    'total_KW': total_consumption_array
})

# Keep the file name in one place so the confirmation message always reports
# the path that was actually written.
output_csv = 'modified_electricity_consumption_data5.csv'
df.to_csv(output_csv, index=False)

print(f"Data generation complete. Saved to '{output_csv}'")


Data generation complete. Saved to 'modified_electricity_consumption_data2.csv'


In [5]:
pip install pytorch-forecasting

Collecting pytorch-forecasting
  Using cached pytorch_forecasting-0.10.1-py3-none-any.whl.metadata (11 kB)
Collecting optuna<3.0.0,>=2.3.0 (from pytorch-forecasting)
  Using cached optuna-2.10.1-py3-none-any.whl.metadata (15 kB)
Collecting pytorch-lightning<2.0.0,>=1.2.4 (from pytorch-forecasting)
  Using cached pytorch_lightning-1.9.5-py3-none-any.whl.metadata (23 kB)
Collecting scikit-learn<1.1,>=0.24 (from pytorch-forecasting)
  Using cached scikit-learn-1.0.2.tar.gz (6.7 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  Preparing metadata (pyproject.toml) did not run successfully.
  exit code: 1
  
  [66 lines of output]
  Partial import of sklearn during the build process.
  
    `numpy.distutils` is deprecated since NumPy 1.23.0, as a result
    of the deprecation of `distutils` itself. It will be removed for
    Python >= 3.12. For older Python versions it will remain present.
    It is recommended to use `setuptools < 60.0` for those Python versions.
    For more details, see:
      https://numpy.org/devdocs/reference/distutils_status_migration.html
  
  
    from numpy.distutils.command.build_ext import build_ext  # noqa
  INFO: No module named 'numpy.distutils._msvccompiler' in numpy.distutils; trying from distutils
  Traceback (most recent call last):
    File "C:\Users\xusee\anaconda3\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 353, in <module>
      main()
    File "C:\Users\xusee\anaconda3\Lib\site-packages\pip\_v

In [None]:
pip install pytorch-lightning