# Loading the Datasets

After module imports, data is loaded into Pandas DataFrames for analysis and modelling.

In [3]:
# Module Imports
import os
import sys

import numpy as np
import pandas as pd
import seaborn as sns

# Report the library and interpreter versions used for this run,
# so that results can be reproduced against the same environment.
for label, version in (
    ("Numpy versions:", np.__version__),
    ("Pandas version:", pd.__version__),
    ("Seaborn version:", sns.__version__),
    ("Python version:", sys.version),
):
    print(label, version)

Numpy versions: 1.26.2
Pandas version: 2.1.3
Seaborn version: 0.13.0
Python version: 3.9.10 (tags/v3.9.10:f2f3f53, Jan 17 2022, 15:14:21) [MSC v.1929 64 bit (AMD64)]


In [11]:
# Load Datasets into DataFrames
# The raw-data folder defaults to the original local path, but can be overridden
# on another machine by setting the CRIMSON_HORNBILL_DATA_ROOT environment
# variable, so this cell does not need editing to run elsewhere.
default_root = 'C:/Users/paulm/OneDrive/Cloudforest Technologies/M. Projects/crimson-hornbill/data/1_raw/'
root = os.environ.get('CRIMSON_HORNBILL_DATA_ROOT') or default_root

# Keep a trailing separator so that `root + filename` (the form this cell
# originally used) still yields a valid path.
if not root.endswith(('/', '\\')):
    root += '/'

client_df = pd.read_csv(os.path.join(root, 'client.csv'))
elec_price_df = pd.read_csv(os.path.join(root, 'electricity_prices.csv'))
gas_price_df = pd.read_csv(os.path.join(root, 'gas_prices.csv'))
historic_weather_df = pd.read_csv(os.path.join(root, 'historical_weather.csv'))
forecast_weather_df = pd.read_csv(os.path.join(root, 'forecast_weather.csv'))

# Data Munging

Before exploring or modelling the data, it is important to fix any errors and ensure that each column is stored with the correct data type.

In [5]:
# client.csv - datatype clean-up

# Preview the first two rows as loaded, before any conversion
print(client_df.head(2))

# Parse the 'date' column into a datetime dtype
for date_col in ('date',):
    client_df[date_col] = pd.to_datetime(client_df[date_col])

# Report the resulting dtype of every column, one per line
client_cols = client_df.columns
for col, dtype in zip(client_cols, client_df.dtypes):
    print(col, ":", dtype)

   product_type  county  eic_count  installed_capacity  is_business  \
0             1       0        108              952.89            0   
1             2       0         17              166.40            0   

         date  data_block_id  
0  2021-09-01              2  
1  2021-09-01              2  
product_type : int64
county : int64
eic_count : int64
installed_capacity : float64
is_business : int64
date : datetime64[ns]
data_block_id : int64


In [6]:
# electricity_prices - datatype clean-up

# Preview the first two rows as loaded, before any conversion
print(elec_price_df.head(2))

# Parse both timestamp columns into a datetime dtype
for date_col in ('forecast_date', 'origin_date'):
    elec_price_df[date_col] = pd.to_datetime(elec_price_df[date_col])

# Report the resulting dtype of every column, one per line
elec_price_cols = elec_price_df.columns
for col, dtype in zip(elec_price_cols, elec_price_df.dtypes):
    print(col, ":", dtype)

         forecast_date  euros_per_mwh          origin_date  data_block_id
0  2021-09-01 00:00:00          92.51  2021-08-31 00:00:00              1
1  2021-09-01 01:00:00          88.90  2021-08-31 01:00:00              1
forecast_date : datetime64[ns]
euros_per_mwh : float64
origin_date : datetime64[ns]
data_block_id : int64


In [7]:
# gas_prices - datatype clean-up

# Preview the first two rows as loaded, before any conversion
print(gas_price_df.head(2))

# Parse both date columns into a datetime dtype
for date_col in ('forecast_date', 'origin_date'):
    gas_price_df[date_col] = pd.to_datetime(gas_price_df[date_col])

# Report the resulting dtype of every column, one per line
gas_price_cols = gas_price_df.columns
for col, dtype in zip(gas_price_cols, gas_price_df.dtypes):
    print(col, ":", dtype)

  forecast_date  lowest_price_per_mwh  highest_price_per_mwh origin_date  \
0    2021-09-01                 45.23                  46.32  2021-08-31   
1    2021-09-02                 45.62                  46.29  2021-09-01   

   data_block_id  
0              1  
1              2  
forecast_date : datetime64[ns]
lowest_price_per_mwh : float64
highest_price_per_mwh : float64
origin_date : datetime64[ns]
data_block_id : int64


In [10]:
# historic_weather - datatype clean-up

# Preview the first two rows as loaded, before any conversion
print(historic_weather_df.head(2))

# Parse the 'datetime' column into a datetime dtype.
# Per the original author's note, these timestamps are UTC.
for date_col in ('datetime',):
    historic_weather_df[date_col] = pd.to_datetime(historic_weather_df[date_col])

# Report the resulting dtype of every column, one per line
historic_weather_cols = historic_weather_df.columns
for col, dtype in zip(historic_weather_cols, historic_weather_df.dtypes):
    print(col, ":", dtype)

              datetime  temperature  dewpoint  rain  snowfall  \
0  2021-09-01 00:00:00         14.4      12.0   0.0       0.0   
1  2021-09-01 00:00:00         14.0      12.0   0.0       0.0   

   surface_pressure  cloudcover_total  cloudcover_low  cloudcover_mid  \
0            1015.8                 4               4               0   
1            1010.6                 7               8               0   

   cloudcover_high  windspeed_10m  winddirection_10m  shortwave_radiation  \
0                0       6.694444                  3                  0.0   
1                0       4.944444                353                  0.0   

   direct_solar_radiation  diffuse_radiation  latitude  longitude  \
0                     0.0                0.0      57.6       21.7   
1                     0.0                0.0      57.6       22.2   

   data_block_id  
0              1  
1              1  
datetime : datetime64[ns]
temperature : float64
dewpoint : float64
rain : float64
snowf

In [14]:
# forecast_weather - datatype clean-up

# Preview the first two rows as loaded, before any conversion
print(forecast_weather_df.head(2))

# Parse both timestamp columns into a datetime dtype.
# Per the original author's note, these timestamps are UTC.
# NOTE(review): the raw values carry an explicit +00:00 offset (see the preview),
# so these columns presumably parse as timezone-aware - confirm this matches the
# other frames' datetime columns before joining on them.
for date_col in ('origin_datetime', 'forecast_datetime'):
    forecast_weather_df[date_col] = pd.to_datetime(forecast_weather_df[date_col])

# Report the resulting dtype of every column, one per line
forecast_weather_cols = forecast_weather_df.columns
for col, dtype in zip(forecast_weather_cols, forecast_weather_df.dtypes):
    print(col, ":", dtype)

   latitude  longitude            origin_datetime  hours_ahead  temperature  \
0      57.6       21.7  2021-09-01 00:00:00+00:00            1    15.655786   
1      57.6       22.2  2021-09-01 00:00:00+00:00            1    13.003931   

    dewpoint  cloudcover_high  cloudcover_low  cloudcover_mid  \
0  11.553613         0.904816        0.019714             0.0   
1  10.689844         0.886322        0.004456             0.0   

   cloudcover_total  10_metre_u_wind_component  10_metre_v_wind_component  \
0          0.905899                  -0.411328                  -9.106137   
1          0.886658                   0.206347                  -5.355405   

   data_block_id          forecast_datetime  direct_solar_radiation  \
0              1  2021-09-01 01:00:00+00:00                     0.0   
1              1  2021-09-01 01:00:00+00:00                     0.0   

   surface_solar_radiation_downwards  snowfall  total_precipitation  
0                                0.0       0.0    

# Feature Engineering

# Exploratory Data Analysis