In [None]:
#install tensorflow package if non-exist
!pip install tensorflow

In [1]:
# Import our dependencies
import matplotlib.pyplot as plt
import sklearn as skl
import pandas as pd
import tensorflow as tf
import json
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


# Data preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

# Neural Net modules
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping


In [2]:
# Import our input dataset
weather_df = pd.read_csv('https://raw.githubusercontent.com/Alphaomegainfinity/energy_consumption_vs_weather_data/main/Resources/weather.csv', encoding='latin-1')
price_demand_df = pd.read_csv('https://raw.githubusercontent.com/Alphaomegainfinity/energy_consumption_vs_weather_data/main/Resources/price_and_demand.csv', encoding='latin-1')

In [3]:
# Checking any missing values per column and per rows
def data_checking(weather):
    # Identify NaN or empty values
    missing_values = weather.isna().sum()

    # Identify incorrect value types
    incorrect_types = weather.apply(lambda x: pd.to_numeric(x, errors='coerce').isna().sum())

    # Combine the results into a DataFrame
    cleaning_report = pd.DataFrame({'Missing Values': missing_values, 'Incorrect Types': incorrect_types})

    return cleaning_report

data_checking (weather_df)

Unnamed: 0,Missing Values,Incorrect Types
Location,0,175
Date,0,175
Minimum temperature (°C),0,0
Maximum temperature (°C),1,1
Rainfall (mm),0,0
Evaporation (mm),175,175
Sunshine (hours),175,175
Direction of maximum wind gust,1,175
Speed of maximum wind gust (km/h),1,1
Time of maximum wind gust,1,175


In [4]:
data_checking (price_demand_df)

Unnamed: 0,Missing Values,Incorrect Types
VIC1,0,8351
1/11/2022 0:30,0,8351
4178.18,0,0
8.94,0,0
TRADE,0,8351


In [5]:
# Checking info and shape of dataframe
price_demand_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8351 entries, 0 to 8350
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   VIC1            8351 non-null   object 
 1   1/11/2022 0:30  8351 non-null   object 
 2   4178.18         8351 non-null   float64
 3   8.94            8351 non-null   float64
 4   TRADE           8351 non-null   object 
dtypes: float64(2), object(3)
memory usage: 326.3+ KB


In [6]:
# Adding headers for price_demand_df:
header = ["State", "Date_Time", "Demand", "Price", "Trading_Status"]
price_demand_df.columns = header
price_demand_df

Unnamed: 0,State,Date_Time,Demand,Price,Trading_Status
0,VIC1,1/11/2022 1:00,4086.02,0.14,TRADE
1,VIC1,1/11/2022 1:30,4033.37,0.02,TRADE
2,VIC1,1/11/2022 2:00,3985.64,0.00,TRADE
3,VIC1,1/11/2022 2:30,3885.64,0.02,TRADE
4,VIC1,1/11/2022 3:00,3919.59,1.04,TRADE
...,...,...,...,...,...
8346,VIC1,23/04/2023 22:00,4717.55,84.09,TRADE
8347,VIC1,23/04/2023 22:30,4520.39,81.25,TRADE
8348,VIC1,23/04/2023 23:00,4472.49,93.97,TRADE
8349,VIC1,23/04/2023 23:30,4621.36,83.96,TRADE


In [7]:
# Convert the Date_Time column from object type to datetime type,
price_demand_df['Date_Time'] = pd.to_datetime(price_demand_df['Date_Time'], dayfirst=True)

# Split the Date_Time column into separate Date and Time columns
price_demand_df['Date'] = pd.to_datetime(((price_demand_df['Date_Time']).dt.date), dayfirst=True)
# price_demand_df['Time'] = price_demand_df['Date_Time'].dt.time
price_demand_df['Time'] = pd.to_datetime(price_demand_df['Date_Time'].dt.strftime('%H:%M:%S')).dt.time

# Rearrange all the columns
price_demand = price_demand_df[['State', 'Date', 'Time', 'Demand', 'Price', 'Trading_Status']]
price_demand

Unnamed: 0,State,Date,Time,Demand,Price,Trading_Status
0,VIC1,2022-11-01,01:00:00,4086.02,0.14,TRADE
1,VIC1,2022-11-01,01:30:00,4033.37,0.02,TRADE
2,VIC1,2022-11-01,02:00:00,3985.64,0.00,TRADE
3,VIC1,2022-11-01,02:30:00,3885.64,0.02,TRADE
4,VIC1,2022-11-01,03:00:00,3919.59,1.04,TRADE
...,...,...,...,...,...,...
8346,VIC1,2023-04-23,22:00:00,4717.55,84.09,TRADE
8347,VIC1,2023-04-23,22:30:00,4520.39,81.25,TRADE
8348,VIC1,2023-04-23,23:00:00,4472.49,93.97,TRADE
8349,VIC1,2023-04-23,23:30:00,4621.36,83.96,TRADE


In [8]:
 # rechecking the dataframe information and type

price_demand.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8351 entries, 0 to 8350
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   State           8351 non-null   object        
 1   Date            8351 non-null   datetime64[ns]
 2   Time            8351 non-null   object        
 3   Demand          8351 non-null   float64       
 4   Price           8351 non-null   float64       
 5   Trading_Status  8351 non-null   object        
dtypes: datetime64[ns](1), float64(2), object(3)
memory usage: 391.6+ KB
