In [1]:
#import our dependencies

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the dataset
combined_df = pd.read_csv("../Data Source/combined_single_sealevel.csv")
combined_df

Unnamed: 0,Year,Coal Consumption,Natural Gas Consumption (Excluding Supplemental Gaseous Fuels),Petroleum Consumption (Excluding Biofuels),Total Fossil Fuels Consumption,Nuclear Electric Power Consumption,Biomass Energy Consumption,Geothermal Energy Consumption,Hydroelectric Power Consumption,Solar Energy Consumption,Wind Energy Consumption,Total Renewable Energy Consumption,Total Primary Energy Consumption,Adjusted sea level (inches),Precipitation Change,Temperature Change (Celcius)
0,1949,11.980905,5.145142,11.868994,28.988371,0.000000,1.549262,0.000000,0.323365,0.000000,0.000000,1.872627,30.866419,3.511811,0.36,0.000
1,1950,12.347109,5.968371,13.298283,31.614755,0.000000,1.562307,0.000000,0.344218,0.000000,0.000000,1.906525,33.527374,3.598425,0.93,0.000
2,1951,12.552996,7.048518,14.407674,33.987736,0.000000,1.534669,0.000000,0.356131,0.000000,0.000000,1.890800,35.885997,3.972441,1.31,0.000
3,1952,11.306479,7.549621,14.933895,33.778116,0.000000,1.474369,0.000000,0.374325,0.000000,0.000000,1.848694,35.634550,3.870079,-3.60,0.000
4,1953,11.372684,7.906645,15.531959,34.802286,0.000000,1.418601,0.000000,0.374015,0.000000,0.000000,1.792616,36.601753,4.043307,-1.63,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,2019,11.315595,32.263692,36.866381,80.424546,8.451852,5.056000,0.116293,0.982225,0.429747,1.009551,7.593816,96.603433,9.480223,4.88,1.037
71,2020,9.181109,31.640017,32.331134,73.139322,8.251075,4.545130,0.117716,0.973355,0.511315,1.153045,7.300560,88.852392,9.592477,0.44,1.332
72,2021,10.548526,31.711113,35.242737,77.453620,8.130913,4.750550,0.118007,0.858407,0.626863,1.290407,7.644234,93.362899,9.797220,0.54,1.149
73,2022,9.887939,33.347492,35.318630,78.498234,8.061020,4.856665,0.118389,0.869339,0.764580,1.481823,8.090795,94.790665,0.000000,0.00,1.223


In [3]:
#Data Cleaning 
combined_df = combined_df.rename(columns={'Temperature Change (Celcius)': 'Temperature Change (Celsius)'})
combined_df

Unnamed: 0,Year,Coal Consumption,Natural Gas Consumption (Excluding Supplemental Gaseous Fuels),Petroleum Consumption (Excluding Biofuels),Total Fossil Fuels Consumption,Nuclear Electric Power Consumption,Biomass Energy Consumption,Geothermal Energy Consumption,Hydroelectric Power Consumption,Solar Energy Consumption,Wind Energy Consumption,Total Renewable Energy Consumption,Total Primary Energy Consumption,Adjusted sea level (inches),Precipitation Change,Temperature Change (Celsius)
0,1949,11.980905,5.145142,11.868994,28.988371,0.000000,1.549262,0.000000,0.323365,0.000000,0.000000,1.872627,30.866419,3.511811,0.36,0.000
1,1950,12.347109,5.968371,13.298283,31.614755,0.000000,1.562307,0.000000,0.344218,0.000000,0.000000,1.906525,33.527374,3.598425,0.93,0.000
2,1951,12.552996,7.048518,14.407674,33.987736,0.000000,1.534669,0.000000,0.356131,0.000000,0.000000,1.890800,35.885997,3.972441,1.31,0.000
3,1952,11.306479,7.549621,14.933895,33.778116,0.000000,1.474369,0.000000,0.374325,0.000000,0.000000,1.848694,35.634550,3.870079,-3.60,0.000
4,1953,11.372684,7.906645,15.531959,34.802286,0.000000,1.418601,0.000000,0.374015,0.000000,0.000000,1.792616,36.601753,4.043307,-1.63,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,2019,11.315595,32.263692,36.866381,80.424546,8.451852,5.056000,0.116293,0.982225,0.429747,1.009551,7.593816,96.603433,9.480223,4.88,1.037
71,2020,9.181109,31.640017,32.331134,73.139322,8.251075,4.545130,0.117716,0.973355,0.511315,1.153045,7.300560,88.852392,9.592477,0.44,1.332
72,2021,10.548526,31.711113,35.242737,77.453620,8.130913,4.750550,0.118007,0.858407,0.626863,1.290407,7.644234,93.362899,9.797220,0.54,1.149
73,2022,9.887939,33.347492,35.318630,78.498234,8.061020,4.856665,0.118389,0.869339,0.764580,1.481823,8.090795,94.790665,0.000000,0.00,1.223


In [5]:
#Data Preprocessing 

#Target variable (y): Total Primary Energy Consumption
#Feature variables (X): the other columns that will be used to predict the total energy consumption 

y = combined_df["Total Primary Energy Consumption"]
X = combined_df.drop(columns="Total Primary Energy Consumption")

In [6]:
#Split our data into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=42)

In [7]:
#Creating our logistic regression model
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs',
                                max_iter=200,
                                random_state=1)
classifier

In [8]:
#train the model using our logistic regression model 
#classifier.fit(X_train, y_train)

## Seasonal Autoregressive Integrated Moving Average Model

In [9]:
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

from sklearn.metrics import mean_squared_error, r2_score
from time import time
import seaborn as sns

In [10]:
# Set 'Year' as index
#combined_df['Year'] = pd.to_datetime(combined_df['Year'], format='%Y')
#combined_df.set_index('Year', inplace=True)
combined_df

Unnamed: 0,Year,Coal Consumption,Natural Gas Consumption (Excluding Supplemental Gaseous Fuels),Petroleum Consumption (Excluding Biofuels),Total Fossil Fuels Consumption,Nuclear Electric Power Consumption,Biomass Energy Consumption,Geothermal Energy Consumption,Hydroelectric Power Consumption,Solar Energy Consumption,Wind Energy Consumption,Total Renewable Energy Consumption,Total Primary Energy Consumption,Adjusted sea level (inches),Precipitation Change,Temperature Change (Celsius)
0,1949,11.980905,5.145142,11.868994,28.988371,0.000000,1.549262,0.000000,0.323365,0.000000,0.000000,1.872627,30.866419,3.511811,0.36,0.000
1,1950,12.347109,5.968371,13.298283,31.614755,0.000000,1.562307,0.000000,0.344218,0.000000,0.000000,1.906525,33.527374,3.598425,0.93,0.000
2,1951,12.552996,7.048518,14.407674,33.987736,0.000000,1.534669,0.000000,0.356131,0.000000,0.000000,1.890800,35.885997,3.972441,1.31,0.000
3,1952,11.306479,7.549621,14.933895,33.778116,0.000000,1.474369,0.000000,0.374325,0.000000,0.000000,1.848694,35.634550,3.870079,-3.60,0.000
4,1953,11.372684,7.906645,15.531959,34.802286,0.000000,1.418601,0.000000,0.374015,0.000000,0.000000,1.792616,36.601753,4.043307,-1.63,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,2019,11.315595,32.263692,36.866381,80.424546,8.451852,5.056000,0.116293,0.982225,0.429747,1.009551,7.593816,96.603433,9.480223,4.88,1.037
71,2020,9.181109,31.640017,32.331134,73.139322,8.251075,4.545130,0.117716,0.973355,0.511315,1.153045,7.300560,88.852392,9.592477,0.44,1.332
72,2021,10.548526,31.711113,35.242737,77.453620,8.130913,4.750550,0.118007,0.858407,0.626863,1.290407,7.644234,93.362899,9.797220,0.54,1.149
73,2022,9.887939,33.347492,35.318630,78.498234,8.061020,4.856665,0.118389,0.869339,0.764580,1.481823,8.090795,94.790665,0.000000,0.00,1.223


In [11]:
# Select relevant features for modeling
selected_features = ['Coal Consumption', 'Natural Gas Consumption (Excluding Supplemental Gaseous Fuels)', 
                     'Petroleum Consumption (Excluding Biofuels)',
                     'Total Fossil Fuels Consumption', 
                     'Nuclear Electric Power Consumption', 'Biomass Energy Consumption', 
                     'Geothermal Energy Consumption', 'Hydroelectric Power Consumption', 
                     'Solar Energy Consumption', 'Wind Energy Consumption', 
                     'Total Renewable Energy Consumption', 'Total Primary Energy Consumption', 
                     'Adjusted sea level (inches)', 'Precipitation Change', 'Temperature Change (Celsius)']

In [12]:
# Train-test split
train_size = int(len(combined_df) * 0.8)
train, test = combined_df[selected_features][:train_size], combined_df[selected_features][train_size:]

# SARIMA model training
sarima_model = SARIMAX(train['Total Primary Energy Consumption'], order=(0, 1, 0), seasonal_order=(0, 1, 0, 12))
sarima_result = sarima_model.fit()

# SARIMA model evaluation
predictions = sarima_result.predict(start=test.index[0], end=test.index[-1])
mse = mean_squared_error(test['Total Primary Energy Consumption'], predictions)

print(f'Mean Squared Error: {mse}')

# SARIMA model forecasting
forecast = sarima_result.forecast(steps=48)  # Forecasting next 48 months

Mean Squared Error: 65.09253910118298


In [13]:
r2 = r2_score(test['Total Primary Energy Consumption'], predictions)
r2

-15.169234994196838

In [14]:
forecast

60      97.199364
61      97.844706
62      99.474650
63     101.936702
64      99.658825
65     100.818543
66     101.049605
67     103.276280
68     103.344036
69     102.478056
70     104.208224
71     101.889840
72     102.442476
73     103.087818
74     104.717762
75     107.179814
76     104.901937
77     106.061655
78     106.292717
79     108.519392
80     108.587148
81     107.721168
82     109.451336
83     107.132952
84     107.685588
85     108.330930
86     109.960874
87     112.422926
88     110.145049
89     111.304767
90     111.535829
91     113.762504
92     113.830260
93     112.964280
94     114.694448
95     112.376064
96     112.928700
97     113.574042
98     115.203986
99     117.666038
100    115.388161
101    116.547879
102    116.778941
103    119.005616
104    119.073372
105    118.207392
106    119.937560
107    117.619176
Name: predicted_mean, dtype: float64

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import MinMaxScaler

# Load the dataset
data = pd.DataFrame({
    'Year': [2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023],
    'Total Renewable Energy Consumption': [6.587023, 6.799011, 6.829465, 7.119713, 7.38348, 7.534561, 7.593816, 7.30056, 7.644234, 8.090795, 8.244574],
    'Total Primary Energy Consumption': [94.253199, 95.334861, 94.484143, 94.092392, 93.901708, 97.404845, 96.603433, 88.852392, 93.362899, 94.790665, 93.685776]
})

# Setting 'Year' as index
data.set_index('Year', inplace=True)

# Split the data into features (X) and target variable (y)
y = data['Total Renewable Energy Consumption']

# SARIMA model parameters
order = (1, 1, 1)  # ARIMA parameters
seasonal_order = (2, 2, 2, 2)  # Seasonal parameters

# Train the SARIMA model
sarima_model = SARIMAX(y, order=order, seasonal_order=seasonal_order)
sarima_results = sarima_model.fit()

# Predict energy consumption for the next 10 years (2 years in this example)
future_years = pd.Series(range(2024, 2034))
predicted_energy_consumption = sarima_results.forecast(steps=len(future_years))

# Print the predicted energy consumption for the next 10 years
print("Predicted Energy Consumption for the Next 10 Years:")
for year, consumption in zip(future_years, predicted_energy_consumption):
    print(f"Year {year}: {consumption} units")


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  warn('Too few observations to estimate starting parameters%s.'


Predicted Energy Consumption for the Next 10 Years:
Year 2024: 7.843600802644488 units
Year 2025: 8.17412775117722 units
Year 2026: 8.148135394043743 units
Year 2027: 8.452599118556432 units
Year 2028: 8.207910218803317 units
Year 2029: 8.553306226170584 units
Year 2030: 8.241513207003043 units
Year 2031: 8.630178743696527 units
Year 2032: 8.29697854913454 units
Year 2033: 8.706786635123741 units


  return get_prediction_index(
  return get_prediction_index(
