In [46]:
# Dependencies

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Using `get_dummies` to classify states

- Doesn't work as expected

## Renewables share data

- Found new data on the share of renewables in the total energy consumption in Australia. 
- Data in petajoules 1960-2020
- The data is not linear and a basic linear regression fit it poorly, so two polynomial approaches were tried: degree-2 polynomial features fed into a linear regression, and an SVM regressor with a polynomial kernel

In [None]:
# Load the renewables-share CSV and rename the share column to the
# underscore-separated 'Renewable_Share' key used by the following cells.
renewable_share = (
    pd.read_csv("../data/renewable_share.csv")
    .rename(columns={'Renewables share': 'Renewable_Share'})
)

# Show the inferred dtype of every column as the cell output.
renewable_share.dtypes


In [None]:
# Build (n_samples, 1) arrays for scikit-learn by selecting each column as a
# one-column frame.
# NOTE(review): the share is the feature and the year is the target here, the
# reverse of the later Year -> consumption cells — confirm this is intended.
X = renewable_share[['Renewable_Share']].to_numpy()
y = renewable_share[['Year']].to_numpy()

print("Shape: ", X.shape, y.shape)

In [None]:
# Polynomial regression setup: expand X into degree-2 polynomial terms and
# create an (as yet unfitted) linear regression model to train on them.

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

model = LinearRegression()

pf = PolynomialFeatures(degree=2)
pf.fit(X)
poly_X = pf.transform(X)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test set from the polynomial features. The split is seeded so the
# scores printed below are the same on every Restart & Run All instead of
# changing with each random shuffle.
X_train, X_test, y_train, y_test = train_test_split(poly_X, y, random_state=42)

# Fit on the training portion only, then report the model's score on both portions.
model.fit(X_train, y_train)
training_score = model.score(X_train, y_train)
testing_score = model.score(X_test, y_test)

print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")

In [None]:
# Residual plot for the model trained in the previous cell. The model is not
# refitted here: it was already fitted on X_train / y_train, and fitting it
# again after predicting changed nothing.
train_predictions = model.predict(X_train)
predictions = model.predict(X_test)

# Residual = actual - predicted, plotted against the predicted value.
plt.scatter(train_predictions, y_train - train_predictions, c="blue", label="Training Data")
plt.scatter(predictions, y_test - predictions, c="orange", label="Testing Data")
plt.legend()

# Zero-residual reference line. It spans the plotted x-range (the predictions),
# not the range of the actual targets.
plt.hlines(
    y=0,
    xmin=min(train_predictions.min(), predictions.min()),
    xmax=max(train_predictions.max(), predictions.max()),
)
plt.xlabel("Predicted value")
plt.ylabel("Residual (actual - predicted)")
plt.title("Residuals: training vs testing data")
plt.show()

### Renewable share using an SVM with a polynomial kernel

In [None]:
# Inputs for the SVM: the share as a one-column feature matrix, and the year
# kept as a 1-D Series target.
X = renewable_share[['Renewable_Share']].to_numpy()
y = renewable_share['Year']

print("Shape: ", *(data.shape for data in (X, y)))

In [None]:
from sklearn import svm

# Support-vector regression with a polynomial kernel, trained on the single
# raw feature column in X.
svm_model = svm.SVR(kernel='poly')
svm_model.fit(X, y)

# Predict on X itself. X_test comes from the earlier split of the expanded
# polynomial features (poly_X), so it does not have the single-column layout
# this model was fitted on; passing it to predict() fails with a feature-count
# mismatch.
print(svm_model.predict(X))

## Energy consumption in petajoules of coal, gas, oil and renewables 1974 - 2019

- This data returns the most accurate training and testing scores
- Need to work out how to add prediction data for years we want to predict

In [71]:
# Read the per-fuel consumption table and strip every space character from its
# string cells (regex replace), then preview the first rows.
energy_consumption = pd.read_csv("../data/energy_consumption_fueltype.csv").replace(
    ' ', '', regex=True
)
energy_consumption.head()

Unnamed: 0,Year,Coal,Oil,Gas,Renewables
0,1974,981.1,1327.7,189.2,204.0
1,1975,981.2,1339.7,211.2,206.2
2,1976,1047.7,1410.6,256.2,199.3
3,1977,1070.4,1438.3,283.0,201.2
4,1978,1103.0,1440.9,314.9,199.6


In [72]:
# Convert the three fossil-fuel columns to numeric dtypes before summing them.
for fuel in ('Coal', 'Oil', 'Gas'):
    energy_consumption[fuel] = pd.to_numeric(energy_consumption[fuel])

# Row-wise total of coal, oil and gas consumption.
energy_consumption['Non-Renewables'] = energy_consumption[['Coal', 'Oil', 'Gas']].sum(axis=1)
energy_consumption

Unnamed: 0,Year,Coal,Oil,Gas,Renewables,Non-Renewables
0,1974,981.1,1327.7,189.2,204.0,2498.0
1,1975,981.2,1339.7,211.2,206.2,2532.1
2,1976,1047.7,1410.6,256.2,199.3,2714.5
3,1977,1070.4,1438.3,283.0,201.2,2791.7
4,1978,1103.0,1440.9,314.9,199.6,2858.8
5,1979,1168.9,1411.6,362.6,193.6,2943.1
6,1980,1169.4,1358.6,416.0,206.0,2944.0
7,1981,1229.2,1339.0,462.0,210.7,3030.2
8,1982,1194.9,1262.7,466.2,203.5,2923.8
9,1983,1228.3,1306.7,490.0,200.9,3025.0


In [73]:
# Renewables regression inputs: year as an (n_samples, 1) feature matrix,
# renewables consumption as the 1-D target.
X = energy_consumption[['Year']].to_numpy()
y = energy_consumption['Renewables']

print("Shape: ", *(data.shape for data in (X, y)))

Shape:  (46, 1) (46,)


In [74]:
from sklearn.linear_model import LinearRegression

# Least-squares fit of the current target (renewables) against year.
model = LinearRegression().fit(X, y)
print(model)

# In-sample predictions alongside the actual values and the signed error
# (predicted minus actual), exported for use outside the notebook.
predictions = model.predict(X)

renewable_predicitions = pd.DataFrame(
    {"Predicted": predictions, "Actual": y, "Error": predictions - y},
    columns=["Predicted", "Actual", "Error"],
)
renewable_predicitions.to_csv('../export_data/renewable_machine_learning.csv')

LinearRegression()


In [75]:
# Future years to extrapolate to: every 5 years from 2025 to 2070 inclusive,
# each wrapped in its own one-element row so the list is a 2-D (n, 1) input.
years_to_predict = [[year] for year in range(2025, 2071, 5)]

In [77]:
# Extrapolate the currently fitted model to the future years.
predictions = model.predict(years_to_predict)

# years_to_predict holds one-element rows; unpack each row so the Year column
# stores plain integers.
future_renewable_predicitions = pd.DataFrame({
    "Year": [year for (year,) in years_to_predict],
    "Predicted": predictions,
})

future_renewable_predicitions.to_csv('../export_data/future_renewable_predictions.csv')

In [None]:
# Display the table of future renewables predictions.
future_renewable_predicitions

In [78]:
# Non-renewables regression inputs: year as an (n_samples, 1) feature matrix,
# the 'Non-Renewables' column as the 1-D target.
X = energy_consumption[['Year']].to_numpy()
y = energy_consumption['Non-Renewables']

print("Shape: ", *(data.shape for data in (X, y)))

Shape:  (46, 1) (46,)


In [79]:
from sklearn.linear_model import LinearRegression

# Least-squares fit of the current target (non-renewables) against year.
model = LinearRegression().fit(X, y)
print(model)

# In-sample predictions alongside the actual values and the signed error
# (predicted minus actual), exported for use outside the notebook.
predictions = model.predict(X)

non_renewable_predicitions = pd.DataFrame(
    {"Predicted": predictions, "Actual": y, "Error": predictions - y},
    columns=["Predicted", "Actual", "Error"],
)
non_renewable_predicitions.to_csv('../export_data/non_renewable_machine_learning.csv')

LinearRegression()


In [81]:
# Extrapolate the currently fitted model to the future years.
predictions = model.predict(years_to_predict)

# Unpack the one-element rows of years_to_predict into a flat Year column.
future_non_renewable_predicitions = pd.DataFrame({
    "Year": [year for (year,) in years_to_predict],
    "Predicted": predictions,
})

# Export, then show the table as the cell output.
future_non_renewable_predicitions.to_csv('../export_data/future_non_renewable_predictions.csv')
future_non_renewable_predicitions

Unnamed: 0,Year,Predicted
0,2025,6652.224292
1,2030,7061.052353
2,2035,7469.880413
3,2040,7878.708474
4,2045,8287.536534
5,2050,8696.364595
6,2055,9105.192655
7,2060,9514.020715
8,2065,9922.848776
9,2070,10331.676836


In [82]:
# Keep only the rows from 2006 onwards; the per-fuel models below are fitted
# on this recent subset.
from_2006_onwards = energy_consumption['Year'].ge(2006)
energy_consumption_limited = energy_consumption.loc[from_2006_onwards]
energy_consumption_limited

Unnamed: 0,Year,Coal,Oil,Gas,Renewables,Non-Renewables
32,2006,2316.6,1988.3,1131.9,288.3,5436.8
33,2007,2300.2,1981.8,1168.7,286.4,5450.7
34,2008,2351.0,2000.3,1252.9,239.9,5604.2
35,2009,2224.8,2057.8,1257.8,282.6,5540.4
36,2010,2123.9,2211.6,1274.2,294.6,5609.7
37,2011,2074.7,2183.8,1338.5,293.8,5597.0
38,2012,1930.9,2276.2,1376.5,333.5,5583.6
39,2013,1845.8,2293.9,1407.2,344.2,5546.9
40,2014,1907.2,2233.8,1420.0,347.7,5561.0
41,2015,1954.9,2276.7,1464.4,360.2,5696.0


In [83]:
# Recent coal usage: year as an (n_samples, 1) feature matrix, coal
# consumption from the 2006+ subset as the 1-D target.
X = energy_consumption_limited[['Year']].to_numpy()
y = energy_consumption_limited['Coal']

print("Shape: ", *(data.shape for data in (X, y)))

Shape:  (14, 1) (14,)


In [84]:
from sklearn.linear_model import LinearRegression

# Least-squares fit of coal consumption against year on the recent subset.
model = LinearRegression().fit(X, y)

# In-sample predictions with the signed error (predicted minus actual); the
# year is appended as the last column so each row is self-describing.
predictions = model.predict(X)
limited_coal_predicitions = pd.DataFrame(
    {"Predicted": predictions, "Actual": y, "Error": predictions - y}
).assign(Year=X.ravel())

limited_coal_predicitions.to_csv('../export_data/limited_coal_predictions.csv')
limited_coal_predicitions


Unnamed: 0,Predicted,Actual,Error,Year
32,2328.88,2316.6,12.28,2006
33,2281.75011,2300.2,-18.44989,2007
34,2234.62022,2351.0,-116.37978,2008
35,2187.49033,2224.8,-37.30967,2009
36,2140.36044,2123.9,16.46044,2010
37,2093.230549,2074.7,18.530549,2011
38,2046.100659,1930.9,115.200659,2012
39,1998.970769,1845.8,153.170769,2013
40,1951.840879,1907.2,44.640879,2014
41,1904.710989,1954.9,-50.189011,2015


In [85]:
# Extrapolate the currently fitted model to the future years.
predictions = model.predict(years_to_predict)

# Unpack the one-element rows of years_to_predict into a flat Year column.
future_coal_predicitions = pd.DataFrame({
    "Year": [year for (year,) in years_to_predict],
    "Predicted": predictions,
})

# Export, then show the table as the cell output.
future_coal_predicitions.to_csv('../export_data/coal_predictions.csv')
future_coal_predicitions

Unnamed: 0,Year,Predicted
0,2025,1433.412088
1,2030,1197.762637
2,2035,962.113187
3,2040,726.463736
4,2045,490.814286
5,2050,255.164835
6,2055,19.515385
7,2060,-216.134066
8,2065,-451.783516
9,2070,-687.432967


In [86]:
# Recent oil usage: year as an (n_samples, 1) feature matrix, oil
# consumption from the 2006+ subset as the 1-D target.
X = energy_consumption_limited[['Year']].to_numpy()
y = energy_consumption_limited['Oil']

print("Shape: ", *(data.shape for data in (X, y)))

Shape:  (14, 1) (14,)


In [87]:
from sklearn.linear_model import LinearRegression

# Least-squares fit of oil consumption against year on the recent subset.
model = LinearRegression().fit(X, y)

# In-sample predictions with the signed error (predicted minus actual); the
# year is appended as the last column so each row is self-describing.
predictions = model.predict(X)
limited_oil_predicitions = pd.DataFrame(
    {"Predicted": predictions, "Actual": y, "Error": predictions - y}
).assign(Year=X.ravel())

limited_oil_predicitions.to_csv('../export_data/limited_oil_predictions.csv')
limited_oil_predicitions

Unnamed: 0,Predicted,Actual,Error,Year
32,2006.828571,1988.3,18.528571,2006
33,2037.295604,1981.8,55.495604,2007
34,2067.762637,2000.3,67.462637,2008
35,2098.22967,2057.8,40.42967,2009
36,2128.696703,2211.6,-82.903297,2010
37,2159.163736,2183.8,-24.636264,2011
38,2189.630769,2276.2,-86.569231,2012
39,2220.097802,2293.9,-73.802198,2013
40,2250.564835,2233.8,16.764835,2014
41,2281.031868,2276.7,4.331868,2015


In [88]:
# Extrapolate the currently fitted model to the future years.
predictions = model.predict(years_to_predict)

# Unpack the one-element rows of years_to_predict into a flat Year column.
future_oil_predicitions = pd.DataFrame({
    "Year": [year for (year,) in years_to_predict],
    "Predicted": predictions,
})

# Export, then show the table as the cell output.
future_oil_predicitions.to_csv('../export_data/oil_predictions.csv')
future_oil_predicitions

Unnamed: 0,Year,Predicted
0,2025,2585.702198
1,2030,2738.037363
2,2035,2890.372527
3,2040,3042.707692
4,2045,3195.042857
5,2050,3347.378022
6,2055,3499.713187
7,2060,3652.048352
8,2065,3804.383516
9,2070,3956.718681
