In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sqlite3

In [44]:
# Load the IEA total-demand table from SQLite and reshape it from wide to long form.
conn = sqlite3.connect('energy.db')
try:
    demand_wide = pd.read_sql_query('SELECT * FROM "Critical minerals (IEA) total demand"', conn)
finally:
    # Release the database handle even when the query raises.
    conn.close()

# Every column other than 'Mineral' / 'Scenario' is unpivoted: its header becomes
# the 'Year' value and its cell becomes 'Total Demand'.
df = demand_wide.melt(id_vars=['Mineral', 'Scenario'], var_name='Year', value_name='Total Demand')

In [45]:
# Feature matrix: the three descriptor columns. Target vector: the demand column.
X = df.loc[:, ['Year', 'Mineral', 'Scenario']]
y = df.loc[:, 'Total Demand']

In [46]:
# One-hot encode the categorical feature columns into indicator (dummy) columns.
X = pd.get_dummies(data=X)

In [47]:
# Split and train
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

# Seed the forest as well: its bootstrap sampling is random, so an unseeded
# model reports different errors on every run of the notebook.
model = RandomForestRegressor(random_state=100)
model.fit(X_train, y_train)

In [48]:
# Score the fitted model on the held-out rows.
predictions = model.predict(X_test)

# Mean squared error first, then its square root (RMSE).
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
rmse = mse ** 0.5

# Two-decimal versions used only for display.
rounded_mse = round(mse, 2)
rounded_rmse = round(rmse, 2)

print("Mean Squared Error:", rounded_mse)
print(f'The model is mistaken by {rounded_rmse} kt.')

Mean Squared Error: 1087326.93
The model is mistaken by 1042.75 kt.


In [49]:
minerals = ['Copper', 'Cobalt', 'Lithium', 'Nickel', 'Neodymium']
scenarios = ['Stated policies', 'Announced pledges', 'Zero Emissions']

# The year labels are identical for every subplot, so compute them once.
years = df['Year'].unique()

# One subplot per (mineral, scenario) pair: minerals down the rows, scenarios across the columns.
fig1 = make_subplots(rows=len(minerals), cols=len(scenarios),
                    subplot_titles=[f"{mineral} - {scenario}" for mineral in minerals for scenario in scenarios])

for i, mineral in enumerate(minerals):
    for j, scenario in enumerate(scenarios):
        # One input row per year for the current mineral and scenario
        # (the scalar mineral / scenario values are broadcast along the years).
        input_data = pd.DataFrame({'Mineral': mineral, 'Scenario': scenario, 'Year': years})

        # One-hot encode, then align with the training columns: dummy columns
        # absent from this slice are added as 0 and the training order is restored.
        input_data = pd.get_dummies(input_data).reindex(columns=X.columns, fill_value=0)

        # Stored under its own name so the per-subplot results do not overwrite
        # the hold-out `predictions` computed during evaluation.
        predicted_demands = model.predict(input_data)

        # Get the real total demands
        # NOTE(review): assumes these rows come back in the same order as `years` - confirm.
        is_current_pair = (df['Mineral'] == mineral) & (df['Scenario'] == scenario)
        real_demands = df.loc[is_current_pair, 'Total Demand'].values

        # Add bar traces to the subplot (grid positions are 1-based)
        fig1.add_trace(go.Bar(x=years, y=predicted_demands, name='Predicted'), row=i+1, col=j+1)
        fig1.add_trace(go.Bar(x=years, y=real_demands, name='Real'), row=i+1, col=j+1)

fig1.update_layout(title="Predicted vs Real Total Demand for Different Minerals and Scenarios",
                  showlegend=False,
                  height=1200, width=1300,
                  template='plotly_dark')

# `xaxis_title` / `yaxis_title` in update_layout only reach the first subplot's
# axes; label the bottom row and the left column of the grid instead.
fig1.update_xaxes(title_text="Year", row=len(minerals))
fig1.update_yaxes(title_text="Total Demand (kt)", col=1)

fig1.show()

In [50]:
def train_model_get_errors(X_train, y_train, X_test, y_test):
    """Fit a fresh random forest on one split and score it on the other.

    Parameters
    ----------
    X_train, y_train : features and target used to fit the model.
    X_test, y_test : features and target used to score the model.

    Returns
    -------
    (mse, rmse) : the mean squared error on the test rows and its square root.
    """
    forest = RandomForestRegressor(random_state=100)
    forest.fit(X_train, y_train)
    mse = mean_squared_error(y_test, forest.predict(X_test))
    return mse, mse ** 0.5


# Train the model and get errors for different test sizes
test_sizes = [0.1, 0.2, 0.3, 0.4, 0.5]
mses = []
rmses = []
for size in test_sizes:
    # Sweep-local names: X_train / X_test / y_train / y_test (and mse / rmse) must
    # keep describing the split and scores that belong to `model`.
    X_fit, X_eval, y_fit, y_eval = train_test_split(X, y, test_size=size, random_state=100)
    sweep_mse, sweep_rmse = train_model_get_errors(X_fit, y_fit, X_eval, y_eval)
    mses.append(sweep_mse)
    rmses.append(sweep_rmse)

# Subplots
fig2 = make_subplots(rows=1, cols=2, subplot_titles=("MSE vs. Test Size", "RMSE vs. Test Size"))

# Add traces
fig2.add_trace(go.Scatter(x=test_sizes, y=mses, mode='lines+markers', name='MSE'), row=1, col=1)
fig2.add_trace(go.Scatter(x=test_sizes, y=rmses, mode='lines+markers', name='RMSE'), row=1, col=2)

fig2.update_layout(legend=dict(x=0.01, y=0.99),
                  margin=dict(l=20, r=20, t=40, b=20),
                  template='plotly_dark')

# Axis settings passed to update_layout as `xaxis` / `yaxis` only reach the first
# subplot; update_xaxes / update_yaxes apply them to both panels.
fig2.update_xaxes(title_text="Test Size",
                  tickvals=test_sizes,
                  ticktext=[str(size) for size in test_sizes])
fig2.update_yaxes(title_text="Error", type='log')  # log scale

fig2.show()

In [51]:

# Score `model` on rows it was not fitted on. The split is rebuilt here with the
# same test_size / random_state that were used before fitting `model`, so the
# residuals do not depend on whichever split was last assigned to X_test / y_test.
_X_fit, X_holdout, _y_fit, y_holdout = train_test_split(X, y, test_size=0.2, random_state=100)
predictions = model.predict(X_holdout)

# Calculate residuals - the error or the deviation of the model's prediction from the true value
residuals = y_holdout - predictions

fig3 = go.Figure()

fig3.add_trace(go.Scatter(x=predictions, y=residuals, mode='markers',
    marker=dict(color='blue'),
    name='Residuals'))

# Add a horizontal line at y=0 spanning the range of predicted values
fig3.add_shape(type="line", x0=min(predictions), y0=0, x1=max(predictions), y1=0,
    line=dict(color="red", width=2, dash="dash"),
    name="Zero Error Line")

fig3.update_layout(title="Residual Plot",
    xaxis_title="Predicted Values",
    yaxis_title="Residuals",
    showlegend=True,
    template='plotly_dark')

fig3.show()

In [52]:
# Query one (year, mineral, scenario) combination and compare it with the table.
selected_year = '2050'
selected_mineral = 'Cobalt'
selected_scenario = 'Announced pledges'

# Single-row frame describing the query.
query_row = pd.DataFrame({'Year': [selected_year],
                          'Mineral': [selected_mineral],
                          'Scenario': [selected_scenario]})

# One-hot encode and align with the training columns in one step: dummy columns
# missing from the query are filled with 0 and the training column order is used.
input_data = pd.get_dummies(query_row).reindex(columns=X.columns, fill_value=0)

# Model estimate for the query row
prediction = model.predict(input_data)

# Report the query and the estimate
print("\nFor Year:", selected_year)
print("Mineral:", selected_mineral)
print("Scenario:", selected_scenario)
print("Predicted Total Demand:", prediction[0])

# Look up the recorded value for the same combination (first matching row).
matches_query = ((df['Year'] == selected_year)
                 & (df['Mineral'] == selected_mineral)
                 & (df['Scenario'] == selected_scenario))
real_demand = df.loc[matches_query, 'Total Demand'].values[0]

print("Real Total Demand:", real_demand)


For Year: 2050
Mineral: Cobalt
Scenario: Announced pledges
Predicted Total Demand: 456.8501339999997
Real Total Demand: 524.779


In [53]:
import plotly.express as px
from jinja2 import Template
import json

# Figures to embed in the report, in display order.
figs = [fig1, fig2, fig3]

# Convert each Plotly figure to JSON
fig_jsons = [fig.to_json() for fig in figs]

# Load the Jinja2 template.
# Read it as UTF-8 explicitly: the output below is written as UTF-8, and the
# platform-default encoding could otherwise mis-decode non-ASCII template text.
input_template_path = "html_samples/input.html"
with open(input_template_path, encoding="utf-8") as template_file:
    j2_template = Template(template_file.read())

# The template receives the serialized figures as `fig_jsons`.
rendered_html = j2_template.render(fig_jsons=fig_jsons)

# Save HTML
output_html_path = "html_samples/Model.html"
with open(output_html_path, "w", encoding="utf-8") as output_file:
    output_file.write(rendered_html)