
*Objective*

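Using the available proximate analysis, calorific value, and elemental analysis data, develop a machine learning model that predicts the elemental analysis (carbon, hydrogen, nitrogen, sulphur, and oxygen content) of different biomasses from their proximate analysis, net calorific value, and cluster label. This notebook loads the previously trained model and its preprocessing objects and applies them to a new data set.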

In [25]:

# General-purpose libraries
import os
import glob
import pickle
import numpy as np
import pandas as pd
from random import sample

# Visualization
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns
from IPython.display import Image
%matplotlib inline
# Scikit-learn - Preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OrdinalEncoder

# Scikit-learn - Models
from sklearn.preprocessing import OneHotEncoder

import keras

In [29]:
from tensorflow.keras import backend as K

def r2_score(y_train, y_pred):
    """Coefficient of determination (R^2) written with Keras backend ops.

    Every element of both inputs is pooled into a single score:
    ``1 - SS_res / (SS_tot + K.epsilon())``.

    Args:
        y_train: Reference (true) target values.
        y_pred: Model predictions, broadcast-compatible with ``y_train``.

    Returns:
        The pooled R^2 value as produced by the backend reductions.
    """
    SS_res = K.sum(K.square(y_train - y_pred))
    SS_tot = K.sum(K.square(y_train - K.mean(y_train)))
    # K.epsilon() keeps the ratio finite when the targets have zero variance.
    return 1 - SS_res / (SS_tot + K.epsilon())


# Stable alias for the Keras metric. A later cell runs
# `from sklearn.metrics import ..., r2_score`, which rebinds the name `r2_score`
# to the scikit-learn function; this alias stays bound to the function above and
# matches the 'r2_score_keras' key used when the saved model is loaded.
r2_score_keras = r2_score

In [31]:
def global_r2_score(y_true, y_pred):
    """Return a single R^2 value pooled over every element of the inputs.

    Inputs are converted to NumPy arrays first, so pandas objects and lists are
    reduced over all elements rather than per column. When the two shapes differ
    only by singleton axes (for example ``(n,)`` versus ``(n, 1)``), both are
    squeezed so the subtraction stays element-wise instead of broadcasting to an
    ``n x n`` matrix.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predicted values.

    Returns:
        ``1 - SS_res / (SS_tot + 1e-10)`` as a NumPy scalar.
    """
    true_values = np.asarray(y_true)
    predicted = np.asarray(y_pred)

    if true_values.shape != predicted.shape:
        squeezed_true = np.squeeze(true_values)
        squeezed_pred = np.squeeze(predicted)
        # Only reconcile shapes that agree once singleton axes are removed;
        # anything else is left to NumPy's normal broadcasting rules.
        if squeezed_true.shape == squeezed_pred.shape:
            true_values, predicted = squeezed_true, squeezed_pred

    ss_res = np.sum((true_values - predicted) ** 2)
    ss_tot = np.sum((true_values - np.mean(true_values)) ** 2)
    return 1 - ss_res / (ss_tot + 1e-10)  # small epsilon avoids division by zero

In [33]:
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Load the Excel file with the proximate analysis, calorific value and cluster data.
archivo = "Codigos/Datosdebiomasarango.xlsx"
df_Nuevo = pd.read_excel(archivo, engine='openpyxl')

In [40]:
import joblib
# Load the saved preprocessing objects: the scaler applied to the numeric input
# features, the scaler used to invert the model outputs, and the encoder for the
# 'cluster' column.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
X_scaler = joblib.load('corrida/experiment_170925007/scaler_X_units70_run8.pkl')
y_scaler = joblib.load('corrida/experiment_170925007/scaler_y_units70_run8.pkl') 
encoder = joblib.load('encoder.pkl')

In [41]:
from tensorflow.keras.models import load_model

# Load the trained network for inference only.
# At this point the name `r2_score` refers to sklearn.metrics.r2_score (imported in
# an earlier cell), so it must not be handed to Keras as the 'r2_score_keras' metric.
# compile=False skips restoring the training configuration (optimizer, loss, metrics),
# which predict() does not need. Call modelo_cargado.compile(...) explicitly before
# using evaluate() or fit().
modelo_cargado = load_model('corrida/experiment_170925007/modelo_170925007.keras', compile=False)


In [42]:
# Model inputs: proximate-analysis columns, net calorific value and the cluster label.
columnas_entrada = [
    'AR_Moisturecontent',
    'AR_Ashcontent',
    'AR_Volatilematter',
    'AR_Fixedcarbon',
    'AR_Netcalorificvalue(LHV)',
    'cluster',
]
XN = df_Nuevo[columnas_entrada]

In [47]:
# Elemental-analysis columns (C, H, N, S, O) taken from the same table.
columnas_objetivo = [
    'AR_Carbon',
    'AR_Hydrogen',
    'AR_Nitrogen',
    'AR_Sulphur',
    'AR_Oxygen',
]
YN = df_Nuevo[columnas_objetivo]

In [49]:
# Numeric feature columns, in the order they are handed to the saved scaler.
# Defined once so the scaler input and the labels of the scaled frame cannot drift apart.
columnas_numericas = ['AR_Moisturecontent', 'AR_Ashcontent', 'AR_Volatilematter',
                      'AR_Fixedcarbon', 'AR_Netcalorificvalue(LHV)']

# Process numeric columns
XN_numericas = XN[columnas_numericas].astype(float)

# Scale numeric columns with the already-fitted scaler (transform only, no re-fit)
XN_numericas_scaled = X_scaler.transform(XN_numericas)
XN_numericas_scaled_df = pd.DataFrame(np.asarray(XN_numericas_scaled),
                                      columns=columnas_numericas,
                                      dtype=np.float32)

# Use the saved encoder for the cluster column
XN_cluster_encoded = encoder.transform(XN[['cluster']])
if hasattr(XN_cluster_encoded, 'toarray'):
    # A sparse result has to be densified before it can back a DataFrame.
    XN_cluster_encoded = XN_cluster_encoded.toarray()
columnas_cluster = list(encoder.get_feature_names_out(['cluster']))
XN_cluster_encoded_df = pd.DataFrame(np.asarray(XN_cluster_encoded),
                                     columns=columnas_cluster,
                                     dtype=np.float32)

# Combine all. Both frames are built from plain arrays, so they share the same
# default 0..n-1 index and line up row by row without an in-place reset_index.
XN_final = pd.concat([XN_numericas_scaled_df, XN_cluster_encoded_df], axis=1)

print(f"Shape final: {XN_final.shape}")
print(f"Columnas finales: {list(XN_final.columns)}")
print(f"Columnas del encoder: {columnas_cluster}")

Shape final: (75, 9)
Columnas finales: ['AR_Moisturecontent', 'AR_Ashcontent', 'AR_Volatilematter', 'AR_Fixedcarbon', 'AR_Netcalorificvalue(LHV)', 'cluster_0.0', 'cluster_1.0', 'cluster_2.0', 'cluster_3.0']
Columnas del encoder: ['cluster_0.0', 'cluster_1.0', 'cluster_2.0', 'cluster_3.0']


In [53]:
# Elemental analysis prediction: run the loaded model on the preprocessed feature table.
# NOTE(review): the result is presumably still in the target scaler's scaled units,
# since it is passed to y_scaler.inverse_transform afterwards — confirm.
y_predicciones = modelo_cargado.predict( XN_final)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


In [55]:
# Data descaling: undo the target scaling applied to the model outputs.
xNuevo_predictN1 = y_scaler.inverse_transform(y_predicciones)

# Flat 1-D copy holding every descaled value.
xNuevo_predictN2 = np.array(xNuevo_predictN1).ravel()

# Tabular view of the descaled predictions; columns keep pandas' default integer labels.
# NOTE(review): the column order presumably follows the target columns in YN — confirm.
YP = pd.DataFrame(data=xNuevo_predictN1)

In [57]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column of the descaled predictions.
YP.describe()

Unnamed: 0,0,1,2,3,4
count,75.0,75.0,75.0,75.0,75.0
mean,40.794071,5.102915,0.774577,0.169027,34.343811
std,8.183843,0.821752,0.542385,0.14456,5.441303
min,16.675745,2.284978,0.303493,0.057798,15.734706
25%,38.221226,4.938545,0.476823,0.109637,32.820202
50%,43.548668,5.388448,0.653476,0.132878,36.513329
75%,45.480244,5.57601,0.838604,0.176738,37.706001
max,55.027359,6.082721,4.100028,1.182762,39.300362
