In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Read the oversampled/resampled workbook into one DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that file exists.
file_path = r"C:\Users\29200\Downloads\Dataset\oversampled_resampled.xlsx"
df = pd.read_excel(file_path)

# Input features of the forward model.
# NOTE(review): the "Reversed" cell later in this notebook selects
# 'Cell Viability 24' ... 'ALP 7' (no 'h'/'d' suffix) from the same workbook
# path -- confirm which header spelling the file actually uses.
feature_columns = [
    'Conc.',
    'Cell Viability 24h',
    'Cell Viability 48h',
    'Cell Viability 72h',
    'Cell Viability 96h',
    'ALP 7d',
    'Surface area m2/g',
    'Pore volume cm3/g',
    'Pore size nm',
]
X = df[feature_columns]

# Target columns predicted from the features above.
target_columns = ['SiO2', 'B2O3', 'CaO', 'Na2O', 'P2O5', 'Ce', 'Ce2O3', 'CeO2']
y = df[target_columns]

# Hold out 28% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.28,
    random_state=42,
)

# Multi-output wrapper around a seeded 100-tree ExtraTrees regressor
# (scikit-learn fits one copy of the base regressor per target column).
# `fit` returns the estimator itself, so `model` is the fitted wrapper.
model = MultiOutputRegressor(
    ExtraTreesRegressor(n_estimators=100, random_state=42)
).fit(X_train, y_train)

# Predictions for the held-out rows; columns follow the order of `y`.
y_pred = model.predict(X_test)

# Report RMSE and R² for every target on the held-out rows, summing the RMSEs
# along the way so they can be averaged below.
overall_rmse = 0
for i, column in enumerate(y.columns):
    actual = y_test.iloc[:, i]
    predicted = y_pred[:, i]
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    r2 = r2_score(actual, predicted)
    overall_rmse += rmse
    print(f"{column}: RMSE = {rmse:.4f}, R² = {r2:.4f}")

# The "overall" RMSE reported here is the unweighted mean of the per-target
# RMSEs, so targets on larger numeric scales dominate it.
overall_rmse /= y.shape[1]

# R² over all targets at once, using r2_score's default multi-output aggregation.
overall_r2 = r2_score(y_test, y_pred)
print(f"Overall Model RMSE: {overall_rmse:.4f}")
print(f"Overall Model R² Score: {overall_r2:.4f}")

# 5-fold cross-validated R² for the same estimator configuration. Note that
# this runs on the full X / y, not only on the training split used above.
cv_scores = cross_val_score(model, X, y, cv=5, scoring='r2')
mean_cv_r2 = np.mean(cv_scores)
print(f"Cross-validated R² scores: {cv_scores}")
print(f"Mean R² Score across all folds: {mean_cv_r2:.4f}")

# One hypothetical sample, keyed by the feature names the model was fitted on
# (same names, same order as the columns of X).
new_sample = {
    'Conc.': 0.8,
    'Cell Viability 24h': 93,
    'Cell Viability 48h': 88,
    'Cell Viability 72h': 86,
    'Cell Viability 96h': 82,
    'ALP 7d': 1.2,
    'Surface area m2/g': 225,
    'Pore volume cm3/g': 0.3,
    'Pore size nm': 2,
}
# Wrap every value in a one-element list so the frame has exactly one row.
new_input_features = pd.DataFrame(
    {name: [value] for name, value in new_sample.items()}
)

new_predictions = model.predict(new_input_features)

# Label the predicted values with the target names taken from `y`, so the
# labels cannot drift from the columns the model was trained on.
new_predictions_df = pd.DataFrame(new_predictions, columns=list(y.columns))
print("New Predictions:")
print(new_predictions_df)


SiO2: RMSE = 1.6067, R² = 0.9875
B2O3: RMSE = 0.6927, R² = 0.9528
CaO: RMSE = 0.3681, R² = 0.9979
Na2O: RMSE = 1.6066, R² = 0.9787
P2O5: RMSE = 0.0766, R² = 0.9993
Ce: RMSE = 0.0000, R² = 1.0000
Ce2O3: RMSE = 0.2305, R² = 0.9871
CeO2: RMSE = 0.8750, R² = 0.8616
Overall Model RMSE: 0.6820
Overall Model R² Score: 0.9706
Cross-validated R² scores: [0.79755416 0.71353862 0.85720409 0.83798635 0.71426812]
Mean R² Score across all folds: 0.7841
New Predictions:
      SiO2  B2O3      CaO    Na2O    P2O5   Ce  Ce2O3   CeO2
0  62.7814   0.4  23.0418  5.0662  4.3045  0.0   0.44  2.436


In [5]:
# Reversed

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Read the oversampled/resampled workbook into one DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that file exists.
file_path = r"C:\Users\29200\Downloads\Dataset\oversampled_resampled.xlsx"
df = pd.read_excel(file_path)

# Reversed problem: the columns the forward model predicted become the inputs ...
reversed_feature_columns = ['SiO2', 'B2O3', 'CaO', 'Na2O', 'P2O5', 'Ce', 'Ce2O3', 'CeO2']
X_reversed = df[reversed_feature_columns]

# ... and the forward model's inputs become the targets.
# NOTE(review): the forward-model cell earlier in this notebook selects
# 'Cell Viability 24h' ... 'ALP 7d' (with 'h'/'d' suffixes) from the same
# workbook path -- confirm which header spelling the file actually uses.
reversed_target_columns = [
    'Conc.',
    'Cell Viability 24',
    'Cell Viability 48',
    'Cell Viability 72',
    'Cell Viability 96',
    'ALP 7',
    'Surface area m2/g',
    'Pore volume cm3/g',
    'Pore size nm',
]
y_reversed = df[reversed_target_columns]

# Hold out 28% of the rows for evaluation; the fixed seed makes the split repeatable.
(
    X_train_reversed,
    X_test_reversed,
    y_train_reversed,
    y_test_reversed,
) = train_test_split(X_reversed, y_reversed, test_size=0.28, random_state=42)

# Multi-output wrapper around a seeded 100-tree ExtraTrees regressor
# (scikit-learn fits one copy of the base regressor per target column).
# `fit` returns the estimator itself, so `model_reversed` is the fitted wrapper.
model_reversed = MultiOutputRegressor(
    ExtraTreesRegressor(n_estimators=100, random_state=42)
).fit(X_train_reversed, y_train_reversed)

# Predictions for the held-out rows; columns follow the order of `y_reversed`.
y_pred_reversed = model_reversed.predict(X_test_reversed)

# Report RMSE and R² for every reversed target on the held-out rows, summing
# the RMSEs along the way so they can be averaged below.
overall_rmse_reversed = 0
for i, column in enumerate(y_reversed.columns):
    actual = y_test_reversed.iloc[:, i]
    predicted = y_pred_reversed[:, i]
    rmse_reversed = np.sqrt(mean_squared_error(actual, predicted))
    r2_reversed = r2_score(actual, predicted)
    overall_rmse_reversed += rmse_reversed
    print(f"{column}: RMSE = {rmse_reversed:.4f}, R² = {r2_reversed:.4f}")

# The "overall" RMSE reported here is the unweighted mean of the per-target
# RMSEs, so targets on larger numeric scales dominate it.
overall_rmse_reversed /= y_reversed.shape[1]

# R² over all targets at once, using r2_score's default multi-output aggregation.
overall_r2_reversed = r2_score(y_test_reversed, y_pred_reversed)
print(f"Overall Model RMSE: {overall_rmse_reversed:.4f}")
print(f"Overall Model R² Score: {overall_r2_reversed:.4f}")

# 5-fold cross-validated R² for the same estimator configuration. Note that
# this runs on the full X_reversed / y_reversed, not only on the training
# split used above.
cv_scores_reversed = cross_val_score(
    model_reversed, X_reversed, y_reversed, cv=5, scoring='r2'
)
mean_cv_r2_reversed = np.mean(cv_scores_reversed)
print(f"Cross-validated R² scores: {cv_scores_reversed}")
print(f"Mean R² Score across all folds: {mean_cv_r2_reversed:.4f}")

# One hypothetical sample for the reversed model, keyed by the feature names it
# was fitted on (same names, same order as the columns of X_reversed).
new_reversed_sample = {
    'SiO2': 58,
    'B2O3': 0,
    'CaO': 36,
    'Na2O': 0,
    'P2O5': 4,
    'Ce': 0,
    'Ce2O3': 2.5,
    'CeO2': 0,
}
# Wrap every value in a one-element list so the frame has exactly one row.
new_reversed_input_features = pd.DataFrame(
    {name: [value] for name, value in new_reversed_sample.items()}
)

new_reversed_predictions = model_reversed.predict(new_reversed_input_features)

# Label the predicted values with the target names taken from `y_reversed`, so
# the labels cannot drift from the columns the model was trained on.
new_reversed_predictions_df = pd.DataFrame(
    new_reversed_predictions,
    columns=list(y_reversed.columns),
)
print("New Predictions based on reversed input features:")
print(new_reversed_predictions_df)


Conc.: RMSE = 0.2783, R² = 0.5267
Cell Viability 24: RMSE = 9.1587, R² = 0.5011
Cell Viability 48: RMSE = 2.6484, R² = 0.9124
Cell Viability 72: RMSE = 3.5345, R² = 0.4395
Cell Viability 96: RMSE = 3.5639, R² = 0.2239
ALP 7: RMSE = 0.7225, R² = 0.4714
Surface area m2/g: RMSE = 36.4084, R² = 0.6770
Pore volume cm3/g: RMSE = 0.0428, R² = 0.9999
Pore size nm: RMSE = 0.1041, R² = 0.9438
Overall Model RMSE: 6.2735
Overall Model R² Score: 0.6329
Cross-validated R² scores: [0.53439488 0.54814934 0.58528464 0.62534338 0.65340999]
Mean R² Score across all folds: 0.5893
New Predictions based on reversed input features:
      Conc.  Cell Viability 24  Cell Viability 48  Cell Viability 72  \
0  0.662259              92.96          90.789714          88.620635   

   Cell Viability 96     ALP 7  Surface area m2/g  Pore volume cm3/g  \
0          95.967571  2.173838         136.631835           7.125406   

   Pore size nm  
0      2.885881  
