In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Read the whole workbook into a DataFrame (path is machine-specific)
file_path = r"C:\Users\29200\Downloads\Dataset\Co_oversampled_resampled.xlsx"
df = pd.read_excel(file_path)

# Columns used as model inputs (X) and as regression targets (y);
# both selections are plain column subsets of the loaded frame.
feature_columns = [
    'Conc.',
    'Cell Viability 24',
    'Cell Viability 48',
    'Cell Viability 72',
    'ALP 14',
    'VEGF',
    'Surface area m2/g',
    'Pore volume cm3/g',
    'Pore size nm',
]
target_columns = ['SiO2', 'B2O3', 'CaO', 'Na2O', 'P2O5', 'Co', 'CoO']

X = df[feature_columns]
y = df[target_columns]

# Hold out 29% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.29,
    random_state=42,
)

# Multi-target regression: wrap a seeded AdaBoost regressor so it can be
# fitted against every column of y
base_regressor = AdaBoostRegressor(random_state=42)
model = MultiOutputRegressor(base_regressor)

# Fit on the training portion only
model.fit(X_train, y_train)

# Predictions for the held-out rows (one column per target)
y_pred = model.predict(X_test)

# Per-target hold-out metrics: print RMSE and R² for each column of y and
# keep a running sum of the RMSEs for the averaged figure below
overall_rmse = 0
for idx, target in enumerate(y.columns):
    actual = y_test.iloc[:, idx]
    predicted = y_pred[:, idx]
    target_rmse = np.sqrt(mean_squared_error(actual, predicted))
    target_r2 = r2_score(actual, predicted)
    overall_rmse += target_rmse
    print(f"{target}: RMSE = {target_rmse:.4f}, R² = {target_r2:.4f}")

# The "overall" RMSE reported here is the unweighted mean of the per-target
# RMSEs (sum divided by the number of target columns)
overall_rmse /= y.shape[1]

# R² computed over all targets at once
overall_r2 = r2_score(y_test, y_pred)

print(f"Overall Model RMSE: {overall_rmse:.4f}")
print(f"Overall Model R² Score: {overall_r2:.4f}")

# Score the same estimator configuration with 5-fold cross-validation on the
# full X / y, using R² as the metric.
# NOTE(review): the workbook name suggests the rows were oversampled before
# this step; if so, copies of a row may land in both the training and the
# validation folds and inflate these scores — confirm against the data prep.
cv_scores = cross_val_score(estimator=model, X=X, y=y, scoring='r2', cv=5)
mean_cv_r2 = cv_scores.mean()
print(f"Cross-validated R² scores: {cv_scores}")
print(f"Mean R² Score across all folds: {mean_cv_r2:.4f}")

# Predict the targets for one hand-specified sample using the fitted model.
# Each key must be one of the feature columns the model was trained on.
new_input_features = pd.DataFrame({
    'Conc.': [0.8],
    'Cell Viability 24': [88],
    'Cell Viability 48': [95],
    'Cell Viability 72': [110],
    'ALP 14': [1.1],
    'VEGF': [1.3],
    'Surface area m2/g': [225],
    'Pore volume cm3/g': [0.35],
    'Pore size nm': [2],
})

# Align the sample to the training feature order. A missing feature raises a
# KeyError here instead of the values being fed to the model positionally.
new_input_features = new_input_features[X.columns]

new_predictions = model.predict(new_input_features)

# Label the outputs with the target columns the model was fitted on, rather
# than a second hard-coded list that could drift out of sync with y.
new_predictions_df = pd.DataFrame(new_predictions, columns=y.columns)
print("New Predictions:")
print(new_predictions_df)


SiO2: RMSE = 14.6605, R² = 0.7446
B2O3: RMSE = 12.3729, R² = 0.7404
CaO: RMSE = 6.5571, R² = 0.6249
Na2O: RMSE = 0.2813, R² = 0.9909
P2O5: RMSE = 0.5681, R² = 0.8372
Co: RMSE = 0.6794, R² = 0.6422
CoO: RMSE = 0.3527, R² = 0.9371
Overall Model RMSE: 5.0674
Overall Model R² Score: 0.7882
Cross-validated R² scores: [0.97013528 0.84109556 0.96928478 0.98390126 0.98045192]
Mean R² Score across all folds: 0.9490
New Predictions:
   SiO2  B2O3        CaO  Na2O      P2O5   Co  CoO
0  70.0   0.0  21.470588   0.0  5.011163  2.0  3.0
