In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor


# Load the dataset and discard every row that has a missing value in any column.
data = pd.read_csv('MOFS.csv')
data = data.dropna()

# Feature and target column names, used below to index the loaded frame.
input_columns = ['LCD', 'PLD', 'SA', 'Void Fraction']
output_columns = ['Kh CO2/Kh H2O', 'Kh CO2/Kh N2']

X = data[input_columns]
y = data[output_columns]

# One seed shared by the fold splitter and the tree-based estimators.
# Seeding only KFold leaves the decision tree and the random forest
# non-deterministic, so the reported scores would change from run to run.
random_seed = 42

models = [
    ('Linear Regression', LinearRegression()),
    ('Decision Tree', DecisionTreeRegressor(random_state=random_seed)),
    ('Random Forest', RandomForestRegressor(random_state=random_seed)),
]

num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=random_seed)

for model_name, model in models:
    # Score each target column separately with the same folds. The scorer
    # returns negated MSE (higher is better), so the sign is flipped back to
    # report a plain mean squared error averaged over the folds.
    avg_mse_by_output = [
        -cross_val_score(
            model, X, y[column], cv=kf, scoring='neg_mean_squared_error'
        ).mean()
        for column in output_columns
    ]

    # Targets are labelled by their 1-based position in output_columns.
    summary = ', '.join(
        f'Average MSE for output_col{position}: {avg_mse:.2f}'
        for position, avg_mse in enumerate(avg_mse_by_output, start=1)
    )
    print(f'{model_name} - {summary}')




Linear Regression - Average MSE for output_col1: 90234720.63, Average MSE for output_col2: 248895075888787259392.00
Decision Tree - Average MSE for output_col1: 269421196.71, Average MSE for output_col2: 496595730507967627264.00
Random Forest - Average MSE for output_col1: 211976567.82, Average MSE for output_col2: 396557864013762002944.00
