In [None]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

# Train one Random Forest regressor per LCIA impact metric and save the
# validation/test error metrics of all models to a single CSV file.

# Set random seeds for reproducibility
np.random.seed(42)
random.seed(42)

# Load your dataset
data_cleaned = pd.read_csv('/content/thermo_normalized_LCIA_dataset.csv')

# List of all LCIA impact metrics
lcia_metrics = ["GWP", "HTP", "MDP", "FETP", "PMFP", "TAP"]

# Replace +/-Inf with NaN and impute NaN with the column mean. This is done
# once for the whole frame: the cleaning does not depend on which column is
# the current target, so repeating it inside the loop was redundant work.
data_cleaned = data_cleaned.replace([np.inf, -np.inf], np.nan)
data_cleaned = data_cleaned.fillna(data_cleaned.mean())

# One dict per target metric; turned into a DataFrame after the loop.
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.)
result_rows = []

for target_column in lcia_metrics:
    # The remaining columns will be the features.
    # NOTE(review): this includes the other LCIA metrics as predictors --
    # confirm that is intended.
    feature_columns = [col for col in data_cleaned.columns if col != target_column]

    # Splitting the dataset into features (X) and labels (y)
    X = data_cleaned[feature_columns]
    y = data_cleaned[target_column]

    # Splitting the dataset into training (60%), validation (20%) and
    # test (20%) sets
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    # Standardize the features. The scaler is fitted on the training split
    # only, so no validation/test statistics leak into preprocessing.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    # Initialize Random Forest Regressor
    model = RandomForestRegressor(
        n_estimators=100,  # Number of trees
        max_depth=5,       # Maximum depth of each tree
        random_state=42
    )

    # Training the model
    model.fit(X_train, y_train)

    # Evaluation on validation and test sets
    y_val_pred = model.predict(X_val)
    y_test_pred = model.predict(X_test)

    # Collecting results (MAE, MSE and R2 for each held-out split)
    results = {
        'Metric': target_column,
        'Validation MAE': mean_absolute_error(y_val, y_val_pred),
        'Validation MSE': mean_squared_error(y_val, y_val_pred),
        'Validation R2': r2_score(y_val, y_val_pred),
        'Test MAE': mean_absolute_error(y_test, y_test_pred),
        'Test MSE': mean_squared_error(y_test, y_test_pred),
        'Test R2': r2_score(y_test, y_test_pred),
    }
    result_rows.append(results)

# Build the results table in one step: one row per LCIA metric.
all_results = pd.DataFrame(result_rows)

csv_file_path = '/content/all_metrics_random_forest_results.csv'
all_results.to_csv(csv_file_path, index=False)

print(f'Results for all metrics have been saved to {csv_file_path}')


  all_results = all_results.append(results, ignore_index=True)
  all_results = all_results.append(results, ignore_index=True)
  all_results = all_results.append(results, ignore_index=True)
  all_results = all_results.append(results, ignore_index=True)
  all_results = all_results.append(results, ignore_index=True)


Results for all metrics have been saved to /content/all_metrics_random_forest_results.csv


  all_results = all_results.append(results, ignore_index=True)
