In [15]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Atomic weights
atomic_weights = {'H': 1, 'O': 16, 'C': 12}

# Function to calculate molecular weight
def calculate_molecular_weight(molecule):
    weight = 0
    atoms = []
    molecule += ' '  # Add a space at the end for control after the loop
    for i, char in enumerate(molecule):
        if char.isalpha():  # If the character is a letter
            atoms.append(char)
        elif char.isdigit():  # If the character is a number
            # Calculate and add the atomic weight
            weight += atomic_weights[atoms[-1]] * int(char)
        elif char == ' ':  # If reached the end of the molecule name
            # Calculate and add the last atomic weight
            weight += atomic_weights[atoms[-1]]
            return weight
    return weight

# Create the dataset
data = {
    'Molecule': ['H2O', 'H2O2', 'CO2'],
    'Molecular Weight (g/mol)': [calculate_molecular_weight('H2O'), 
                                  calculate_molecular_weight('H2O2'), 
                                  calculate_molecular_weight('CO2')]
}

# Convert the dataset to a DataFrame
df = pd.DataFrame(data)

# Independent variables (X) and dependent variable (y)
X = pd.get_dummies(df[['Molecule']], drop_first=True)  # Convert categorical data using One-Hot Encoding
y = df['Molecular Weight (g/mol)']

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Random Forest Regression model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
predictions = model.predict(X_test)

# Calculate the mean squared error
mse = mean_squared_error(y_test, predictions)
print("Mean Squared Error:", mse)

# Calculate and print the molecular weights of the given molecules' components
molecules = ['H2O', 'H2O2', 'CO2']
for molecule in molecules:
    weight = calculate_molecular_weight(molecule)
    print(f"Molecular Weight of {molecule}: {weight} g/mol")


Mean Squared Error: 931.4704000000002
Molecular Weight of H2O: 18 g/mol
Molecular Weight of H2O2: 50 g/mol
Molecular Weight of CO2: 48 g/mol
