<a href="https://colab.research.google.com/github/AnaaBiz/stability/blob/main/Stability_Studies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install pandas scikit-learn

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error

# Step 1: Create fake data
data = pd.DataFrame({
    'product_form': ['powder', 'liquid', 'liquid', 'powder', 'powder', 'liquid'] * 20,
    'packaging': ['PET', 'Glass', 'TetraPak', 'Glass', 'PET', 'TetraPak'] * 20,
    'vitamin_c_T0': [40, 35, 30, 50, 45, 32] * 20,
    'vitamin_c_T12': [30, 20, 15, 42, 38, 18] * 20
})

# Step 2: Encode categorical variables (product_form and packaging)
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore') # sparse=False is deprecated
encoded = encoder.fit_transform(data[['product_form', 'packaging']])
encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(['product_form', 'packaging'])) # added column names

# Combine with numerical data
X = pd.concat([encoded_df, data[['vitamin_c_T0']]], axis=1)
y = data['vitamin_c_T12']

# Step 3: Split data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train the robot (Random Forest)
model = RandomForestRegressor()
model.fit(X_train, y_train)

# Step 5: Test the robot
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error: {mse:.2f}")

# Step 6: Try a new product!
new_product = pd.DataFrame({
    'product_form': ['powder'],
    'packaging': ['PET'],
    'vitamin_c_T0': [40]
})

# Encode new product
new_encoded = encoder.transform(new_product[['product_form', 'packaging']])
new_X = pd.concat([pd.DataFrame(new_encoded, columns=encoder.get_feature_names_out(['product_form', 'packaging'])), # added column names
                   new_product[['vitamin_c_T0']]], axis=1)

# Predict final vitamin C
predicted_c = model.predict(new_X)
print(f"Predicted vitamin C after 12 months: {predicted_c[0]:.2f} mg")


Mean Squared Error: 0.00
Predicted vitamin C after 12 months: 30.00 mg
