In [60]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import joblib



In [61]:
file_path = "/content/plasticdataset.csv"

In [62]:
# Importing the dataset
data = pd.read_csv("plasticdataset.csv")

In [63]:
df = pd.DataFrame(data)

In [64]:
# One-Hot Encoding for Plastic_Type
encoder = OneHotEncoder(sparse_output=False)  # Updated parameter
encoded_plastic_type = encoder.fit_transform(df[['Plastic_Type']])
encoded_columns = encoder.get_feature_names_out(['Plastic_Type'])

# Combine encoded columns with the original dataframe
encoded_df = pd.DataFrame(encoded_plastic_type, columns=encoded_columns)
df = pd.concat([df.drop(columns=['Plastic_Type']), encoded_df], axis=1)

In [65]:
print(df.columns)

Index(['Index', 'Temperature (°C)', 'Pressure (atm)', 'Reaction_Time (hours)',
       'Reactor_Type (0=Batch, 1=Continuous)', 'Catalyst_Conc (%)',
       'Energy_Produced (MJ)', 'Plastic_Type_ABS', 'Plastic_Type_HDPE',
       'Plastic_Type_LCP', 'Plastic_Type_LDPE', 'Plastic_Type_Nylon',
       'Plastic_Type_PC', 'Plastic_Type_PEEK', 'Plastic_Type_PET',
       'Plastic_Type_PETG', 'Plastic_Type_PMMA', 'Plastic_Type_POM',
       'Plastic_Type_PP', 'Plastic_Type_PPO', 'Plastic_Type_PPS',
       'Plastic_Type_PS', 'Plastic_Type_PUR', 'Plastic_Type_PVC',
       'Plastic_Type_SAN', 'Plastic_Type_Teflon', 'Plastic_Type_UHMWPE'],
      dtype='object')


In [66]:
print(df.head())

   Index  Temperature (°C)  Pressure (atm)  Reaction_Time (hours)  \
0      0               450             1.0                    2.0   
1      1               400             0.8                    3.0   
2      2               320             0.9                    4.0   
3      3               375             1.2                    2.5   
4      4               400             1.0                    3.0   

   Reactor_Type (0=Batch, 1=Continuous)  Catalyst_Conc (%)  \
0                                     0                  5   
1                                     0                 10   
2                                     1                  3   
3                                     0                  6   
4                                     1                  7   

   Energy_Produced (MJ)  Plastic_Type_ABS  Plastic_Type_HDPE  \
0                  25.5               0.0                0.0   
1                  28.0               0.0                1.0   
2                  1

In [67]:
# Select all one-hot encoded Plastic_Type columns
plastic_type_columns = [col for col in df.columns if col.startswith("Plastic_Type_")]



In [68]:
# Define the one-hot encoded columns for Plastic_Type
plastic_type_columns = ["Plastic_Type_PET", "Plastic_Type_HDPE", "Plastic_Type_PVC",
                        "Plastic_Type_LDPE", "Plastic_Type_PP", "Plastic_Type_PS",
                        "Plastic_Type_PUR", "Plastic_Type_PC", "Plastic_Type_ABS",
                        "Plastic_Type_PMMA"]

# Define X (features) and y (target)
X = df[plastic_type_columns + ["Temperature (°C)", "Pressure (atm)", "Reaction_Time (hours)", "Reactor_Type (0=Batch, 1=Continuous)", "Catalyst_Conc (%)"]]
y = df["Energy_Produced (MJ)"]

print(X.head())  # Verify the feature DataFrame
print(y.head())  # Verify the target DataFrame


   Plastic_Type_PET  Plastic_Type_HDPE  Plastic_Type_PVC  Plastic_Type_LDPE  \
0               1.0                0.0               0.0                0.0   
1               0.0                1.0               0.0                0.0   
2               0.0                0.0               1.0                0.0   
3               0.0                0.0               0.0                1.0   
4               0.0                0.0               0.0                0.0   

   Plastic_Type_PP  Plastic_Type_PS  Plastic_Type_PUR  Plastic_Type_PC  \
0              0.0              0.0               0.0              0.0   
1              0.0              0.0               0.0              0.0   
2              0.0              0.0               0.0              0.0   
3              0.0              0.0               0.0              0.0   
4              1.0              0.0               0.0              0.0   

   Plastic_Type_ABS  Plastic_Type_PMMA  Temperature (°C)  Pressure (atm)  \
0   

In [69]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [70]:
# Train a Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [71]:

# Predict on test data
y_pred = model.predict(X_test)


In [72]:
# Hyperparameter Tuning using GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=5, scoring='r2')
grid_search.fit(X_train, y_train)

# Best model from GridSearchCV
best_model = grid_search.best_estimator_

In [73]:
# Feature importance
print("Feature Importance:")
for feature, importance in zip(X.columns, best_model.feature_importances_):
    print(f"{feature}: {importance:.4f}")

# Cross-validation scores
cv_scores = cross_val_score(best_model, X_train, y_train, cv=5, scoring='r2')
print(f"Cross-Validation R² Scores: {cv_scores}")
print(f"Mean Cross-Validation R²: {np.mean(cv_scores):.4f}")

Feature Importance:
Plastic_Type_PET: 0.0000
Plastic_Type_HDPE: 0.0000
Plastic_Type_PVC: 0.0000
Plastic_Type_LDPE: 0.0000
Plastic_Type_PP: 0.0000
Plastic_Type_PS: 0.0000
Plastic_Type_PUR: 0.0000
Plastic_Type_PC: 0.0000
Plastic_Type_ABS: 0.0000
Plastic_Type_PMMA: 0.0000
Temperature (°C): 0.7854
Pressure (atm): 0.0776
Reaction_Time (hours): 0.0694
Reactor_Type (0=Batch, 1=Continuous): 0.0086
Catalyst_Conc (%): 0.0590
Cross-Validation R² Scores: [ 0.8003253   0.35148005  0.35017688 -2.85021558 -2.37830114]
Mean Cross-Validation R²: -0.7453


In [74]:
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 0.8021249999999996
R-squared: 0.6997894736842107


In [76]:
joblib.dump(model, 'plastic_model.pkl')

['plastic_model.pkl']