In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib

# Train a random-forest regressor on the carbon tracker dataset, report its
# mean absolute error on a 20% hold-out split, and persist the fitted model
# together with the hardware label encoding it was trained with.

# Load dataset
df = pd.read_csv("carbon_tracker_dataset.csv")

# Convert categorical hardware type to numerical.
# The integer codes are positions in the category index, so the mapping
# depends on which labels occur in this CSV. Capture it before overwriting the
# column; otherwise new inputs cannot be encoded consistently at inference.
# Note: pandas assigns code -1 to missing hardware values.
hardware_categorical = df["hardware"].astype("category")
hardware_codes = {
    label: code
    for code, label in enumerate(hardware_categorical.cat.categories.tolist())
}
df["hardware"] = hardware_categorical.cat.codes

# Define features and target variable
X = df[["runtime_seconds", "power_watts", "hardware"]]
y = df["co2_emission_g"]

# Split data (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model (fixed seed so the fitted forest is reproducible)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model on the held-out rows
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.2f}")

# Save trained model
joblib.dump(model, "carbon_emission_model.pkl")
# Save the {hardware label: integer code} mapping next to the model so that
# consumers can encode the "hardware" feature exactly as it was at fit time.
joblib.dump(hardware_codes, "carbon_emission_hardware_codes.pkl")
print("Model saved successfully as carbon_emission_model.pkl")


Mean Absolute Error: 12.00
Model saved successfully as carbon_emission_model.pkl


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=dce7dd19-ab8b-4223-9dff-a9a8cad6bd87' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>