In [1]:
import pandas as pd
from sklearn import linear_model
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# 1. Read the raw table from disk
df = pd.read_csv('data.csv')

# 2. Name the text-valued predictors once, then split the frame into
#    predictors (X) and the response (y).
categorical_features = ['Car', 'Model']
X = df[categorical_features + ['Volume', 'Weight']]
y = df['CO2']

# 3. Preprocessing: one-hot encode the text columns so the regressor only
#    ever sees numbers. Categories that were not present at fit time are
#    ignored at predict time instead of raising an error.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Columns not claimed by a transformer ('Volume', 'Weight') are forwarded unchanged.
preprocessor = ColumnTransformer(
    [('cat', categorical_transformer, categorical_features)],
    remainder='passthrough',
)

# 4. Chain preprocessing and a linear regressor, then fit the whole pipeline.
#    Pipeline.fit returns the pipeline itself, so `regr` is the fitted model.
regr = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', linear_model.LinearRegression()),
]).fit(X, y)

# 5. Make a Prediction
# Example: predict CO2 for a Benz C200, 1000cc, 3000kg.
# NOTE(review): the encoder above is built with handle_unknown='ignore', so a
# 'Car' or 'Model' label that does not occur in data.csv is encoded as all
# zeros rather than raising. In that case the estimate is driven only by
# Volume, Weight and the intercept — confirm 'Benz' / 'C200' exist in the
# training data before trusting this number.
new_car = pd.DataFrame({
    'Car': ['Benz'],
    'Model': ['C200'],
    'Volume': [1000],
    'Weight': [3000]
})

# predict() returns one value per input row; this frame has a single row.
predicted_co2 = regr.predict(new_car)

print(f"Predicted CO2 emission: {predicted_co2[0]:.2f} g/km")

Predicted CO2 emission: 135.63 g/km


In [4]:
import pandas as pd
import pickle

# Single source of truth for where the fitted pipeline is stored; both the
# save and the load step below use it.
filename = 'co2_model.sav'

# Save the model to a file.
# `regr` is the fitted pipeline produced by the training cell, which must
# have been run in this kernel before this cell.
with open(filename, 'wb') as file:
    pickle.dump(regr, file)

print(f"Model saved as {filename}")

# Load the saved model from the same path that was just written, so the two
# steps cannot drift apart if `filename` is changed.
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# files you created yourself or otherwise trust.
with open(filename, 'rb') as file:
    loaded_model = pickle.load(file)

# Now you can use loaded_model to predict.
# The frame must carry the same column names the pipeline was fitted on.
# 'Toyoty' is intentional: it is the spelling used in the dataset.
sample_data = pd.DataFrame({
    'Car': ['Toyoty'],
    'Model': ['Aygo'],
    'Volume': [1000],
    'Weight': [790]
})

prediction = loaded_model.predict(sample_data)
print(f"Prediction from loaded model: {prediction[0]}")

Model saved as co2_model.sav
Prediction from loaded model: 99.03121652197102
