In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error
# Raw CSV header -> short alias used for the rest of the notebook.
# The key order of this dict also fixes the column order of `df`.
column_aliases = {
    "Engine Size(L)": "ENGINESIZE",
    "Cylinders": "CYLINDERS",
    "Fuel Consumption City (L/100 km)": "FUELCONSUMPTION_CITY",
    "Fuel Consumption Hwy (L/100 km)": "FUELCONSUMPTION_HWY",
    "Fuel Consumption Comb (L/100 km)": "FUELCONSUMPTION_COMB",
    "CO2 Emissions(g/km)": "CO2EMISSIONS",
}

# Read the dataset, keep only the mapped columns, then apply the aliases.
df = (
    pd.read_csv("Co2_Emission.csv")[list(column_aliases)]
    .rename(columns=column_aliases)
)

# Preview the first rows of the trimmed, renamed frame.
print(df.head())


   ENGINESIZE  CYLINDERS  FUELCONSUMPTION_CITY  FUELCONSUMPTION_HWY  \
0         2.0          4                   9.9                  6.7   
1         2.4          4                  11.2                  7.7   
2         1.5          4                   6.0                  5.8   
3         3.5          6                  12.7                  9.1   
4         3.5          6                  12.1                  8.7   

   FUELCONSUMPTION_COMB  CO2EMISSIONS  
0                   8.5           196  
1                   9.6           221  
2                   5.9           136  
3                  11.1           255  
4                  10.6           244  


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error

import joblib

# Raw CSV header -> short alias. The key order also fixes the column order
# of `df`. The last entry is the regression target; the rest are candidate
# features (only some of them are used for the model below).
column_aliases = {
    "Engine Size(L)": "ENGINESIZE",
    "Cylinders": "CYLINDERS",
    "Fuel Consumption City (L/100 km)": "FUELCONSUMPTION_CITY",
    "Fuel Consumption Hwy (L/100 km)": "FUELCONSUMPTION_HWY",
    "Fuel Consumption Comb (L/100 km)": "FUELCONSUMPTION_COMB",
    "CO2 Emissions(g/km)": "CO2EMISSIONS",
}

# Load the dataset, keep only the mapped columns, and apply the aliases.
df = (
    pd.read_csv("Co2_Emission.csv")[list(column_aliases)]
    .rename(columns=column_aliases)
)

# Predictors and target for the regression.
feature_cols = ["ENGINESIZE", "CYLINDERS", "FUELCONSUMPTION_COMB"]
X = df[feature_cols]
y = df["CO2EMISSIONS"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an ordinary least-squares linear model on the training portion
# (`fit` returns the estimator itself, so it can be chained).
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows and report each metric on its own line.
y_pred = model.predict(X_test)
for label, metric in (("R² Score:", r2_score), ("MAE:", mean_absolute_error)):
    print(label, metric(y_test, y_pred))

# Persist the fitted model to disk (for Streamlit). Kept as the final
# expression so the cell still displays the list of written file names.
joblib.dump(model, "co2_model.pkl")


R² Score: 0.8773348735033226
MAE: 13.517321294682647


['co2_model.pkl']