In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [2]:
# Toy vehicle dataset: one list per column, ten vehicles, positions aligned
# across columns (index i in every list describes the same vehicle).
# NOTE(review): units are not stated in the notebook — Engine_Size is
# presumably cc and Weight presumably kg; mileage is reported as km/l
# by the prediction cell. Confirm against the data source.
data = dict(
    Vehicle=[
        'Car A', 'Car B', 'Car C', 'Truck A', 'Truck B',
        'Car D', 'Car E', 'Car F', 'Truck C', 'Truck D',
    ],
    Engine_Size=[2000, 2500, 1800, 3500, 4500, 1600, 2200, 3000, 4000, 5000],
    Horsepower=[180, 220, 160, 280, 350, 130, 200, 250, 320, 400],
    Torque=[250, 300, 220, 450, 600, 180, 270, 350, 500, 700],
    Weight=[1300, 1500, 1200, 2000, 2500, 1100, 1400, 1600, 2300, 2700],
    # Regression target.
    Mileage_per_Liter=[12.75, 10.63, 14.82, 7.65, 6.38, 15.5, 11.8, 10.2, 8.5, 5.5],
)

In [3]:
# Tabulate the raw column lists, then separate predictors from the target.
df = pd.DataFrame(data)

# Feature matrix: the four numeric predictors. 'Vehicle' is only a label and
# is deliberately left out of the model inputs.
X = df.loc[:, ['Engine_Size', 'Horsepower', 'Torque', 'Weight']]

# Target vector: the value the regression is trained to predict.
Y = df.loc[:, 'Mileage_per_Liter']

In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs. With the 10-row dataset above this leaves
# only 2 rows in the test set.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Ordinary least-squares fit on the training rows (fit() returns the fitted
# estimator, so construction and fitting can be chained).
model = LinearRegression().fit(X_train, Y_train)

# Predictions for the held-out rows, consumed by the metrics cell.
Y_pred = model.predict(X_test)

In [5]:
# Score the held-out predictions against the true targets.
# NOTE(review): the hold-out set is only two rows, so both numbers are very
# noisy; the recorded R2 is negative, i.e. worse than always predicting the
# mean of the test targets. Treat these as a smoke check, not a benchmark.
mse = mean_squared_error(y_true=Y_test, y_pred=Y_pred)
r2 = r2_score(y_true=Y_test, y_pred=Y_pred)

print("Mean Squared Error:", mse)
print("R2 Score:", r2)

Mean Squared Error: 8.611268839787664
R2 Score: -6.592205109028331


In [6]:
# Persist the fitted estimator to the working directory so it can be reloaded
# with joblib.load(). The call is left as the last expression so the cell
# displays the list of file names that were written.
joblib.dump(value=model, filename='new_vehicle_mileage_model.pkl')

['new_vehicle_mileage_model.pkl']

In [7]:
# One hypothetical vehicle, values given in the same order as the training
# columns: Engine_Size, Horsepower, Torque, Weight.
sample_vehicle = [[2200, 200, 280, 1500]]

# The model was fitted on a DataFrame with named columns. Passing a bare list
# makes the column alignment purely positional and, on scikit-learn >= 1.0,
# raises a "X does not have valid feature names" UserWarning. Wrapping the
# sample in a one-row frame that reuses the training columns keeps the input
# consistent with what the estimator saw during fit().
sample_features = pd.DataFrame(sample_vehicle, columns=X.columns)

# predict() returns one value per input row; there is a single row here.
predicted_mileage_per_liter = model.predict(sample_features)
print("Predicted Mileage per Liter (km/l):", predicted_mileage_per_liter[0])

Predicted Mileage per Liter (km/l): 10.714803523542727


