In [123]:
import pandas as pd
import joblib

In [124]:
# Restore the previously fitted regression model from disk.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
MODEL_PATH = "linear_regression_ev_model.pkl"
lr = joblib.load(MODEL_PATH)

In [125]:
# Read the cleaned EV dataset; its columns are used below to rebuild the
# feature matrix that is fed to the model.
DATA_PATH = "data/ev_data_cleaned.csv"
df = pd.read_csv(DATA_PATH)

In [128]:
# Split the frame into predictors (everything except the target) and the target.
X = df.drop(columns="range_km")
y = df["range_km"]

# One-hot encode the categorical predictors; drop_first=True drops the first
# level of every categorical column.
X_encoded = pd.get_dummies(X, drop_first=True)

# get_dummies builds its columns from whatever categories appear in THIS file,
# so the resulting set/order of columns is not guaranteed to match what the
# model saw during fit. When the estimator recorded its training feature names,
# align to them: missing dummy columns are filled with 0, extras are dropped,
# and the order is made identical. If the attribute is absent (e.g. the model
# was fitted on a plain array), the encoded frame is left as-is.
trained_columns = getattr(lr, "feature_names_in_", None)
if trained_columns is not None:
    X_encoded = X_encoded.reindex(columns=trained_columns, fill_value=0)

In [129]:
# Sanity-check the loaded model against the dataset it is expected to fit.
# Three diagnostics are printed: (1) predictions vs. actual targets for the
# first rows, (2) the intercept and the largest/smallest coefficients, and
# (3) what kind of object the model is and whether it carries preprocessing.
# Predictions that are far from the actual values indicate that the inputs are
# not prepared the same way they were when the model was fitted.
SEPARATOR = "=" * 60

print(SEPARATOR)
print("TESTING MODEL ON ORIGINAL TRAINING DATA:")
print(SEPARATOR)

# Predict on first 5 rows of training data
test_predictions = lr.predict(X_encoded.head(5))
actual_values = y.head(5).values

print("\nFirst 5 predictions vs actual:")
for i, (pred, actual) in enumerate(zip(test_predictions, actual_values)):
    print(f"Row {i}: Predicted={pred:.1f} km, Actual={actual:.1f} km")

print("\n" + SEPARATOR)
print("MODEL COEFFICIENTS CHECK:")
print(SEPARATOR)
print(f"Intercept: {lr.intercept_:.2f}")
print(f"Number of coefficients: {len(lr.coef_)}")

# Pairing coefficients with column names is only meaningful when there is
# exactly one coefficient per encoded column; fail with a clear message
# instead of pandas' generic length-mismatch error.
if len(lr.coef_) != X_encoded.shape[1]:
    raise ValueError(
        f"Model has {len(lr.coef_)} coefficients but the encoded data has "
        f"{X_encoded.shape[1]} columns; the feature matrix does not match "
        "the one used for training."
    )

print("\nTop 10 positive coefficients:")
coef_df = pd.DataFrame({
    'feature': X_encoded.columns,
    'coefficient': lr.coef_
}).sort_values('coefficient', ascending=False)
print(coef_df.head(10))

# coef_df is sorted descending, so the tail holds the smallest coefficients.
print("\nTop 10 negative coefficients:")
print(coef_df.tail(10))

print("\n" + SEPARATOR)
print("CHECKING IF MODEL WAS TRAINED WITH A PIPELINE:")
print(SEPARATOR)
print(f"Model type: {type(lr)}")
print(f"Model attributes: {dir(lr)}")
# Explicit answers to the question in the header: a scikit-learn Pipeline
# exposes `named_steps`; an estimator fitted on a DataFrame with string column
# names exposes `feature_names_in_`.
# NOTE(review): a bare estimator without `feature_names_in_` was presumably
# fitted on an array (for instance the output of a scaler) - confirm against
# the training notebook.
print(f"Is a Pipeline (has named_steps): {hasattr(lr, 'named_steps')}")
print(f"Stores training feature names: {hasattr(lr, 'feature_names_in_')}")

TESTING MODEL ON ORIGINAL TRAINING DATA:

First 5 predictions vs actual:
Row 0: Predicted=-92040.3 km, Actual=225.0 km
Row 1: Predicted=-91901.3 km, Actual=225.0 km
Row 2: Predicted=-99877.9 km, Actual=280.0 km
Row 3: Predicted=-99880.3 km, Actual=280.0 km
Row 4: Predicted=-105237.6 km, Actual=315.0 km

MODEL COEFFICIENTS CHECK:
Intercept: 393.38
Number of coefficients: 174

Top 10 positive coefficients:
                    feature  coefficient
1      battery_capacity_kWh    96.522529
157      segment_F - Luxury    31.444145
170       car_body_type_SUV    22.834619
155       segment_D - Large    21.486735
7                     seats    21.262603
156   segment_E - Executive    20.942941
154      segment_C - Medium    17.190384
162     segment_JC - Medium    12.144920
153     segment_B - Compact    10.429792
164  segment_JE - Executive     9.800253

Top 10 negative coefficients:
                  feature  coefficient
30     cargo_volume_l_300    -7.593190
4    acceleration_0_100_s    -8.

