In [49]:
import pandas as pd 
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
import joblib as jlb

In [25]:
df = pd.read_csv('/Users/adarshbadjate/Downloads/archive (1)/ElectricCarData_Clean.csv',header = 0)
df.info()
df = df[['AccelSec','TopSpeed_KmH','Range_Km','Efficiency_WhKm','Seats','PriceEuro']]
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Brand            103 non-null    object 
 1   Model            103 non-null    object 
 2   AccelSec         103 non-null    float64
 3   TopSpeed_KmH     103 non-null    int64  
 4   Range_Km         103 non-null    int64  
 5   Efficiency_WhKm  103 non-null    int64  
 6   FastCharge_KmH   103 non-null    object 
 7   RapidCharge      103 non-null    object 
 8   PowerTrain       103 non-null    object 
 9   PlugType         103 non-null    object 
 10  BodyStyle        103 non-null    object 
 11  Segment          103 non-null    object 
 12  Seats            103 non-null    int64  
 13  PriceEuro        103 non-null    int64  
dtypes: float64(1), int64(5), object(8)
memory usage: 11.4+ KB


Unnamed: 0,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,Seats,PriceEuro
0,4.6,233,450,161,5,55480
1,10.0,160,270,167,5,30000
2,4.7,210,400,181,5,56440
3,6.8,180,360,206,5,68040
4,9.5,145,170,168,4,32997


In [26]:
df.describe()

Unnamed: 0,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,Seats,PriceEuro
count,103.0,103.0,103.0,103.0,103.0,103.0
mean,7.396117,179.194175,338.786408,189.165049,4.883495,55811.563107
std,3.01743,43.57303,126.014444,29.566839,0.795834,34134.66528
min,2.1,123.0,95.0,104.0,2.0,20129.0
25%,5.1,150.0,250.0,168.0,5.0,34429.5
50%,7.3,160.0,340.0,180.0,5.0,45000.0
75%,9.0,200.0,400.0,203.0,5.0,65000.0
max,22.4,410.0,970.0,273.0,7.0,215000.0


In [27]:
df.isna().sum()

AccelSec           0
TopSpeed_KmH       0
Range_Km           0
Efficiency_WhKm    0
Seats              0
PriceEuro          0
dtype: int64

In [29]:
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df.drop('PriceEuro', axis=1))

In [33]:
X = scaled_features
y = df['PriceEuro']

In [36]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)


In [37]:
model = LinearRegression()
model.fit(X_train,y_train)

In [38]:
y_predict = model.predict(X_test)

In [40]:
print("mse :", mean_squared_error(y_test,y_predict))
print("mae :", mean_absolute_error(y_test,y_predict))
print("r2 :", r2_score(y_test,y_predict))


mse : 293564270.4115519
mae : 13555.57141370348
r2 : 0.6298230334963955


In [45]:
#Model tunning 
param_grid = {
        'copy_X' : [True, False],
        'fit_intercept' : [True, False],
        'n_jobs' : [1,2,3],
        'positive' : [True, False]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='r2', n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)


Fitting 3 folds for each of 24 candidates, totalling 72 fits
[CV] END copy_X=True, fit_intercept=True, n_jobs=1, positive=True; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=2, positive=True; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=1, positive=False; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=2, positive=True; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=2, positive=False; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=2, positive=False; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=2, positive=False; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=3, positive=True; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=1, positive=True; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=3, positive=True; total time=   0.0s
[CV] END copy_X=True, fit_intercept=True, n_jobs=3, positive=True; total ti

In [46]:
best_model = grid_search.best_estimator_
best_model.fit(X_train,y_train)

In [48]:
y_pred_tunned = best_model.predict(X_test)
mae_tuned = mean_absolute_error(y_test,y_pred_tunned)
mse_tuned = mean_squared_error(y_test,y_pred_tunned)
r2_tuned = r2_score(y_test,y_pred_tunned)

print(f"Tuned MAE: ", mae_tuned)
print(f"Tuned MSE: ", mse_tuned)
print(f"Tuned r2: ", r2_tuned)

Tuned MAE:  14289.333311540493
Tuned MSE:  327498976.7365628
Tuned r2:  0.5870322448593022


In [50]:
jlb.dump(model,'car_price_model.pkl')
loaded_model = jlb.load('car_price_model.pkl')

In [51]:
df.head()

Unnamed: 0,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,Seats,PriceEuro
0,4.6,233,450,161,5,55480
1,10.0,160,270,167,5,30000
2,4.7,210,400,181,5,56440
3,6.8,180,360,206,5,68040
4,9.5,145,170,168,4,32997


In [52]:
new_data = pd.DataFrame({
    'AccelSec': [4.5],
    'TopSpeed_KmH': [250],
    'Range_Km': [400],
    'Efficiency_WhKm': [150],
    'Seats': [5]
})


In [53]:
new_data_scaled = scaler.transform(new_data)


In [54]:
predicted_price = model.predict(new_data_scaled)

In [55]:
print(predicted_price)

[86007.07900853]
