In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [2]:
# Predictor columns handed to the regressor, and the column it is asked to predict.
feature_columns = ['on road old', 'on road now', 'economy', 'condition', 'rating']
target_column = 'current price'

# Read the used-car listings; X and y come from the same frame so their rows stay aligned.
car_data = pd.read_csv('used_car.csv')
X = car_data[feature_columns]
y = car_data[target_column]

In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Linear regression with the estimator's default settings, fitted on the training rows only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [4]:
# Display the feature frame (the five predictor columns selected from car_data).
X

Unnamed: 0,on road old,on road now,economy,condition,rating
0,535651,798186,14,2,1
1,591911,861056,9,9,5
2,686990,770762,15,8,2
3,573999,722381,11,3,4
4,691388,811335,12,9,3
...,...,...,...,...,...
995,633238,743850,11,6,1
996,599626,848195,14,9,2
997,646344,842733,9,8,1
998,535559,732439,9,5,4


In [5]:
# Display the regression target (the 'current price' column of car_data).
y

0      351318.0
1      285001.5
2      215386.0
3      244295.5
4      531114.5
         ...   
995    190744.0
996    419748.0
997    405871.0
998     74398.0
999    414938.5
Name: current price, Length: 1000, dtype: float64

In [6]:
# Count missing values per column; a column of zeros means no imputation is needed.
car_data.isna().sum()

v.id             0
on road old      0
on road now      0
years            0
km               0
rating           0
condition        0
economy          0
top speed        0
hp               0
torque           0
current price    0
dtype: int64

In [8]:
# Preview the first five rows of the raw frame, all columns included.
car_data.head(n=5)

Unnamed: 0,v.id,on road old,on road now,years,km,rating,condition,economy,top speed,hp,torque,current price
0,1,535651,798186,3,78945,1,2,14,177,73,123,351318.0
1,2,591911,861056,6,117220,5,9,9,148,74,95,285001.5
2,3,686990,770762,2,132538,2,8,15,181,53,97,215386.0
3,4,573999,722381,4,101065,4,3,11,197,54,116,244295.5
4,5,691388,811335,6,61559,3,9,12,160,53,105,531114.5


In [10]:
# Print the frame's schema summary: per-column dtype, non-null count, and memory usage.
car_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   v.id           1000 non-null   int64  
 1   on road old    1000 non-null   int64  
 2   on road now    1000 non-null   int64  
 3   years          1000 non-null   int64  
 4   km             1000 non-null   int64  
 5   rating         1000 non-null   int64  
 6   condition      1000 non-null   int64  
 7   economy        1000 non-null   int64  
 8   top speed      1000 non-null   int64  
 9   hp             1000 non-null   int64  
 10  torque         1000 non-null   int64  
 11  current price  1000 non-null   float64
dtypes: float64(1), int64(11)
memory usage: 93.9 KB


In [23]:
# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)

# Evaluate every regression metric against the same (truth, prediction) pair.
mse, mae, r2 = [
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
]

In [12]:
# Display the predicted prices for the held-out test rows.
y_pred

array([305242.85848914, 349050.53437717, 248005.53836185, 218363.21927917,
       310219.72922975, 358449.65357102, 357996.87089267, 234096.2868375 ,
       261092.68157614, 321895.97336644, 296730.75534022, 243570.42271807,
       261719.83925007, 376378.89386104, 390469.14920815, 283162.07362956,
       320461.11044581, 281173.96261643, 311141.17119329, 313638.26323928,
       302359.49640859, 293162.73966734, 373075.50973202, 299203.50283422,
       279101.60413222, 338155.3537643 , 408972.35854256, 307318.95731692,
       306951.50525582, 311094.65901374, 276326.92915398, 318354.36773282,
       393063.18330042, 291320.01853985, 312188.91287715, 340152.848218  ,
       346959.95429243, 335910.06120196, 363732.42331882, 319815.05544042,
       333958.3251628 , 324702.0246593 , 270649.15023052, 264676.7098095 ,
       380419.36474124, 363164.41851462, 264703.62710764, 321749.79370264,
       311077.37795909, 335046.33032402, 252845.68812031, 268164.03218765,
       365968.67227588, 3

In [14]:
# Mean squared error on the test split (squared price units, hence the large magnitude).
mse

14143205523.415161

In [22]:
# Mean absolute error on the test split, in the same units as 'current price'.
mae

103215.75037413798

In [24]:
# Coefficient of determination (R^2) on the test split; 1.0 would be a perfect fit.
r2

0.15869202771572744

In [15]:
import pickle
# Serialize the fitted estimator to a file in the current working directory.
# NOTE: only unpickle this file from a trusted location; pickle.load can execute arbitrary code.
with open('linear_regression_model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

In [16]:
import mlflow
import mlflow.sklearn

In [27]:
# Point the MLflow client at the tracking server; replace the URI with your own server's address.
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

# Select the experiment that subsequent runs are recorded under.
mlflow.set_experiment(experiment_name="Used Car Price Prediction")

<Experiment: artifact_location='mlflow-artifacts:/203230380874467616', creation_time=1718473114487, experiment_id='203230380874467616', last_update_time=1718473114487, lifecycle_stage='active', name='Used Car Price Prediction', tags={}>

In [28]:
# Train, evaluate and log one tracked run of the linear-regression model.
with mlflow.start_run():
    # Record the seed that was passed to train_test_split when the data was split.
    mlflow.log_param("train_test_split_random_state", 42)

    # Fit inside the run so the logged artifact is the model these metrics describe.
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Score the held-out rows.
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Use the same metric keys as train_and_log_model ("r2_score", not "r2score")
    # so runs produced by either code path can be compared on the same metric.
    mlflow.log_metrics({"mse": mse, "mae": mae, "r2_score": r2})

    # Store the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, "linear_regression_model")



In [29]:
def train_and_log_model(X_train, X_test, y_train, y_test):
    """Fit a linear regression inside a new MLflow run and register the result.

    The run records the test-set MSE, MAE and R2 as metrics, stores the fitted
    estimator under the artifact path ``model``, and registers that artifact in
    the model registry as ``Used_Car_Price_Prediction``. A one-line summary of
    the metrics is printed at the end.

    Args:
        X_train: Features the model is fitted on.
        X_test: Held-out features used for scoring.
        y_train: Targets the model is fitted on.
        y_test: Held-out targets the predictions are compared against.

    Returns:
        None.
    """
    with mlflow.start_run() as active_run:
        regressor = LinearRegression()
        regressor.fit(X_train, y_train)

        y_hat = regressor.predict(X_test)
        mse = mean_squared_error(y_test, y_hat)
        mae = mean_absolute_error(y_test, y_hat)
        r2 = r2_score(y_test, y_hat)

        # Log each metric under its own key.
        for key, value in (("mse", mse), ("mae", mae), ("r2_score", r2)):
            mlflow.log_metric(key, value)

        mlflow.sklearn.log_model(regressor, "model")

        # The registry entry points at the artifact logged just above.
        model_uri = f"runs:/{active_run.info.run_id}/model"
        mlflow.register_model(model_uri, "Used_Car_Price_Prediction")

        print(f"Model logged with MSE: {mse}, MAE: {mae}, and R2: {r2}")

# Run the tracked training once on the train/test split created earlier in the notebook.
train_and_log_model(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

Successfully registered model 'Used_Car_Price_Prediction'.
2024/06/15 23:21:27 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Used_Car_Price_Prediction, version 1


Model logged with MSE: 14143205523.415161, MAE: 103215.75037413798, and R2: 0.15869202771572744


Created version '1' of model 'Used_Car_Price_Prediction'.
