# Import Libraries

In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle

In [None]:
from lightgbm import LGBMRegressor

# Import Preprocessed Data

In [None]:
# Load the preprocessed dataset into a DataFrame.
# NOTE(review): the path is absolute and machine-specific; it must be adjusted
# before this cell can run anywhere else.
data = pd.read_csv(filepath_or_buffer="C:/Users/Shruthi/Downloads/data.csv")

In [None]:
# Preview the first five rows to sanity-check the loaded data.
data.head(n=5)

# Label Encoding

In [None]:
# Columns whose values are replaced by integer codes.
labels = ['model_year', 'model_name', 'maker', 'fuel_type']

# One fitted LabelEncoder per column, keyed by column name.
mapper = {}
for column in labels:
    encoder = LabelEncoder()
    mapper[column] = encoder
    codes = encoder.fit_transform(data[column])
    # Persist the learned classes so the same code <-> value mapping can be
    # reloaded later from 'classes<column>.npy'.
    np.save(f"classes{column}.npy", encoder.classes_)
    # Store the codes next to the original column as '<column>_labels'.
    data[column + '_labels'] = pd.Series(codes, index=data.index)

# Keep the numeric columns plus the encoded versions of the categorical ones.
# NOTE(review): 'price (â‚¹)' looks like a mis-decoded rupee sign; confirm it
# matches the actual CSV header. Both 'price' and this column are selected
# here - verify that they are not the same quantity.
selected_columns = ['price', 'distance_covered (km)', 'price (â‚¹)']
selected_columns += [name + "_labels" for name in labels]
labeled = data[selected_columns]

print(labeled.columns)

# Evaluation Metrics

In [6]:
def find_scores(Y_actual, Y_pred, X_train):
    """Compute a set of regression metrics for one batch of predictions.

    Args:
        Y_actual: Ground-truth target values, either 1-D or an (n, 1)
            column vector.
        Y_pred: Predicted values, one per entry of ``Y_actual``.
        X_train: 2-D feature matrix. Only its column count is used, as the
            number of predictors ``k`` in the adjusted R² formula.

    Returns:
        dict with the keys ``mae``, ``mse``, ``rmse``, ``rmsle``, ``r2`` and
        ``adj_r2_score``. ``rmsle`` is NaN when any actual or predicted value
        is negative; ``adj_r2_score`` is NaN when ``n - k - 1 <= 0``.
    """
    mae = mean_absolute_error(Y_actual, Y_pred)
    mse = mean_squared_error(Y_actual, Y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(Y_actual, Y_pred)

    # Flatten both inputs before subtracting them: an (n, 1) target minus an
    # (n,) prediction would otherwise broadcast to an (n, n) matrix.
    actual = np.asarray(Y_actual, dtype=float).ravel()
    predicted = np.asarray(Y_pred, dtype=float).ravel()

    # RMSLE is the root mean squared difference of log1p values, not the
    # logarithm of the RMSE. It is only meaningful for non-negative values,
    # so report NaN instead of raising or propagating invalid logs.
    if (actual < 0).any() or (predicted < 0).any():
        rmsle = np.nan
    else:
        log_diff = np.log1p(predicted) - np.log1p(actual)
        rmsle = np.sqrt(np.mean(log_diff ** 2))

    # The sample count must come from the data that was actually scored,
    # which is not necessarily the matrix passed as X_train.
    n = np.shape(Y_actual)[0]
    _, k = X_train.shape
    dof = n - k - 1
    if dof > 0:
        adj_r2_score = 1 - ((1 - r2) * (n - 1) / dof)
    else:
        # Adjusted R² is undefined without positive residual degrees of freedom.
        adj_r2_score = np.nan

    return {
        'mae': mae,
        'mse': mse,
        'rmse': rmse,
        'rmsle': rmsle,
        'r2': r2,
        'adj_r2_score': adj_r2_score,
    }

# Train Test Split

In [7]:
# The first column of `labeled` is the target; it is kept as an (n, 1)
# column vector. Every remaining column is used as a feature.
# NOTE(review): the features include a second price-like column
# ('price (â‚¹)'); confirm this does not leak the target into X.
Y = labeled.iloc[:, 0].to_numpy().reshape(-1, 1)
X = labeled.iloc[:, 1:].to_numpy()

In [8]:
# Hold out 40% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
(X_train, X_test, Y_train, Y_test) = train_test_split(
    X,
    Y,
    test_size=0.4,
    random_state=42,
)

# Predictive Modeling

# LGBM Regressor

In [None]:
# Gradient-boosted tree regressor with an RMSE objective and metric.
model = LGBMRegressor(
    boosting_type="gbdt",
    learning_rate=0.07,
    metric="rmse",
    n_estimators=300,
    objective="root_mean_squared_error",
    random_state=42,
    reg_sqrt=True,
)

# Y_train is an (n, 1) column vector; the estimator expects a 1-D target and
# emits a DataConversionWarning otherwise, so flatten it explicitly.
model.fit(X_train, np.ravel(Y_train))

Y_pred = model.predict(X_test)

# The scores describe the held-out set, so the matrix supplying the sample
# and predictor counts for adjusted R² must be the held-out one as well.
find_scores(Y_test, Y_pred, X_test)

# Save Model

In [10]:
# Serialize the fitted model to disk. The context manager guarantees that the
# file is flushed and closed even if pickling fails part-way.
# NOTE: only unpickle this file from a trusted location - pickle.load can
# execute arbitrary code.
with open('resale_model.sav', 'wb') as model_file:
    pickle.dump(model, model_file)