In [1]:
import pandas
import os
import plotly.offline as plotly_offline
import joblib

from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder

# Initialise Plotly's offline notebook mode before any figures are rendered.
# NOTE(review): no plotting call appears in this cell — confirm this setup is
# still needed by later cells.
plotly_offline.init_notebook_mode(connected=True)

# Read the raw CSV into `dataset`. For each anticipated read failure a message
# is printed and `dataset` is set to None so later steps can detect that
# nothing was loaded. Any other exception propagates unchanged.
file_path = os.path.join('../sample-data/dataset.csv')

try:
    dataset = pandas.read_csv(file_path)
except (
    FileNotFoundError,
    pandas.errors.EmptyDataError,
    pandas.errors.ParserError,
) as load_error:
    dataset = None
    if isinstance(load_error, FileNotFoundError):
        print(f"Error: The file at {file_path} was not found.")
    elif isinstance(load_error, pandas.errors.EmptyDataError):
        print("Error: The file is empty.")
    else:
        print("Error: The file could not be parsed.")

# Column roles for the dataset.
# `feature_cols` are the model inputs, `target_col` is the value to predict and
# `categorical_cols` are the string-valued features that get label-encoded.
feature_cols = ["brand", "model", "year", "transmission", "mileage", "fuelType", "mpg", "engineSize"]

# Kept as a one-element list, so `dataset[target_col]` yields a one-column
# DataFrame rather than a Series.
target_col = ["price"]

categorical_cols = ["brand", "model", "transmission", "fuelType"]

if dataset is not None:
    desired_columns = ["brand", "model", "year", "price", "transmission", "mileage", "fuelType", "mpg", "engineSize"]

    # Take an explicit copy of the column subset. New "*_encoded" columns are
    # assigned onto `dataset` afterwards; without the copy pandas treats the
    # frame as derived from the original and emits SettingWithCopyWarning.
    dataset = dataset[desired_columns].copy()

# Encode the categorical columns and build the feature matrix X and target Y.
#
# X and Y default to None so the training step can always test them, even when
# no dataset was loaded or preprocessing fails part-way through.
X = None
Y = None

# One fitted encoder per categorical column, keyed by the source column name,
# so every label -> integer mapping stays available after this step.
label_encoders = {}

if dataset is None:
    # Nothing was loaded; subscripting None would only raise a misleading TypeError.
    print("Error: No dataset is loaded; skipping preprocessing.")
else:
    try:
        for column in categorical_cols:
            # A fresh encoder per column: re-fitting a single shared instance
            # discards the mapping learned for the previous column.
            label_encoder = LabelEncoder()
            dataset[f'{column}_encoded'] = label_encoder.fit_transform(dataset[column])
            label_encoders[column] = label_encoder

        X = dataset[['brand_encoded', 'model_encoded', 'year', 'transmission_encoded', 'mileage', 'fuelType_encoded', 'mpg', 'engineSize']]
        Y = dataset[target_col]

    except KeyError as e:
        # Reset both: X may already be assigned when the Y lookup fails.
        # (`X, Y = None` would itself raise TypeError, since None cannot be unpacked.)
        X = Y = None
        print(f"Error: The specified column {e} does not exist in the dataframe.")

    except Exception as e:
        X = Y = None
        print(f"An unexpected error occurred during preprocessing: {e}")

# Train a decision-tree regressor on the prepared data and persist it to disk.
# Skipped entirely when preprocessing did not produce both X and Y.
if X is not None and Y is not None:
    model = DecisionTreeRegressor()
    model.fit(X, Y)

    model_path = '../trained-data/decision-tree-trained-model.joblib'

    # Create the output directory first so a missing folder does not raise
    # FileNotFoundError and discard the freshly trained model.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)

    print("success")

success
