In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from kerastuner.tuners import RandomSearch
from sklearn.preprocessing import StandardScaler

# Read data
file_path = '/Users/a1234/Desktop/BU/677 PYTHON/project/combined_data/processed_toyota_data.json'
data = pd.read_json(file_path, lines=True)

# Data splitting
train_data, temp_data = train_test_split(data, test_size=0.3, random_state=42)
validation_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

# Type conversion
train_data['After_Year'] = train_data['After_Year'].astype(int)
validation_data['After_Year'] = validation_data['After_Year'].astype(int)
test_data['After_Year'] = test_data['After_Year'].astype(int)

# Features and target
features = ['Publish Time', 'After_Year', 'Distance']
X_train = train_data[features]
y_train = train_data['Price']
X_validation = validation_data[features]
y_validation = validation_data['Price']
X_test = test_data[features]
y_test = test_data['Price']

# RandomForestRegressor
param_dist = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf = RandomForestRegressor(random_state=42)
random_search = RandomizedSearchCV(estimator=rf, param_distributions=param_dist, n_iter=100, cv=5, verbose=2, random_state=42, n_jobs=-1)
random_search.fit(X_train, y_train)

# Model and score output
print("Best parameters found: ", random_search.best_params_)
print("Best score (R^2): ", random_search.best_score_)

# LSTM model optimization
# Note: Preprocessing and formatting of input data is required, assumed to be processed here
# Prepare the three-dimensional input required for LSTM, shown here as an example, needs to be adjusted based on specific data
X_train_cnn = np.reshape(X_train.values, (X_train.shape[0], X_train.shape[1], 1))
X_validation_cnn = np.reshape(X_validation.values, (X_validation.shape[0], X_validation.shape[1], 1))

# Prediction function
def predict_price(publish_time, after_year, distance):
    input_data = pd.DataFrame({
        'Publish Time': [publish_time],
        'After_Year': [after_year],
        'Distance': [distance]
    })

    # Simplified approach, directly using the Random Forest model for prediction
    predicted_price = random_search.predict(input_data)
    return predicted_price[0]

# User input
car_make_model = input("Enter the Car Brand & Model: ")
user_publish_time = input("Enter the publish time of the car: ")
user_after_year = int(input("Enter the year after manufacturing: "))
user_distance = float(input("Enter the distance driven (in km): "))

# Output predicted price
predicted_price = predict_price(user_publish_time, user_after_year, user_distance)
print(f"Predicted price of the car is: ${predicted_price:.2f}")


Fitting 5 folds for each of 100 candidates, totalling 500 fits


  from kerastuner.tuners import RandomSearch
150 fits failed out of a total of 500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
89 fits failed with the following error:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(

Best parameters found:  {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': 20}
Best score (R^2):  0.5912722356676303
Predicted price of the car is: $2423.76
