In [None]:
import pandas as pd
# Read the 'car_prices' sheet of the workbook into `df` and print a preview
# of its first five rows.
file_path = 'cars resale prices.xlsx'
df = pd.read_excel(io=file_path, sheet_name='car_prices')
print(df.head(n=5))

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# 'saledate' and 'vin' are not used as features. They are dropped directly:
# parsing 'saledate' to datetime first was wasted work because the column was
# removed on the very next line. Re-running this cell on an already processed
# frame raises KeyError here, which also prevents encoding the columns twice.
df = df.drop(columns=['saledate', 'vin'])

# 'mmr' is a numeric quantity, so it must stay numeric. Casting it to str and
# label-encoding it replaces each value with its rank in lexicographic string
# order, and it no longer matches the raw numeric 'mmr' supplied at prediction
# time. Unparseable entries become NaN.
df['mmr'] = pd.to_numeric(df['mmr'], errors='coerce')

# Text columns are label-encoded. Values are cast to str first so that missing
# entries become the literal category 'nan' instead of mixing floats and
# strings inside the encoder.
categorical_columns = ['make', 'model', 'trim', 'body', 'transmission', 'color', 'interior', 'state']

# One fitted encoder per column is kept so that new inputs can be mapped to the
# same integer codes later.
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le
print(df.head())


In [None]:
# Model inputs (X) and the value to predict (y).
feature_columns = ['year', 'make', 'model', 'trim', 'body', 'odometer', 'condition', 'mmr']
target_column = 'sellingprice'

X = df[feature_columns]
y = df[target_column]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Cleaned copies of the training data: rows with any missing feature are
# removed and the targets are selected by the surviving index labels.
# X_train / y_train themselves are left untouched.
X_train_clean = X_train.dropna(axis=0, how='any')
y_train_clean = y_train.loc[X_train_clean.index]

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor

# Keep only rows in which every feature AND the target are present.
# Previously only y_train was checked, so rows with missing features still
# reached fit(), and missing values in the held-out split could break or
# distort the RMSE computation.
train_complete = X_train.notna().all(axis=1) & y_train.notna()
if not train_complete.all():
    print(f"Dropping {int((~train_complete).sum())} training rows with missing values.")
    X_train = X_train[train_complete]
    y_train = y_train[train_complete]

test_complete = X_test.notna().all(axis=1) & y_test.notna()
if not test_complete.all():
    print(f"Dropping {int((~test_complete).sum())} test rows with missing values.")
    X_test = X_test[test_complete]
    y_test = y_test[test_complete]

print(f"Training on {len(X_train)} rows; evaluating on {len(X_test)} rows.")

# Best parameters obtained from RandomizedSearchCV (you may want to optimize these)
best_params_rf = {
    'n_estimators': 150,
    'max_depth': 10,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'max_features': 'sqrt',  # Changed from 'auto' to 'sqrt'
    'random_state': 42,
    'n_jobs': -1  # Use all available cores
}
best_rf_model = RandomForestRegressor(**best_params_rf)
best_rf_model.fit(X_train, y_train)

# Root-mean-squared error on the held-out rows.
y_pred = best_rf_model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f"Random Forest RMSE: {rmse}")


In [None]:
import joblib
# Write the fitted model to disk, then read it straight back so that
# `loaded_model` refers to the persisted copy.
model_path = 'car_price_predictor.pkl'
joblib.dump(best_rf_model, model_path)
loaded_model = joblib.load(model_path)


In [None]:
def _prompt_number(prompt, cast, error_message, is_valid=None, invalid_message=None):
    """Ask for a number until the reply parses with `cast` and passes `is_valid`.

    Args:
        prompt: Text shown to the user.
        cast: Callable (e.g. ``int`` or ``float``) that converts the reply and
            raises ``ValueError`` when it cannot.
        error_message: Printed when `cast` raises ``ValueError``.
        is_valid: Optional predicate applied to the converted value.
        invalid_message: Printed when `is_valid` returns a falsy result.

    Returns:
        The first converted value that is accepted.
    """
    while True:
        try:
            value = cast(input(prompt))
        except ValueError:
            print(error_message)
            continue
        if is_valid is None or is_valid(value):
            return value
        print(invalid_message)


def get_car_parameters():
    """Interactively collect the features of one car.

    Every numeric prompt (year, odometer, condition, MMR) is repeated until the
    reply is a valid number. The year prompt used to call ``int(input(...))``
    without a guard, so a single typo raised ``ValueError``; it now retries
    like the other prompts. Text fields are returned exactly as typed.

    Returns:
        dict with the keys 'year', 'make', 'model', 'trim', 'body',
        'odometer', 'condition' and 'mmr'.
    """
    year = _prompt_number(
        "Enter the car's year: ",
        int,
        "Invalid input. Please enter a numeric value for the year.",
    )
    make = input("Enter the car's make (e.g., BMW): ")
    model = input("Enter the car's model (e.g., 3 Series): ")
    trim = input("Enter the car's trim (e.g., 328i SULEV): ")
    body = input("Enter the car's body type (e.g., Sedan): ")

    odometer = _prompt_number(
        "Enter the car's odometer reading (e.g., 60000): ",
        int,
        "Invalid input. Please enter a numeric value for the odometer reading.",
    )
    # Condition must additionally lie within the inclusive range 1-5.
    condition = _prompt_number(
        "Enter the car's condition (on a scale of 1-5, e.g., 4.5): ",
        float,
        "Invalid input. Please enter a numeric value for the condition.",
        is_valid=lambda value: 1 <= value <= 5,
        invalid_message="Condition must be between 1 and 5.",
    )
    mmr = _prompt_number(
        "Enter the car's MMR (e.g., 18000): ",
        int,
        "Invalid input. Please enter a numeric value for the MMR.",
    )

    return {
        'year': year,
        'make': make,
        'model': model,
        'trim': trim,
        'body': body,
        'odometer': odometer,
        'condition': condition,
        'mmr': mmr,
    }

def safe_transform(column, value, encoder):
    """Encode a single value, reporting unknown values instead of raising.

    Args:
        column: Column name, used only in the error message.
        value: The raw value to encode.
        encoder: Object whose ``transform`` accepts a one-element list and
            raises ``ValueError`` for a value it does not recognise.

    Returns:
        The first element of ``encoder.transform([value])``, or ``None`` (after
        printing an error) when the encoder raises ``ValueError``.
    """
    try:
        code = encoder.transform([value])[0]
    except ValueError:
        print(f"Error: {value} is not recognized for {column}. Please input a valid value.")
        code = None
    return code

# Load the trained Random Forest model from 'car_price_predictor.pkl', the same
# path the model was dumped to earlier in this file. This rebinds the
# module-level `loaded_model` that predict_resale_price reads.
# NOTE(review): joblib.load is pickle-based, so only load files you trust.
loaded_model = joblib.load('car_price_predictor.pkl')

def predict_resale_price(car_parameters, label_encoders, model=None, adjustment_factor=0.4):
    """Predict an adjusted resale price for one car.

    Args:
        car_parameters: dict with the keys 'year', 'make', 'model', 'trim',
            'body', 'odometer', 'condition' and 'mmr'.
        label_encoders: dict mapping 'make', 'model', 'trim' and 'body' to the
            encoders passed to ``safe_transform``.
        model: Object with a ``predict(DataFrame)`` method. Defaults to the
            module-level ``loaded_model``.
        adjustment_factor: Multiplier applied to the raw prediction. Defaults
            to 0.4, the value that used to be hard-coded.
            NOTE(review): nothing in this file justifies 0.4 - confirm it.

    Returns:
        The first predicted value multiplied by `adjustment_factor`, or
        ``None`` (after printing a message) when any categorical input could
        not be encoded.
    """
    estimator = loaded_model if model is None else model

    # Encode all four categorical inputs before checking for failures, so that
    # every unrecognised value is reported in a single pass.
    categorical_fields = ('make', 'model', 'trim', 'body')
    encoded = {
        field: safe_transform(field, car_parameters[field], label_encoders[field])
        for field in categorical_fields
    }
    if any(code is None for code in encoded.values()):
        print("One or more inputs were invalid. Please try again with valid values.")
        return None

    # Single-row frame; the column order matches the feature frame X built
    # earlier in this file.
    new_data = pd.DataFrame({
        'year': [car_parameters['year']],
        'make': [encoded['make']],
        'model': [encoded['model']],
        'trim': [encoded['trim']],
        'body': [encoded['body']],
        'odometer': [car_parameters['odometer']],
        'condition': [car_parameters['condition']],
        'mmr': [car_parameters['mmr']]
    })

    predicted_price = estimator.predict(new_data)
    return predicted_price[0] * adjustment_factor

# Entry point: prompt for one car, predict, and report the result.
if __name__ == "__main__":
    # `label_encoders` must already exist at module level: a dict holding the
    # fitted encoder for each categorical column.
    car_parameters = get_car_parameters()
    adjusted_price = predict_resale_price(car_parameters, label_encoders)

    # predict_resale_price returns None when an input could not be encoded;
    # in that case it has already printed its own message.
    has_prediction = adjusted_price is not None
    if has_prediction:
        print(f'Adjusted Predicted Resale Price: {adjusted_price}')