In [None]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential, clone_model


# Single source of truth for the training workbook location. This is the path
# the script was actually opening (presumably a Colab Google Drive mount -
# adjust when running elsewhere).
file_path = '/content/drive/MyDrive/ProjectTrainingDataFinalFINALCONFIRM.xlsx'

# Open the workbook in a context manager so the underlying file handle is
# released as soon as the first sheet has been parsed into a DataFrame.
with pd.ExcelFile(file_path) as data:
    df = data.parse(data.sheet_names[0])

def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df*; the input frame is left untouched.

    In the returned copy:

    * column names are lower-cased and spaces are replaced by underscores;
    * if an ``se/le`` column exists, its text values are stripped of
      surrounding whitespace, upper-cased and mapped ``YES`` -> 1 and
      ``NO`` -> 0; every other value (including missing ones) becomes 0;
    * the ``year_of_upload`` column is dropped when present.

    Args:
        df: Raw data frame whose column labels are strings.

    Returns:
        A new data frame with the transformations above applied.
    """
    # Work on a copy: assigning to ``columns`` or to a column would otherwise
    # rename/overwrite data in the caller's frame as a hidden side effect.
    cleaned = df.copy()
    cleaned.columns = cleaned.columns.str.lower().str.replace(' ', '_')

    if 'se/le' in cleaned.columns:
        # Strip before matching so values such as " yes " are not silently
        # treated as "not SE/LE".
        flags = cleaned['se/le'].str.strip().str.upper()
        cleaned['se/le'] = flags.map({'YES': 1, 'NO': 0}).fillna(0)

    # errors='ignore' keeps the "drop only if present" behaviour.
    return cleaned.drop(columns=['year_of_upload'], errors='ignore')


# Normalise column names / flags and drop unused columns
cleaned_data = clean_data(df)

# Column groups used as model inputs
categorical_features = ['car_name', 'fuel_type', 'transmission']
numerical_features = ['model_year', 'se/le']

# Dense one-hot encoding; the first level of every categorical column is
# dropped, and categories unseen at fit time are ignored at transform time.
encoder = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')
encoded_features = encoder.fit_transform(cleaned_data[categorical_features])

# Feature matrix: numerical columns first, then the one-hot columns.
# The numerical part gets a fresh 0..n-1 index so it lines up row-by-row with
# the encoded frame built from a plain array.
numeric_part = cleaned_data[numerical_features].reset_index(drop=True)
encoded_part = pd.DataFrame(
    encoded_features,
    columns=encoder.get_feature_names_out(categorical_features),
)
X = pd.concat([numeric_part, encoded_part], axis=1)

# Regression target
y = cleaned_data['car_price']

# Split the data into training and testing sets BEFORE standardizing, so the
# scaler's mean/variance are learned from the training rows only and no
# test-set statistics leak into the features the models are trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features using training-set statistics
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix on the same scale (used by the cross-validation loop)
X_scaled = scaler.transform(X)

# Build the TensorFlow model: a small fully connected regressor.
# The input width is declared with an explicit Input layer; passing
# `input_dim` to the first Dense layer is deprecated in current Keras and
# emits a warning when the Sequential model is constructed.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

# Compile the model: optimise MSE, report MAE alongside it
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Train the model. The held-out split is passed as validation data only to
# monitor progress per epoch; it is not used for weight updates.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, verbose=1)

# Evaluate the model on the held-out split (returns [loss, metric])
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print(f"Mean Absolute Error: {mae:.2f}")

# Gradient-boosted trees as a classical baseline to compare with the network
gbr = GradientBoostingRegressor(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)

# fit() returns the estimator itself, so training and prediction can be chained
gbr_predictions = gbr.fit(X_train, y_train).predict(X_test)

# Same metric as reported for the neural network, on the same held-out split
gbr_mae = mean_absolute_error(y_test, gbr_predictions)
print(f"Gradient Boosting Regressor MAE: {gbr_mae:.2f}")

# Cross-validation for robustness
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cross_val_errors = []
for train_index, val_index in kf.split(X_scaled):
    X_train_cv, X_val_cv = X_scaled[train_index], X_scaled[val_index]
    # KFold yields positional indices, so select the targets by position
    # rather than by index label.
    y_train_cv, y_val_cv = y.iloc[train_index], y.iloc[val_index]

    # Train a freshly initialised copy of the architecture for every fold.
    # Re-fitting the already trained `model` would score each fold with
    # weights that have seen its validation rows, which understates the error
    # and also keeps altering the model that is later used for predictions.
    fold_model = clone_model(model)
    fold_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

    fold_model.fit(X_train_cv, y_train_cv, epochs=50, batch_size=32, verbose=0)
    val_loss, val_mae = fold_model.evaluate(X_val_cv, y_val_cv, verbose=0)
    cross_val_errors.append(val_mae)

print(f"Cross-validated MAE: {np.mean(cross_val_errors):.2f}")

# Function to predict custom configurations
def predict_specific_price(car_name, model_year, fuel_type, transmission, se_le, predicted_year):
    """Predict the price of one car configuration with the trained network.

    NOTE: a later definition with the same name in this file replaces this
    one when the script runs; this version applies no inflation adjustment.

    Args:
        car_name: Value for the ``car_name`` categorical feature.
        model_year: Value for the ``model_year`` numerical feature.
        fuel_type: Value for the ``fuel_type`` categorical feature.
        transmission: Value for the ``transmission`` categorical feature.
        se_le: Value for the ``se/le`` feature.
        predicted_year: Returned unchanged alongside the prediction.

    Returns:
        Tuple ``(predicted_price, predicted_year)``.
    """
    # Single-row frame describing the requested configuration
    row = pd.DataFrame({
        'car_name': [car_name],
        'model_year': [model_year],
        'fuel_type': [fuel_type],
        'transmission': [transmission],
        'se/le': [se_le]
    })

    # Rebuild the training layout: numerical columns, then one-hot columns
    one_hot = pd.DataFrame(
        encoder.transform(row[categorical_features]),
        columns=encoder.get_feature_names_out(categorical_features),
    )
    numeric = row[['model_year', 'se/le']].reset_index(drop=True)
    features = scaler.transform(pd.concat([numeric, one_hot], axis=1))

    # The network outputs a 2-D array; take its single scalar value
    raw_prediction = model.predict(features)
    return raw_prediction.flatten()[0], predicted_year

# Example inflation rate function
def get_inflation_rate(year):
    """Return the example annual inflation rate for *year*.

    Years missing from the table (earlier or later) fall back to the rate of
    the latest year the table contains.
    """
    # Example rates, e.g. 3% inflation for 2023
    known_rates = {2023: 0.03, 2024: 0.025, 2025: 0.02}

    if year in known_rates:
        return known_rates[year]

    # Unknown year: reuse the most recent known rate
    latest_year = max(known_rates)
    return known_rates[latest_year]

# Adjust the predict_specific_price function to include inflation
def predict_specific_price(car_name, model_year, fuel_type, transmission, se_le, predicted_year, *, current_year=2023):
    """Predict a car's price and compound inflation up to *predicted_year*.

    The trained network's output is treated as the price in *current_year*.
    It is then multiplied by ``1 + get_inflation_rate(year)`` for every year
    from ``current_year + 1`` through ``predicted_year`` inclusive.

    Args:
        car_name: Value for the ``car_name`` categorical feature.
        model_year: Value for the ``model_year`` numerical feature.
        fuel_type: Value for the ``fuel_type`` categorical feature.
        transmission: Value for the ``transmission`` categorical feature.
        se_le: Value for the ``se/le`` feature (1 or 0 in the example usage).
        predicted_year: Year the price should be projected to.
        current_year: Year the raw model prediction refers to. Keyword-only;
            defaults to 2023, the value that used to be hard-coded.

    Returns:
        Tuple ``(adjusted_price, predicted_year)``. When ``predicted_year``
        is not later than ``current_year`` no adjustment is applied and the
        raw prediction is returned.
    """
    # Prepare the input data as a single-row frame
    custom_data = pd.DataFrame({
        'car_name': [car_name],
        'model_year': [model_year],
        'fuel_type': [fuel_type],
        'transmission': [transmission],
        'se/le': [se_le]
    })

    # Encode and standardize the input data. Use the shared feature lists so
    # the column order always matches the one the scaler was fitted on.
    custom_encoded = encoder.transform(custom_data[categorical_features])
    custom_data_transformed = pd.DataFrame(
        custom_encoded,
        columns=encoder.get_feature_names_out(categorical_features),
    )
    custom_data_transformed = pd.concat(
        [custom_data[numerical_features].reset_index(drop=True), custom_data_transformed],
        axis=1,
    )
    custom_data_scaled = scaler.transform(custom_data_transformed)

    # Predict the price for the current year (model output is 2-D; take the scalar)
    predicted_price = model.predict(custom_data_scaled).flatten()[0]

    # Adjust for inflation to the predicted year, compounding year by year
    adjusted_price = predicted_price
    for year in range(current_year + 1, predicted_year + 1):
        adjusted_price *= (1 + get_inflation_rate(year))

    return adjusted_price, predicted_year

# Example usage: Predict price for a specific car with inflation adjustment
# Collect the car description interactively; numeric answers are converted
# right after they are read, so invalid input fails before the next prompt.
car_name = input("Car name: ")
raw_model_year = input("Model year: ")
model_year = int(raw_model_year)
fuel_type = input("Fuel type: ")
transmission = input("Car transmission: ")
raw_se_le = input("Is the car a SE/LE (1 for Yes, 0 for No): ")
se_le = int(raw_se_le)
raw_predicted_year = input("Which year do you want to predict: ")
predicted_year = int(raw_predicted_year)

# Run the prediction and report the result
prediction_result = predict_specific_price(
    car_name, model_year, fuel_type, transmission, se_le, predicted_year
)
predicted_price, year = prediction_result
print(f"\nPredicted price for {car_name} (Model Year: {model_year}) in {year}: {predicted_price:.2f}")

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - loss: 647.4113 - mean_absolute_error: 22.8002 - val_loss: 622.2076 - val_mean_absolute_error: 21.9174
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 527.2830 - mean_absolute_error: 20.2038 - val_loss: 451.3147 - val_mean_absolute_error: 17.8410
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 372.0497 - mean_absolute_error: 16.1496 - val_loss: 197.1050 - val_mean_absolute_error: 10.1706
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 149.9955 - mean_absolute_error: 9.1134 - val_loss: 86.0286 - val_mean_absolute_error: 6.8153
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 98.2421 - mean_absolute_error: 7.0348 - val_loss: 66.7188 - val_mean_absolute_error: 5.8774
Epoch 6/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step

Predicted price for Toyota Corolla 2.0D (Model Year: 2004) in 2024: 11.06
