In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, r2_score

# Load the dataset and discard rows with missing values
df = pd.read_csv('/content/car.csv')
df.dropna(inplace=True)

# Step 1: Calculate car age from 'Year' and drop 'Car_Name'
# Age is measured against a fixed reference year so reruns produce the same features.
REFERENCE_YEAR = 2024
df['Car_Age'] = REFERENCE_YEAR - df['Year']
df.drop(['Car_Name', 'Year'], axis=1, inplace=True)

# Step 2: Encoding categorical variables
categorical_columns = ['Fuel_Type', 'Seller_Type', 'Transmission']

# Use OneHotEncoder to encode categorical columns.
# drop='first' omits one indicator per column; sparse_output=False returns a dense array.
encoder = OneHotEncoder(drop='first', sparse_output=False)
encoded_data = encoder.fit_transform(df[categorical_columns])
# Reuse df's index: after dropna() it may contain gaps, and a fresh 0..n-1 index
# would make the index-aligned concat below pair up the wrong rows and add NaN rows.
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(categorical_columns),
    index=df.index,
)

# Step 3: Merge encoded data with the original dataframe (after dropping original categorical columns)
df_final = pd.concat([df.drop(categorical_columns, axis=1), encoded_df], axis=1)

# Step 4: Split data into features (X) and target (y)
X = df_final.drop('Selling_Price', axis=1)
y = df_final['Selling_Price']

# Step 5: Split data into training and test sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Initialize the models
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
}

# Step 7: Train each model on the training split and score it on the held-out split
results = {}

for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[model_name] = {'MAE': mae, 'R-squared': r2}

# Step 8: Display results
for model_name, metrics in results.items():
    print(f"{model_name} - MAE: {metrics['MAE']}, R-squared: {metrics['R-squared']}")

# Optional: Predict selling price for a new car (user input)
def predict_selling_price(model, present_price, kms_driven, fuel_type, seller_type, transmission, owner, car_age):
    """Predict the selling price of a single car with an already fitted model.

    The categorical inputs are encoded with the module-level ``encoder`` that
    was fitted on the training data, so the indicator columns always match the
    layout the model was trained on. A category the encoder does not know is
    rejected by the encoder instead of being silently treated as the dropped
    baseline category.

    Args:
        model: Fitted estimator exposing ``predict``.
        present_price: Value for the present-price feature.
        kms_driven: Value for the kilometres-driven feature.
        fuel_type: Raw fuel-type label, spelled as in the training data.
        seller_type: Raw seller-type label, spelled as in the training data.
        transmission: Raw transmission label, spelled as in the training data.
        owner: Value for the owner feature.
        car_age: Car age, using the same convention as the ``Car_Age``
            training column.

    Returns:
        The first element of ``model.predict`` for the assembled one-row frame.

    Raises:
        ValueError: Propagated from pandas if the assembled row does not have
            as many values as ``X`` has columns, or from the encoder when it
            rejects an unknown category (presumably the case with its default
            ``handle_unknown`` setting -- confirm against the scikit-learn
            version in use).
    """
    # Encode the raw labels exactly as the training rows were encoded.
    raw_categories = pd.DataFrame(
        [[fuel_type, seller_type, transmission]],
        columns=categorical_columns,
    )
    encoded = encoder.transform(raw_categories)[0]

    # NOTE(review): assumes X holds the four numeric features first, in this
    # order, followed by the encoder output -- this depends on the column
    # order of the source CSV; confirm if the dataset changes.
    input_data = pd.DataFrame(
        [[present_price, kms_driven, owner, car_age, *encoded]],
        columns=X.columns,
    )

    # Predict the selling price
    predicted_price = model.predict(input_data)
    return predicted_price[0]

# Demo: price one hypothetical car using the fitted Random Forest model
predicted_price = predict_selling_price(
    model=models['Random Forest'],
    present_price=6.0,
    kms_driven=50000,
    fuel_type='Petrol',
    seller_type='Dealer',
    transmission='Manual',
    owner=0,
    car_age=1,
)
print(f"Predicted Selling Price (Random Forest): {predicted_price}")


Linear Regression - MAE: 1.2162256821297004, R-squared: 0.848981302489908
Random Forest - MAE: 0.63872131147541, R-squared: 0.9599938850484411
Decision Tree - MAE: 0.8624590163934425, R-squared: 0.910720950876861
Predicted Selling Price (Random Forest): 4.338499999999999
