In [5]:
import pandas as pd
import numpy as np
df=pd.read_csv('encoded_car_data.csv')

In [6]:
from sklearn.linear_model import LinearRegression
feature=df.drop(['Car_Price'],axis=1)
target=df['Car_Price']

In [7]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.2,random_state=39)

In [8]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [12]:
#linear regression model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.2,random_state=39)
linear_model = LinearRegression()
linear_model.fit(x_train,y_train)
y_pred_test = linear_model.predict(x_test)
y_pred_train=linear_model.predict(x_train)


# 6. Evaluate
mae_test = mean_absolute_error(y_test, y_pred_test)
mae_train=mean_squared_error(y_train,y_pred_train)
mse_test=mean_squared_error(y_test, y_pred_test)
mse_train=mean_squared_error(y_train,y_pred_train)
r2_test=r2_score(y_test, y_pred_test)
r2_train=r2_score(y_train,y_pred_train)

print("📊 Model Evaluation Metrics:")
print(f"MAE_test  : {mae_test:.4f}")
print(f"MSE_test : {mse_test:.4f}")
print(f"R²_test    : {r2_test:.4f}")

print(f"MAE_train  : {mae_train:.4f}")
print(f"MSE_train : {mse_train:.4f}")
print(f"R²_train    : {r2_train:.4f}")



📊 Model Evaluation Metrics:
MAE_test  : 335238.2857
MSE_test : 454121176505.1572
R²_test    : 0.7725
MAE_train  : 356066442427.5655
MSE_train : 356066442427.5655
R²_train    : 0.7546


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load encoded dataset
df = pd.read_csv('encoded_car_data.csv')

# Features & Target
X = df.drop('Car_Price', axis=1)
y = df['Car_Price']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
}

# Train & evaluate each model
results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    results.append({
        'Model': name,
        'MAE': round(mae, 2),
        'RMSE': round(rmse, 2),
        'R² Score': round(r2, 4)
    })

# Display results
results_df = pd.DataFrame(results).sort_values(by='R² Score', ascending=False)
print("📊 Model Comparison:")
print(results_df.to_string(index=False))


📊 Model Comparison:
            Model       MAE      RMSE  R² Score
    Decision Tree   2018.50  20993.83    0.9997
    Random Forest   9102.35  30978.12    0.9994
Gradient Boosting 140186.84 244131.00    0.9596
Linear Regression 310371.07 555407.76    0.7911


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load data
df = pd.read_csv('encoded_car_data.csv')
X = df.drop('Car_Price', axis=1)
y = df['Car_Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models and hyperparameters
model_configs = {
    'Decision Tree': {
        'model': DecisionTreeRegressor(random_state=42),
        'params': {
            'max_depth': [5, 10, 20, None],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }
    },
    'Random Forest': {
        'model': RandomForestRegressor(random_state=42),
        'params': {
            'n_estimators': [100, 200],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5],
            'min_samples_leaf': [1, 2]
        }
    },
    'Gradient Boosting': {
        'model': GradientBoostingRegressor(random_state=42),
        'params': {
            'n_estimators': [100, 200],
            'learning_rate': [0.05, 0.1],
            'max_depth': [3, 5],
            'min_samples_split': [2, 5],
            'min_samples_leaf': [1, 2]
        }
    },
    'AdaBoost': {
        'model': AdaBoostRegressor(random_state=42),
        'params': {
            'n_estimators': [50, 100, 200],
            'learning_rate': [0.05, 0.1, 1.0]
        }
    }
}

# Store results
results = []

# Loop over models
for name, config in model_configs.items():
    print(f"\n🔧 Tuning {name}...")
    grid = GridSearchCV(
        config['model'],
        config['params'],
        cv=3,
        scoring='neg_mean_absolute_error',
        n_jobs=-1,
        verbose=0
    )
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    y_pred = best_model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    results.append({
        'Model': name,
        'Best Params': grid.best_params_,
        'MAE': round(mae, 2),
        'RMSE': round(rmse, 2),
        'R²': round(r2, 4)
    })

# Display results
results_df = pd.DataFrame(results).sort_values(by='R²', ascending=False)
pd.set_option('display.max_colwidth', None)
print("\n📊 Final Comparison:")
print(results_df.to_string(index=False))



🔧 Tuning Decision Tree...

🔧 Tuning Random Forest...

🔧 Tuning Gradient Boosting...

🔧 Tuning AdaBoost...

📊 Final Comparison:
            Model                                                                                                Best Params       MAE      RMSE     R²
    Decision Tree                                         {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}   2018.50  20993.83 0.9997
    Random Forest                    {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}   9356.44  34064.29 0.9992
Gradient Boosting {'learning_rate': 0.1, 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}  61553.80  94139.92 0.9940
         AdaBoost                                                                {'learning_rate': 0.1, 'n_estimators': 100} 295899.95 512076.50 0.8224


In [3]:
import pickle

def save_model(model, filename='best_model.pkl'):
    with open(filename, 'wb') as file:
        pickle.dump(model, file)
    print(f"✅ Model saved as '{filename}'")


In [4]:
# Assuming best_model = grid.best_estimator_ from your Random Forest
save_model(best_model, 'decision_tree_car_price.pkl')

✅ Model saved as 'decision_tree_car_price.pkl'
