In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures

In [None]:


# Load the dataset
df = pd.read_csv("C:\\Users\\Padraig\\Desktop\\Car_Project\\Feature_Encoding\\Features_included.csv")

# Fill missing 'Wear' values with 0. The result is assigned back to the column
# rather than calling fillna(inplace=True) on a column selection: that chained
# in-place form is deprecated in pandas 2.x and does not update `df` once
# Copy-on-Write is enabled.
df['Wear'] = df['Wear'].fillna(0)

# Feature Selection
features = ['Make','Model','Year','Mileage','Fuel Type','Engine Size (Litres)','Current Country of Reg.','Imported', 'Depreciation','Wear']
target = 'Price'

# Data Transformation
# Each categorical column gets its own LabelEncoder, kept in `label_encoders`,
# so the string <-> integer mapping of every column remains available
# (e.g. for inverse_transform). Re-fitting one shared encoder would keep only
# the mapping of the last column.
categorical_columns = ['Make', 'Model', 'Fuel Type', 'Current Country of Reg.', 'Imported']
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

X = df[features]
y = df[target]

# Train-Test Split
# Hold out 20% for testing (80% for training), consistent with the splits used
# by the later model cells of this notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=41)

# Model Building
lr = LinearRegression(n_jobs=-1)

# Hyperparameter Tuning
# Only `fit_intercept` changes the fitted model. `copy_X` (whether the input
# may be overwritten) and `n_jobs` (parallelism) are not hyperparameters, so
# they are not part of the search grid.
param_grid = {
    'fit_intercept': [True, False],
}

grid_search = GridSearchCV(lr, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Model Evaluation
# RMSE of the best cross-validated estimator, measured on the held-out test set.
y_pred = grid_search.best_estimator_.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Best RMSE: {rmse}")
print(f"Best Hyperparameters: {grid_search.best_params_}")

# Model Deployment
# Once you're satisfied with the model's performance, you can use it to make predictions on new data

In [None]:
# Random forest baseline: 100 trees, seeded for repeatable results,
# fitted on the training split that is currently in scope.
rf = RandomForestRegressor(random_state=42, n_estimators=100)
rf.fit(X_train, y_train)

# Score the held-out test split.
y_pred = rf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Report root-mean-squared error and the coefficient of determination.
print(f"RMSE: {rmse}")
print(f"R-squared: {r2}")

In [None]:
# Load the dataset
df = pd.read_csv("C:\\Users\\Padraig\\Desktop\\Car_Project\\Feature_Encoding\\Features_included.csv")

# Fill missing 'Wear' values with 0. Assigning the result back avoids the
# chained fillna(inplace=True) form, which is deprecated in pandas 2.x and has
# no effect on `df` under Copy-on-Write.
df['Wear'] = df['Wear'].fillna(0)

# Feature Selection
features = ['Make','Model','Year','Mileage','Fuel Type','Engine Size (Litres)','Current Country of Reg.','Imported', 'Depreciation','Wear']
target = 'Price'

# Data Transformation
# One LabelEncoder per categorical column, kept in `label_encoders`, so every
# column's mapping stays available instead of being overwritten by the next fit.
categorical_columns = ['Make', 'Model', 'Fuel Type', 'Current Country of Reg.', 'Imported']
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

X = df[features]
y = df[target]

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Polynomial Regression Model
# Fit the degree-3 feature expansion on the training data only. The expansion
# must not be re-fitted on its own output, nor on the test data.
poly = PolynomialFeatures(degree=3)
X_poly = poly.fit_transform(X_train)

# Train the model
lr = LinearRegression()
lr.fit(X_poly, y_train)

# Predict new results
# Re-use the already-fitted expansion: transform(), not fit_transform().
y_pred = lr.predict(poly.transform(X_test))

# Model Evaluation
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"RMSE: {rmse}")
print(f"R-squared: {r2}")

In [None]:
import xgboost as xgb

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Carve a validation set out of the training data for early stopping.
# Monitoring the test set would tune the number of boosting rounds on the very
# data used for the final score and make the reported metrics optimistic.
# X_train / y_train are left untouched for any later cell that uses them.
X_fit, X_val, y_fit, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Convert the data to DMatrix format required by XGBoost
dtrain = xgb.DMatrix(X_fit, label=y_fit)
dval = xgb.DMatrix(X_val, label=y_val)
dtest = xgb.DMatrix(X_test, label=y_test)

# Define the XGBoost parameters
# 'n_estimators' is a scikit-learn-wrapper argument and is not used by the
# native xgb.train API; the number of rounds is set by num_boost_round below.
params = {
    'objective': 'reg:squarederror',
    'colsample_bytree': 0.8,
    'learning_rate': 0.1,
    'max_depth': 5,
    'alpha': 10,
    'eval_metric': 'rmse'
}

# Train the XGBoost model: up to 1000 rounds, stopping once the validation RMSE
# has not improved for 50 consecutive rounds.
model = xgb.train(params, dtrain, num_boost_round=1000, evals=[(dval, 'validation')], early_stopping_rounds=50)

# Make predictions using the trained model
# Restrict prediction to the best round found by early stopping, stated
# explicitly so the result does not depend on the library's default behaviour.
y_pred = model.predict(dtest, iteration_range=(0, model.best_iteration + 1))

# Evaluate the model using RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")

# Evaluate the model using R-squared
r2 = r2_score(y_test, y_pred)
print(f"R-squared: {r2}")

In [None]:
# Degree-3 polynomial expansion (all powers and interactions, no constant column).
poly = PolynomialFeatures(
    degree=3,
    interaction_only=False,
    include_bias=False,
)

# Learn the expansion from the training split, then apply the same fitted
# expansion to the test split.
X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)

# Ordinary least squares on the expanded features.
lr = LinearRegression()
lr.fit(X_poly_train, y_train)

# Predict the held-out targets and compute both metrics.
y_pred = lr.predict(X_poly_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Report root-mean-squared error and R-squared.
print(f"RMSE: {rmse}")
print(f"R-squared: {r2}")