In [1]:
import warnings
from sklearn.exceptions import ConvergenceWarning

# Suppress convergence warnings
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Your existing code
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from skopt import BayesSearchCV

df = pd.read_csv('Effort estimation data set.csv')

X = df.drop(columns=['Effort (Actual)'])
y = df['Effort (Actual)']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

lr = LinearRegression()
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)

dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)

mlp = MLPRegressor()
mlp.fit(X_train, y_train)
mlp_pred = mlp.predict(X_test)

svr = SVR()
svr.fit(X_train, y_train)
svr_pred = svr.predict(X_test)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svr_sigmoid = SVR(kernel='sigmoid')
svr_sigmoid.fit(X_train_scaled, y_train)
svr_sigmoid_pred = svr_sigmoid.predict(X_test_scaled)

svr_poly = SVR(kernel='poly')
svr_poly.fit(X_train_scaled, y_train)
svr_poly_pred = svr_poly.predict(X_test_scaled)

svr_rbf = SVR(kernel='rbf')
svr_rbf.fit(X_train_scaled, y_train)
svr_rbf_pred = svr_rbf.predict(X_test_scaled)

def normalized_mean_squared_error(y_true, y_pred):
    return mean_squared_error(y_true, y_pred) / np.var(y_true)

def evaluate_performance(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    nmse = normalized_mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return mae, mse, mape, nmse, r2

models = [lr, dt, mlp, svr, svr_sigmoid, svr_poly, svr_rbf]
predictions = [lr_pred, dt_pred, mlp_pred, svr_pred, svr_sigmoid_pred, svr_poly_pred, svr_rbf_pred]
model_names = ['Linear Regression', 'Decision Tree Regressor', 'MLP Regressor', 'SVR', 'SMO with Sigmoid Kernel', 'SMO with polynomial Kernel', 'SMO with RBF Kernel']

results = []
for model, pred, name in zip(models, predictions, model_names):
    mae, mse, mape, nmse, r2 = evaluate_performance(y_test, pred)
    results.append([name, mae, mse, mape, nmse, r2])

df_results = pd.DataFrame(results, columns=["Model", "Mean Absolute Error", "Mean Squared Error", "Mean Absolute Percentage Error", "Normalized Mean Squared Error", "R^2 Score"])
df_results


Unnamed: 0,Model,Mean Absolute Error,Mean Squared Error,Mean Absolute Percentage Error,Normalized Mean Squared Error,R^2 Score
0,Linear Regression,70.083731,7611.219923,0.657789,0.077788,0.922212
1,Decision Tree Regressor,58.518,8443.495887,0.257034,0.086294,0.913706
2,MLP Regressor,83.241591,13518.711214,0.243468,0.138164,0.861836
3,SVR,235.43172,94979.271884,3.728616,0.970705,0.029295
4,SMO with Sigmoid Kernel,227.201465,88278.464248,3.622396,0.902221,0.097779
5,SMO with polynomial Kernel,161.270747,34556.725266,2.677129,0.353176,0.646824
6,SMO with RBF Kernel,235.167226,94876.092943,3.721497,0.96965,0.03035


In [2]:
# Get the parameters and default values for each model
model_params = {
    'Linear Regression': lr.get_params(),
    'Decision Tree Regressor': dt.get_params(),
    'MLP Regressor': mlp.get_params(),
    'SVR': svr.get_params(),
    'SMO with Sigmoid Kernel': svr_sigmoid.get_params(),
    'SMO with polynomial Kernel': svr_poly.get_params(),
    'SMO with RBF Kernel': svr_rbf.get_params(),
}

# Display the parameters and default values for each model
for model_name, params in model_params.items():
    print(f"Model: {model_name}")
    for param_name, default_value in params.items():
        print(f"  {param_name}: {default_value}")
    print()

Model: Linear Regression
  copy_X: True
  fit_intercept: True
  n_jobs: None
  positive: False

Model: Decision Tree Regressor
  ccp_alpha: 0.0
  criterion: squared_error
  max_depth: None
  max_features: None
  max_leaf_nodes: None
  min_impurity_decrease: 0.0
  min_samples_leaf: 1
  min_samples_split: 2
  min_weight_fraction_leaf: 0.0
  random_state: None
  splitter: best

Model: MLP Regressor
  activation: relu
  alpha: 0.0001
  batch_size: auto
  beta_1: 0.9
  beta_2: 0.999
  early_stopping: False
  epsilon: 1e-08
  hidden_layer_sizes: (100,)
  learning_rate: constant
  learning_rate_init: 0.001
  max_fun: 15000
  max_iter: 200
  momentum: 0.9
  n_iter_no_change: 10
  nesterovs_momentum: True
  power_t: 0.5
  random_state: None
  shuffle: True
  solver: adam
  tol: 0.0001
  validation_fraction: 0.1
  verbose: False
  warm_start: False

Model: SVR
  C: 1.0
  cache_size: 200
  coef0: 0.0
  degree: 3
  epsilon: 0.1
  gamma: scale
  kernel: rbf
  max_iter: -1
  shrinking: True
  tol: 0

In [3]:
# LINEAR REGRESSION


# Suppress specific warnings
warnings.filterwarnings("ignore", message="The objective has been evaluated at this point before.", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Define the parameter grid for Grid Search
param_grid_lr = {
    'fit_intercept': [True, False],
    'copy_X': [True, False],
    'positive': [True, False]
}

# Perform Grid Search for Linear Regression
grid_search_lr = GridSearchCV(LinearRegression(), param_grid_lr, cv=8, scoring='neg_mean_squared_error')
grid_search_lr.fit(X_train_scaled, y_train)

# Get the best parameters and best score for Grid Search
best_params_grid_lr = grid_search_lr.best_params_
best_score_grid_lr = grid_search_lr.best_score_

# Train Linear Regression with the best parameters from Grid Search
best_lr_grid = LinearRegression(**best_params_grid_lr)
best_lr_grid.fit(X_train_scaled, y_train)

# Make predictions with the best Linear Regression model from Grid Search
best_lr_pred_grid = best_lr_grid.predict(X_test_scaled)

# Define a function to evaluate performance metrics
def normalized_mean_squared_error(y_true, y_pred):
    return mean_squared_error(y_true, y_pred) / np.var(y_true)

def evaluate_performance(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    nmse = normalized_mean_squared_error(y_true, y_pred)
    return mae, mse, mape, r2, nmse

# Evaluate performance metrics for the best Linear Regression model from Grid Search
mae_grid, mse_grid, mape_grid, r2_grid, nmse_grid = evaluate_performance(y_test, best_lr_pred_grid)

# Print the model name
print("Model: Linear Regression")

# Print the best parameters set by grid search
print("  Best Parameters:")
for param, value in best_params_grid_lr.items():
    print(f"    {param}: {value}")

# Print the performance metrics
print(f"  Normalized Mean Squared Error: {nmse_grid}")
print(f"  Mean Absolute Error: {mae_grid}")
print(f"  Mean Squared Error: {mse_grid}")
print(f"  Mean Absolute Percentage Error: {mape_grid}")
print(f"  R^2 Score: {r2_grid}")


Model: Linear Regression
  Best Parameters:
    copy_X: True
    fit_intercept: True
    positive: False
  Normalized Mean Squared Error: 0.07778798316065162
  Mean Absolute Error: 70.08373079843935
  Mean Squared Error: 7611.219922889723
  Mean Absolute Percentage Error: 0.6577887118231351
  R^2 Score: 0.9222120168393484


In [None]:
# DECISION TREE

# Suppress specific warnings
warnings.filterwarnings("ignore", message="The objective has been evaluated at this point before.", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Define the parameter grid for Grid Search for Decision Tree Regressor with a wider range of values
param_grid_dt = {
    'max_depth': range(1, 10),  # Set a wider range from 1 to 20 for max_depth
    'max_leaf_nodes': range(2, 10),  # Set a wider range from 2 to 100 for max_leaf_nodes
    'min_samples_leaf': range(1, 6),  # Set a wider range from 1 to 10 for min_samples_leaf
    'min_samples_split': range(2, 6),  # Set a wider range from 2 to 20 for min_samples_split
    "criterion": ["poisson", "absolute_error", "squared_error", "friedman_mse"]
}

# Perform Grid Search for Decision Tree Regressor
grid_search_dt = GridSearchCV(DecisionTreeRegressor(), param_grid_dt, cv=8, scoring='neg_mean_squared_error')
grid_search_dt.fit(X_train_scaled, y_train)

# Get the best parameters and best score for Grid Search
best_params_grid_dt = grid_search_dt.best_params_
best_score_grid_dt = grid_search_dt.best_score_

# Train Decision Tree Regressor with the best parameters from Grid Search
best_dt_grid = DecisionTreeRegressor(**best_params_grid_dt)
best_dt_grid.fit(X_train_scaled, y_train)

# Make predictions with the best Decision Tree Regressor model from Grid Search
best_dt_pred_grid = best_dt_grid.predict(X_test_scaled)

# Evaluate performance metrics for the best Linear Regression model from Grid Search
mae_grid, mse_grid, mape_grid, r2_grid, nmse_grid = evaluate_performance(y_test, best_dt_pred_grid)

# Print the model name
print("Model: Decision Tree Regressor")

# Print the best parameters set by grid search
print("  Best Parameters:")
for param, value in best_params_grid_dt.items():
    print(f"    {param}: {value}")

# Print the performance metrics
print(f"  Normalized Mean Squared Error: {nmse_grid}")
print(f"  Mean Absolute Error: {mae_grid}")
print(f"  Mean Squared Error: {mse_grid}")
print(f"  Mean Absolute Percentage Error: {mape_grid}")
print(f"  R^2 Score: {r2_grid}")

In [None]:
# MLP

# Suppress specific warnings
warnings.filterwarnings("ignore", message="The objective has been evaluated at this point before.", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", message="overflow encountered in square")
warnings.filterwarnings("ignore", message="overflow encountered in matmul")
warnings.filterwarnings("ignore", message="invalid value encountered in matmul")

# Define the parameter grid for Grid Search for MLP Regressor
param_grid_mlp = {
    'hidden_layer_sizes': [(100,), (50, 50), (100, 50, 20)],
    'activation': ['relu', 'tanh'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01, 0.1]
}

# Perform Grid Search for MLP Regressor
grid_search_mlp = GridSearchCV(MLPRegressor(), param_grid_mlp, cv=8, scoring='neg_mean_squared_error')
grid_search_mlp.fit(X_train_scaled, y_train)

# Get the best parameters and best score for Grid Search
best_params_grid_mlp = grid_search_mlp.best_params_
best_score_grid_mlp = grid_search_mlp.best_score_

# Train MLP Regressor with the best parameters from Grid Search
best_mlp_grid = MLPRegressor(**best_params_grid_mlp)
best_mlp_grid.fit(X_train_scaled, y_train)

# Make predictions with the best MLP Regressor model from Grid Search
best_mlp_pred_grid = best_mlp_grid.predict(X_test_scaled)

# Evaluate performance metrics for the best MLP Regressor model from Grid Search
mae_grid_mlp, mse_grid_mlp, mape_grid_mlp, r2_grid_mlp, nmse_grid_mlp = evaluate_performance(y_test, best_mlp_pred_grid)
# Print the model name
print("Model: MLP Regressor")

# Print the best parameters set by grid search
print("  Best Parameters:")
for param, value in best_params_grid_mlp.items():
    print(f"    {param}: {value}")

# Print the performance metrics
print(f"  Normalized Mean Squared Error: {nmse_grid_mlp}")
print(f"  Mean Absolute Error: {mae_grid_mlp}")
print(f"  Mean Squared Error: {mse_grid_mlp}")
print(f"  Mean Absolute Percentage Error: {mape_grid_mlp}")
print(f"  R^2 Score: {r2_grid_mlp}")


In [None]:
# SVR


# Suppress specific warnings
warnings.filterwarnings("ignore", message="The objective has been evaluated at this point before.", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", message="overflow encountered in square")
warnings.filterwarnings("ignore", message="overflow encountered in matmul")
warnings.filterwarnings("ignore", message="invalid value encountered in matmul")


# Define the parameter grid for Grid Search for SVR
param_grid_svr = {
    'C': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0],  # Expanded range for C
    'epsilon': [0.001, 0.01, 0.1, 0.5, 1.0],  # Expanded range for epsilon
    'kernel': ['linear', 'rbf', 'poly'],  # Include 'poly' kernel
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1.0],  # Expanded range for gamma and additional values
    'degree': [2, 3, 4],  # Include 'degree' parameter for polynomial kernel
    'coef0': [0.0, 0.1, 0.5, 1.0],  # Include 'coef0' parameter for polynomial and sigmoid kernels
}

# Perform Grid Search for SVR
grid_search_svr = GridSearchCV(SVR(), param_grid_svr, cv=5, scoring='neg_mean_squared_error')
grid_search_svr.fit(X_train_scaled, y_train)

# Get the best parameters and best score for Grid Search with SVR
best_params_grid_svr = grid_search_svr.best_params_
best_score_grid_svr = grid_search_svr.best_score_

# Train SVR with the best parameters from Grid Search
best_svr_grid = SVR(**best_params_grid_svr)
best_svr_grid.fit(X_train_scaled, y_train)

# Make predictions with the best SVR model from Grid Search
best_svr_pred_grid = best_svr_grid.predict(X_test_scaled)

# Evaluate performance metrics for the best SVR model from Grid Search
mae_grid_svr, mse_grid_svr, mape_grid_svr, r2_grid_svr, nmse_grid_svr = evaluate_performance(y_test, best_svr_pred_grid)

# Print the model name
print("Model: SVR")

# Print the best parameters set by grid search
print("  Best Parameters:")
for param, value in best_params_grid_svr.items():
    print(f"    {param}: {value}")

# Print the performance metrics including NMSE
print(f"  Normalized Mean Squared Error: {nmse_grid_svr}")
print(f"  Mean Absolute Error: {mae_grid_svr}")
print(f"  Mean Squared Error: {mse_grid_svr}")
print(f"  Mean Absolute Percentage Error: {mape_grid_svr}")
print(f"  R^2 Score: {r2_grid_svr}")


In [None]:
# SMO WITH SIGMOID KERNEL


# Suppress specific warnings
warnings.filterwarnings("ignore", message="The objective has been evaluated at this point before.", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Define the parameter grid for Grid Search for SMO with Sigmoid Kernel
param_grid_smo_sigmoid = {
    'C': [0.1, 1.0, 10.0],  # Regularization parameter
    'coef0': [0.0, 0.1, 0.5],  # Constant term of the sigmoid kernel
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1.0],  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
    'tol': [0.0001, 0.001, 0.01],  # Tolerance for stopping criterion
    'max_iter': [-1, 100, 1000]  # Maximum number of iterations
}

# Perform Grid Search for SMO with Sigmoid Kernel
grid_search_smo_sigmoid = GridSearchCV(SVR(kernel='sigmoid'), param_grid_smo_sigmoid, cv=5, scoring='neg_mean_squared_error')
grid_search_smo_sigmoid.fit(X_train_scaled, y_train)

# Get the best parameters and best score for Grid Search with SMO with Sigmoid Kernel
best_params_grid_smo_sigmoid = grid_search_smo_sigmoid.best_params_
best_score_grid_smo_sigmoid = grid_search_smo_sigmoid.best_score_

# Train SMO with Sigmoid Kernel with the best parameters from Grid Search
best_smo_sigmoid_grid = SVR(kernel='sigmoid', **best_params_grid_smo_sigmoid)
best_smo_sigmoid_grid.fit(X_train_scaled, y_train)

# Make predictions with the best SMO with Sigmoid Kernel model from Grid Search
best_smo_sigmoid_pred_grid = best_smo_sigmoid_grid.predict(X_test_scaled)

# Evaluate performance metrics for the best SMO with Sigmoid Kernel model from Grid Search
mae_grid_smo_sigmoid, mse_grid_smo_sigmoid, mape_grid_smo_sigmoid, r2_grid_smo_sigmoid, nmse_grid_smo_sigmoid = evaluate_performance(y_test, best_smo_sigmoid_pred_grid)

# Print the model name
print("Model: SMO with Sigmoid Kernel")

# Print the best parameters set by grid search
print("  Best Parameters:")
for param, value in best_params_grid_smo_sigmoid.items():
    print(f"    {param}: {value}")

# Print the performance metrics including NMSE
print(f"  Normalized Mean Squared Error: {nmse_grid_smo_sigmoid}")
print(f"  Mean Absolute Error: {mae_grid_smo_sigmoid}")
print(f"  Mean Squared Error: {mse_grid_smo_sigmoid}")
print(f"  Mean Absolute Percentage Error: {mape_grid_smo_sigmoid}")
print(f"  R^2 Score: {r2_grid_smo_sigmoid}")


In [None]:
# SMO WITH POLYNOMIAL KERNEL

# Suppress specific warnings
warnings.filterwarnings("ignore", message="The objective has been evaluated at this point before.", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Define the parameter grid for Grid Search for SMO with Polynomial Kernel
param_grid_smo_poly = {
    'C': [0.1, 1.0, 10.0],  # Regularization parameter
    'coef0': [0.0, 0.1, 0.5],  # Constant term of the polynomial kernel
    'degree': [2, 3, 4],  # Degree of the polynomial kernel function
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1.0],  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
    'tol': [0.0001, 0.001, 0.01],  # Tolerance for stopping criterion
    'max_iter': [-1, 100, 1000]  # Maximum number of iterations
}

# Perform Grid Search for SMO with Polynomial Kernel
grid_search_smo_poly = GridSearchCV(SVR(kernel='poly'), param_grid_smo_poly, cv=5, scoring='neg_mean_squared_error')
grid_search_smo_poly.fit(X_train_scaled, y_train)

# Get the best parameters and best score for Grid Search with SMO with Polynomial Kernel
best_params_grid_smo_poly = grid_search_smo_poly.best_params_
best_score_grid_smo_poly = grid_search_smo_poly.best_score_

# Train SMO with Polynomial Kernel with the best parameters from Grid Search
best_smo_poly_grid = SVR(kernel='poly', **best_params_grid_smo_poly)
best_smo_poly_grid.fit(X_train_scaled, y_train)

# Make predictions with the best SMO with Polynomial Kernel model from Grid Search
best_smo_poly_pred_grid = best_smo_poly_grid.predict(X_test_scaled)

# Evaluate performance metrics for the best SMO with Polynomial Kernel model from Grid Search
mae_grid_smo_poly, mse_grid_smo_poly, mape_grid_smo_poly, r2_grid_smo_poly, nmse_grid_smo_poly = evaluate_performance(y_test, best_smo_poly_pred_grid)

# Print the model name
print("Model: SMO with Polynomial Kernel")

# Print the best parameters set by grid search
print("  Best Parameters:")
for param, value in best_params_grid_smo_poly.items():
    print(f"    {param}: {value}")

# Print the performance metrics including NMSE
print(f"  Normalized Mean Squared Error: {nmse_grid_smo_poly}")
print(f"  Mean Absolute Error: {mae_grid_smo_poly}")
print(f"  Mean Squared Error: {mse_grid_smo_poly}")
print(f"  Mean Absolute Percentage Error: {mape_grid_smo_poly}")
print(f"  R^2 Score: {r2_grid_smo_poly}")


In [None]:
# SMO WITH RBF KERNEL

# Suppress specific warnings
warnings.filterwarnings("ignore", message="The objective has been evaluated at this point before.", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Define the parameter grid for Grid Search for SMO with RBF Kernel
param_grid_smo_rbf = {
    'C': [0.1, 1.0, 10.0],  # Regularization parameter
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1.0],  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
    'epsilon': [0.01, 0.1, 0.5],  # Epsilon in the epsilon-SVR model
    'tol': [0.0001, 0.001, 0.01],  # Tolerance for stopping criterion
    'max_iter': [-1, 100, 1000]  # Maximum number of iterations
}

# Perform Grid Search for SMO with RBF Kernel
grid_search_smo_rbf = GridSearchCV(SVR(kernel='rbf'), param_grid_smo_rbf, cv=5, scoring='neg_mean_squared_error')
grid_search_smo_rbf.fit(X_train_scaled, y_train)

# Get the best parameters and best score for Grid Search with SMO with RBF Kernel
best_params_grid_smo_rbf = grid_search_smo_rbf.best_params_
best_score_grid_smo_rbf = grid_search_smo_rbf.best_score_

# Train SMO with RBF Kernel with the best parameters from Grid Search
best_smo_rbf_grid = SVR(kernel='rbf', **best_params_grid_smo_rbf)
best_smo_rbf_grid.fit(X_train_scaled, y_train)

# Make predictions with the best SMO with RBF Kernel model from Grid Search
best_smo_rbf_pred_grid = best_smo_rbf_grid.predict(X_test_scaled)

# Evaluate performance metrics for the best SMO with RBF Kernel model from Grid Search
mae_grid_smo_rbf, mse_grid_smo_rbf, mape_grid_smo_rbf, r2_grid_smo_rbf, nmse_grid_smo_rbf = evaluate_performance(y_test, best_smo_rbf_pred_grid)

# Print the model name
print("Model: SMO with RBF Kernel")

# Print the best parameters set by grid search
print("  Best Parameters:")
for param, value in best_params_grid_smo_rbf.items():
    print(f"    {param}: {value}")

# Print the performance metrics including NMSE
print(f"  Normalized Mean Squared Error: {nmse_grid_smo_rbf}")
print(f"  Mean Absolute Error: {mae_grid_smo_rbf}")
print(f"  Mean Squared Error: {mse_grid_smo_rbf}")
print(f"  Mean Absolute Percentage Error: {mape_grid_smo_rbf}")
print(f"  R^2 Score: {r2_grid_smo_rbf}")
