#### importing necessary libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

#### DATASET WITH DELETED NULL ROWS

In [2]:
# Read the dataset and drop the 'Effort (Actual)' column
df = pd.read_csv('Effort estimation data set.csv')

# Store the 'Effort (Actual)' column separately
effort_actual_column = df['Effort (Actual)']

# Drop the 'Effort (Actual)' column
df.drop(['Effort (Actual)'], axis=1, inplace=True)

# Calculate the number of cells to delete (10% of total cells)
total_cells = df.size
cells_to_delete = int(0.1 * total_cells)

# Generate random row and column indices to select cells for deletion
indices_to_delete = np.random.choice(df.index, size=cells_to_delete, replace=True)

# Set the selected cells as NaN
for idx in indices_to_delete:
    row_idx, col_idx = np.random.randint(0, df.shape[0]), np.random.randint(0, df.shape[1])
    df.iat[row_idx, col_idx] = np.nan

# Append the 'Effort (Actual)' column to the modified DataFrame
df['Effort (Actual)'] = effort_actual_column

# Display the resulting DataFrame
df

Unnamed: 0,NOA,NEM,NSR,CP2,Effort (Actual)
0,170.0,142.0,97.0,110.55,286.00
1,292.0,409.0,295.0,242.54,396.00
2,929.0,821.0,567.0,446.60,471.00
3,755.0,975.0,723.0,760.96,1016.00
4,,,764.0,1242.60,1261.00
...,...,...,...,...,...
67,94.0,52.0,28.0,49.86,100.85
68,53.0,37.0,,29.26,47.15
69,34.0,23.0,17.0,20.68,44.83
70,110.0,67.0,36.0,58.47,128.27


#### Dropping rows with NaN values

In [3]:
df = df.dropna()
print(df.shape)
df

(51, 5)


Unnamed: 0,NOA,NEM,NSR,CP2,Effort (Actual)
0,170.0,142.0,97.0,110.55,286.0
1,292.0,409.0,295.0,242.54,396.0
2,929.0,821.0,567.0,446.6,471.0
3,755.0,975.0,723.0,760.96,1016.0
7,260.0,262.0,167.0,208.56,552.0
8,385.0,697.0,929.0,905.0,998.0
9,77.0,71.0,218.0,95.06,180.0
10,559.0,368.0,504.0,251.55,482.0
12,98.0,79.0,41.0,64.61,205.0
15,1087.0,885.0,701.0,1345.4,1414.0


#### Splitting the data into training and testing sets

In [4]:
X = df.drop(columns=['Effort (Actual)'])
y = df['Effort (Actual)']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### Linear Regression

In [5]:
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)

#### Decision Tree Regressor

In [6]:
dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)

#### MLP Regressor

In [7]:
mlp = MLPRegressor()
mlp.fit(X_train, y_train)
mlp_pred = mlp.predict(X_test)

#### SVR

In [8]:
svr = SVR()
svr.fit(X_train, y_train)
svr_pred = svr.predict(X_test)

#### SMO with sigmoid kernel (SVR with sigmoid kernel)

In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svr_sigmoid = SVR(kernel='sigmoid')
svr_sigmoid.fit(X_train_scaled, y_train)
svr_sigmoid_pred = svr_sigmoid.predict(X_test_scaled)

#### SMO with polynomial kernel (SVR with polynomial kernel)

In [10]:
svr_poly = SVR(kernel='poly')
svr_poly.fit(X_train_scaled, y_train)
svr_poly_pred = svr_poly.predict(X_test_scaled)

#### SMO with RBF kernel (SVR with RBF kernel)

In [11]:
svr_rbf = SVR(kernel='rbf')
svr_rbf.fit(X_train_scaled, y_train)
svr_rbf_pred = svr_rbf.predict(X_test_scaled)

#### Define a function to calculate performance metrics

In [12]:
# Define a function to calculate normalized mean squared error
def normalized_mean_squared_error(y_true, y_pred):
    return mean_squared_error(y_true, y_pred) / np.var(y_true)

def evaluate_performance(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    nmse = normalized_mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return mae, mse, mape, nmse, r2

# Evaluate performance metrics for the models
models = [lr, dt, mlp, svr, svr_sigmoid, svr_poly, svr_rbf]
predictions = [lr_pred, dt_pred, mlp_pred, svr_pred, svr_sigmoid_pred, svr_poly_pred, svr_rbf_pred]
model_names = ['Linear Regression', 'Decision Tree Regressor', 'MLP Regressor', 'SVR', 'SMO with Sigmoid Kernel', 'SMO with polynomial Kernel', 'SMO with RBF Kernel']

results = []
for model, pred, name in zip(models, predictions, model_names):
    mae, mse, mape, nmse, r2 = evaluate_performance(y_test, pred)
    results.append([name, mae, mse, mape, nmse, r2])

# Create DataFrame from results
df_results = pd.DataFrame(results, columns=["Model", "Mean Absolute Error", "Mean Squared Error", "Mean Absolute Percentage Error", "Normalized Mean Squared Error", "R^2 Score"])
df_results

Unnamed: 0,Model,Mean Absolute Error,Mean Squared Error,Mean Absolute Percentage Error,Normalized Mean Squared Error,R^2 Score
0,Linear Regression,82.146751,10800.944436,0.783733,0.035367,0.964633
1,Decision Tree Regressor,89.136364,20052.338418,0.256539,0.065661,0.934339
2,MLP Regressor,37.568921,3513.196454,0.108709,0.011504,0.988496
3,SVR,422.507656,316807.494118,3.388,1.037379,-0.037379
4,SMO with Sigmoid Kernel,412.233828,302228.032986,3.327428,0.989639,0.010361
5,SMO with polynomial Kernel,344.776806,243544.399836,2.535595,0.79748,0.20252
6,SMO with RBF Kernel,422.467637,316876.726099,3.385063,1.037605,-0.037605
