In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectPercentile, f_regression
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns
random_seed = 42
np.random.seed(random_seed)


In [16]:
# Load the dataset

data = pd.read_csv("/content/drive/MyDrive/dataset/data.csv")

# Define features (X) and target variable (y)
X = data[['Area', 'Sensing Range', 'Transmission Range', 'Number of Sensor nodes']]
y = data['Number of Barriers']

In [17]:
# Split the dataset into training (75%) and testing (25%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

X_train = X_train.values
y_train = y_train.values
X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.float32)
X_test = (X_test.values).astype(np.float32)
y_test = (y_test.values).astype(np.float32)

In [18]:
# Perform feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [19]:

# creating a dictionary to store the results
results = {}

In [20]:
#Linear regression

from sklearn.linear_model import LinearRegression

linearRegression = LinearRegression()

linearRegression.fit(X_train_scaled, y_train)

r2_Score = linearRegression.score(X_test_scaled, y_test)

print(r2_Score)

0.8480872892727505


In [None]:
# Initialize SVR model with polynomial kernel
regressor = SVR(kernel='poly')

# Fit the model on the training data
regressor.fit(X_train_scaled, y_train)

predictions = regressor.predict(X_test_scaled)

# Calculating initial R² score
r2score = regressor.score(X_test_scaled, y_test)
print(f'Initial R² Score: {r2score}')

# Parameter grid for Random Search
param_dist = {
    'C': [0.1, 1, 10, 100,300,500, 1000],
    'epsilon': [0.01, 0.1, 0.5, 1, 2, 5],
    'degree': [2, 3, 4, 5],
    'coef0': [0, 0.1, 0.5, 1, 2]
}


random_seed = 42
random_search = RandomizedSearchCV(
    estimator=SVR(kernel='poly'),
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    random_state=random_seed,
    n_jobs=-1
)
random_search.fit(X_train_scaled, y_train)

y_pred_random = random_search.best_estimator_.predict(X_test_scaled)

# Print R² score of the best estimator from Randomized Search
print(f'Random Search R² Score: {r2_score(y_test, y_pred_random)}')
print(f'Best Parameters from Random Search: {random_search.best_params_}')

#pameter grid for Grid Search
param_grid = {
    'C': [0.1, 1, 10, 100,300,500, 1000],
    'epsilon': [0.01, 0.1, 0.5, 1, 2, 5],
    'degree': [2, 3, 4, 5],
    'coef0': [0, 0.1, 0.5, 1, 2]
}

grid_search = GridSearchCV(estimator=SVR(kernel='poly'), param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

y_pred_grid = grid_search.best_estimator_.predict(X_test_scaled)

# Print R² score of the best estimator from Grid Search
print(f'Grid Search R² Score: {r2_score(y_test, y_pred_grid)}')
print(f'Best Parameters from Grid Search: {grid_search.best_params_}')

Initial R² Score: 0.5131019684047469
Random Search R² Score: 0.9998230015622588
Best Parameters from Random Search: {'epsilon': 0.1, 'degree': 5, 'coef0': 2, 'C': 1000}


In [None]:
regressor = SVR(kernel='rbf')

# Fit the model on the training data
regressor.fit(X_train_scaled, y_train)

predictions = regressor.predict(X_test_scaled)

# Calculate R² score befire hyp tuning
r2score = regressor.score(X_test_scaled, y_test)
print(f'Initial R² Score: {r2score}')

# Parameter grid for Random Search
param_dist = {
    'C': [0.1, 1, 10, 100,300,500, 1000],
    'epsilon': [0.01, 0.1, 0.5, 1, 2, 5],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1]
}

random_seed = 42
random_search = RandomizedSearchCV(
    estimator=SVR(kernel='rbf'),
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    random_state=random_seed,
    n_jobs=-1
)
random_search.fit(X_train_scaled, y_train)

y_pred_random = random_search.best_estimator_.predict(X_test_scaled)

print(f'Random Search R² Score: {r2_score(y_test, y_pred_random)}')
print(f'Best Parameters from Random Search: {random_search.best_params_}')

# Parameter grid for Grid Search
param_grid = {
    'C': [0.1, 1, 10, 100,300,600, 1000],
    'epsilon': [0.01, 0.1, 0.5, 1, 2, 5],
    'gamma': ['scale', 'auto']
}

grid_search = GridSearchCV(estimator=SVR(kernel='rbf'), param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

y_pred_grid = grid_search.best_estimator_.predict(X_test_scaled)

print(f'Grid Search R² Score: {r2_score(y_test, y_pred_grid)}')
print(f'Best Parameters from Grid Search: {grid_search.best_params_}')

In [14]:
#decision tree
# Parameter grid for Random Search
param_dist = {
    'max_depth': [None, 10, 20, 30, 40, 50, 60, 70],
    'max_features': [None, 'auto', 'sqrt', 'log2'],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'splitter': ['best', 'random']
}

# Parameter grid for Grid Search
param_grid = {
    'max_depth': [None, 10, 20, 30, 40, 50, 60, 70],
    'max_features': [None, 'auto', 'sqrt', 'log2'],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'splitter': ['best', 'random']
}

#Calculation pre-tuning score
regressor = DecisionTreeRegressor(random_state=random_seed)
regressor.fit(X_train, y_train)

predictions = regressor.predict(X_test)
r2score = regressor.score(X_test, y_test)
print(r2score)

#random search
print("After performing random search")

random_search = RandomizedSearchCV(
    estimator=regressor,
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    random_state=random_seed,
    n_jobs=-1
)
random_search.fit(X_train_scaled, y_train)

y_pred = random_search.best_estimator_.predict(X_test_scaled)

print(f'Random Search R² Score: {r2_score(y_test, y_pred)}')
print(f'Best Parameters: {random_search.best_params_}')
print("")
print("after performing grid search")

# Perform Grid Search
grid_search = GridSearchCV(estimator=regressor, param_grid=param_grid, cv=5,  n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

best_rf_grid = grid_search.best_estimator_
y_pred_grid = best_rf_grid.predict(X_test_scaled)
print(f'Grid Search R² Score: {r2_score(y_test, y_pred_grid)}')
print(f'Best Parameters: {grid_search.best_params_}')

0.9583936147086227
After performing random search




Random Search R² Score: 0.9613803439510321
Best Parameters: {'splitter': 'random', 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'auto', 'max_depth': 20}

after performing grid search
Grid Search R² Score: 0.9613803439510321
Best Parameters: {'max_depth': None, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'random'}


In [11]:
#Random forest

from sklearn.ensemble import RandomForestRegressor

regressor = RandomForestRegressor(random_state=42)
regressor.fit(X_train, y_train)

predictions = regressor.predict(X_test)

r2score = regressor.score(X_test, y_test)
print(r2score)
print("After performing random search")

#Parameter grid for Random Search
param_dist = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_features': [1.0, 'sqrt', 'log2'],
    'max_depth': [None, 10, 20, 30, 40, 50, 60, 70],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Parameter grid for Grid Search
param_grid = {
    'n_estimators': [100, 200, 300,500],
    'max_features': ['auto', 'sqrt'],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}



random_search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=random_seed),
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    random_state=random_seed,
    n_jobs=-1
)
random_search.fit(X_train_scaled, y_train)

y_pred = random_search.best_estimator_.predict(X_test_scaled)

print(f'Random Search R² Score: {r2_score(y_test, y_pred)}')
print(f'Best Parameters: {random_search.best_params_}')
print("")
print("after performing grid search")

# Perform Grid Search
grid_search = GridSearchCV(estimator=regressor, param_grid=param_grid, cv=5,  n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

best_rf_grid = grid_search.best_estimator_
y_pred_grid = best_rf_grid.predict(X_test_scaled)
print(f'Grid Search R² Score: {r2_score(y_test, y_pred_grid)}')
print(f'Best Parameters: {grid_search.best_params_}')

0.9820857161668751
After performing random search
Random Search R² Score: 0.984883288772921
Best Parameters: {'n_estimators': 400, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 1.0, 'max_depth': 40}

after performing grid search


  warn(


Grid Search R² Score: 0.9846476026044559
Best Parameters: {'max_depth': 20, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [12]:



# Initialize AdaBoost Regressor
regressor = AdaBoostRegressor()

regressor.fit(X_train_scaled, y_train)

predictions = regressor.predict(X_test_scaled)

# Calculate initial R² score
r2score = regressor.score(X_test_scaled, y_test)
print(f'Initial R² Score: {r2score}')

# Parameter grid for Random Search
param_dist = {
    'n_estimators': [50, 100, 200, 500, 1000],
    'learning_rate': [0.01, 0.05, 0.1, 0.5, 1.0]
}

random_seed = 42
random_search = RandomizedSearchCV(
    estimator=AdaBoostRegressor(),
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    random_state=random_seed,
    n_jobs=-1
)
random_search.fit(X_train_scaled, y_train)

y_pred_random = random_search.best_estimator_.predict(X_test_scaled)

print(f'Random Search R² Score: {r2_score(y_test, y_pred_random)}')
print(f'Best Parameters from Random Search: {random_search.best_params_}')

# Parameter grid for Grid Search
param_grid = {
    'n_estimators': [50, 100, 200, 500, 1000],
    'learning_rate': [0.01, 0.05, 0.1, 0.5, 1.0]
}

# Perform Grid Search
grid_search = GridSearchCV(
    estimator=AdaBoostRegressor(),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1
)
grid_search.fit(X_train_scaled, y_train)

y_pred_grid = grid_search.best_estimator_.predict(X_test_scaled)

print(f'Grid Search R² Score: {r2_score(y_test, y_pred_grid)}')
print(f'Best Parameters from Grid Search: {grid_search.best_params_}')


Initial R² Score: 0.9329108526724954




Random Search R² Score: 0.9275467183974118
Best Parameters from Random Search: {'n_estimators': 50, 'learning_rate': 0.5}
Grid Search R² Score: 0.9257789853970088
Best Parameters from Grid Search: {'learning_rate': 1.0, 'n_estimators': 100}


In [13]:
# Initialize GradientBoostingRegressor model
regressor = GradientBoostingRegressor()

# Fit the model on the training data
regressor.fit(X_train, y_train)

predictions = regressor.predict(X_test)

# Calculate initial R² score
r2score = regressor.score(X_test, y_test)
print(f'Initial R² Score: {r2score}')

# Parameter grid for Random Search
param_dist = {
    'n_estimators': [50, 100, 200, 500, 1000],
    'learning_rate': [0.01, 0.1, 0.05, 0.1, 0.5, 1.0],
    'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4, 6, 8, 10],
    'subsample': [0.6, 0.7, 0.8, 0.9, 1.0]
}

random_seed = 42
random_search = RandomizedSearchCV(
    estimator=GradientBoostingRegressor(),
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    random_state=random_seed,
    n_jobs=-1
)
random_search.fit(X_train_scaled, y_train)

y_pred_random = random_search.best_estimator_.predict(X_test_scaled)

print(f'Random Search R² Score: {r2_score(y_test, y_pred_random)}')
print(f'Best Parameters from Random Search: {random_search.best_params_}')



Initial R² Score: 0.9874358789970695
Random Search R² Score: 0.9940536352256621
Best Parameters from Random Search: {'subsample': 0.6, 'n_estimators': 100, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 3, 'learning_rate': 0.1}


In [None]:
#Phase 1 result

import matplotlib.pyplot as plt

# Data
models = ["Linear Regression", "SVM (Linear Kernel)", "SVM (Polynomial Kernel)", "SVM (RBF Kernel)", "Decision Tree", "Random Forest", "AdaBoost", "Gradient Boosting"]
scores = [0.848, 0.750, 0.513, 0.289, 0.958, 0.981, 0.938, 0.987]

# Plotting the bar graph
plt.figure(figsize=(10, 7))
plt.bar(models, scores, color='orange')
plt.xlabel('Models')
plt.ylabel('R² Score')
plt.title('Regression model comparision')
plt.xticks(rotation=45, ha='right')


plt.show()

In [None]:
#Visualisation after performing hyperparameter tuning

#feeding the datsets for the plot
models = ["SVM (Polynomial)", "SVM (RBF Kernel)", "Decision Tree", "Random Forest", "AdaBoost", "Gradient Boosting"]
r2_scores_before = [0.513, 0.289, 0.958, 0.981, 0.938, 0.987]
r2_scores_after = [0.999, 0.999, 0.961, 0.984, 0.929, 0.995]

# Defining the positions and width for the bars
x = np.arange(len(models))
width = 0.3

plt.style.use('seaborn-darkgrid')
fig, ax = plt.subplots(figsize=(12, 8))

rects1 = ax.bar(x - width/2, r2_scores_before, width, label='Before Tuning', color='mediumpurple')
rects2 = ax.bar(x + width/2, r2_scores_after, width, label='After Tuning', color='darkorange')

# Adding labels and other charecterstrics
ax.set_xlabel('Model', fontsize=12)
ax.set_ylabel('R² Score', fontsize=12)
ax.set_title('R² Scores Before and After Hyperparameter Tuning', fontsize=14)
ax.set_xticks(x)
ax.set_xticklabels(models, rotation=45, ha='right', fontsize=10)
ax.legend(loc='best')
ax.bar_label(rects1, padding=3, fmt='%.3f')
ax.bar_label(rects2, padding=3, fmt='%.3f')

ax.set_ylim(0, 1.1)

plt.tight_layout()
plt.show()


In [None]:
#Applying tsne

tsne = TSNE(n_components=2, init='pca', learning_rate='auto', random_state=42)
tsne_results = tsne.fit_transform(X_scaled)

tsne_df = pd.DataFrame(tsne_results, columns=['TSNE1', 'TSNE2'])
tsne_df['Number of Barriers'] = y

plt.figure(figsize=(8, 6))
scatter = plt.scatter(tsne_df['TSNE1'], tsne_df['TSNE2'], c=tsne_df['Number of Barriers'], cmap='viridis', alpha=0.7)
colorbar = plt.colorbar(scatter)
colorbar.set_label('Number of Barriers')
plt.title('2D Visualization of Data using t-SNE')
plt.xlabel('TSNE1')
plt.ylabel('TSNE2')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
#Feature selection

from sklearn.feature_selection import SelectPercentile, f_regression

feature_selection = SelectPercentile(score_func=f_regression, percentile=50)
X_train_scaled_feature_selection = feature_selection.fit_transform(X_train_scaled, y_train)
X_test_scaled_feature_selection = feature_selection.transform(X_test_scaled)

selected_feature_indices = feature_selection.get_support(indices=True)


feature_names = np.array(['Area','Sensing rane', 'transmission range', 'number of sensors'])

# Get the selected feature names
selected_feature_names = feature_names[selected_feature_indices]

print("Selected feature names:")
print(selected_feature_names)


In [None]:
#Training the best model with selected features
gb = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, max_depth=3, random_state=42)
gb.fit(X_train_scaled_feature_selection, y_train)

# Predict and evaluate
y_pred = gb.predict(X_test_scaled_feature_selection)
r2 = r2_score(y_test, y_pred)

print(f"R² Score with selected features: {r2}")