# In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, VotingRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from sklearn.impute import SimpleImputer

# Load data from the local Excel file.
# The path is relative to the current working directory; pandas raises
# FileNotFoundError when the workbook is not found there.
df = pd.read_excel("Data_set_1.xlsx")

# Extract input and output data for the first 230 sets
X = df[['Nodes', 'Radius', 'packet_sent', 'Packet_received', 'SF7', 'SF8', 'SF9', 'SF10', 'SF11', 'SF12']].iloc[:230]
Y = df['Actual PDR'].iloc[:230]  # Single output

# Discard records whose target is missing (or not finite) instead of rewriting
# it to 0.0: a zero would be learned and scored as if it were a genuinely
# measured value, which distorts both the fitted models and the metrics.
has_target = np.isfinite(Y.to_numpy(dtype=float))
X = X[has_target]
Y = Y[has_target]

# Impute missing values in the input data with the column mean.
# NOTE: the imputer is fit on every retained row, i.e. before the train/test
# split, so rows that later land in the test set contribute to these means.
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# Target as a plain float array (same container type the rest of the script
# already works with); no missing values remain after the filtering above.
Y_imputed = Y.to_numpy(dtype=float)

# Define models
# Every estimator that draws random numbers (bootstrap samples, feature
# sub-sampling, tie-breaking between equally good tree splits) gets the same
# fixed seed, so repeated runs report the same metrics. The value matches the
# random_state already used for the train/test split.
MODEL_SEED = 42

models = {
    "Bagging": BaggingRegressor(random_state=MODEL_SEED),
    "Random Forest": RandomForestRegressor(random_state=MODEL_SEED),
    # Averages the predictions of the three base learners.
    "Voting": VotingRegressor([
        ('lr', LinearRegression()),
        ('dt', DecisionTreeRegressor(random_state=MODEL_SEED)),
        ('rf', RandomForestRegressor(random_state=MODEL_SEED)),
    ]),
    # Feeds the base learners' predictions to the default final estimator.
    "Stacking": StackingRegressor([
        ('lr', LinearRegression()),
        ('dt', DecisionTreeRegressor(random_state=MODEL_SEED)),
        ('rf', RandomForestRegressor(random_state=MODEL_SEED)),
    ]),
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=MODEL_SEED),
    "k-Nearest Neighbors": KNeighborsRegressor(),
}

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_imputed,
    Y_imputed,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler learns its statistics from the training
# rows only and is then applied unchanged to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

def _smooth_curve(x, y, num_points=500):
    """Return a dense ``(grid, values)`` curve through the per-x mean of *y*.

    Samples sharing the same x are averaged first, because spline
    interpolation requires strictly distinct abscissae. Cubic interpolation
    is used when at least four distinct x values exist, linear interpolation
    otherwise. With a single distinct x the lone averaged point is returned.
    """
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()

    # unique_x is sorted; inverse maps every sample to its unique-x slot.
    unique_x, inverse = np.unique(x, return_inverse=True)
    inverse = inverse.ravel()
    mean_y = np.bincount(inverse, weights=y) / np.bincount(inverse)

    if unique_x.size < 2:
        return unique_x, mean_y

    kind = 'cubic' if unique_x.size >= 4 else 'linear'
    grid = np.linspace(unique_x[0], unique_x[-1], num_points)
    return grid, interp1d(unique_x, mean_y, kind=kind)(grid)


# First feature column ('Nodes') of the unscaled test rows is the plot's x-axis.
nodes_test = X_test[:, 0]

# The curve of actual values does not depend on the model, so build it once.
actual_x, actual_y = _smooth_curve(nodes_test, Y_test)

# Aggregate predictions from all models and compute evaluation metrics
metrics = {}
for model_name, model in models.items():
    # Fit the model
    model.fit(X_train_scaled, Y_train)

    # Predict
    Y_train_pred = model.predict(X_train_scaled)
    Y_test_pred = model.predict(X_test_scaled)

    # Compute evaluation metrics (errors on the test rows, R2 on both splits)
    mse = mean_squared_error(Y_test, Y_test_pred)
    mae = mean_absolute_error(Y_test, Y_test_pred)
    r2_train = r2_score(Y_train, Y_train_pred)
    r2_test = r2_score(Y_test, Y_test_pred)

    metrics[model_name] = {'MSE': mse, 'MAE': mae, 'R2 Train': r2_train, 'R2 Test': r2_test}

    # Plot predictions from each model in separate graphs with smooth curves
    plt.figure(figsize=(10, 8))

    # Smooth curve of the actual values
    plt.plot(actual_x, actual_y, color='blue', label='Actual')

    # Smooth curve of the predicted values
    pred_x, pred_y = _smooth_curve(nodes_test, Y_test_pred)
    plt.plot(pred_x, pred_y, color='red', label='Predicted')

    plt.xlabel('Nodes')
    plt.ylabel('Actual PDR')
    plt.title(f'{model_name} Algorithm')
    plt.legend()
    plt.show()

# Report the collected scores, one model per paragraph, in a fixed order.
for name, scores in metrics.items():
    print(f"Model: {name}")
    for label in ('MSE', 'MAE', 'R2 Train', 'R2 Test'):
        print(f"{label}: {scores[label]}")
    print()



# Recorded cell output: FileNotFoundError: [Errno 2] No such file or directory: 'Data_set_1.xlsx'