# Importing Libraries

In [36]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import warnings

# Install a blanket "ignore" filter: every warning category is suppressed
# from this point on, so library warnings will not appear in cell outputs.
warnings.simplefilter("ignore")

# Load the Processed Dataset

In [37]:
# Load the processed dataset plus the scaler and label-encoder objects that
# were saved as .pkl files alongside it.
try:
    df_model = pd.read_csv('processed_lna_dataset.csv')
    # NOTE(review): joblib.load unpickles arbitrary Python objects -- only load
    # .pkl files that were produced by a trusted source.
    scaler = joblib.load('scaler.pkl')
    le_material = joblib.load('label_encoder.pkl')  # label encoder for material
except FileNotFoundError as e:
    print(f"Error loading files: {e}")
    print("Please ensure 'processed_lna_dataset.csv', 'scaler.pkl', and 'label_encoder.pkl' are in the same directory.")
    # Stop here with the real cause. Swallowing the error would leave df_model
    # undefined and the code below would fail with an unrelated NameError.
    raise
else:
    # Only report success when all three artifacts were actually loaded.
    print("Processed data and objects loaded successfully.")
    print("DataFrame shape:", df_model.shape)

# Display the first few rows to make sure it loaded correctly
print("Processed data loaded successfully:")
df_model.head()

Processed data and objects loaded successfully.
DataFrame shape: (512, 18)
Processed data loaded successfully:


Unnamed: 0,material,gLen_µm,freq_Ghz,gain_dB,noise_dB,bandwidth_GHz,lna_arch_3stage,lna_arch_3stageCS,lna_arch_4stage,lna_arch_5stage,lna_arch_6stage,lna_arch_Cascode,lna_arch_Distributed,lna_arch_Foldedcascode,lna_arch_PowerAmplifier,lna_arch_Singlestage,lna_arch_UWB,lna_arch_Unknown
0,1,1.865969,-0.831825,12.0,1.35,-1.033797,0,0,0,0,0,0,0,1,0,0,0,0
1,1,-0.010261,-0.797275,17.0,1.0,-1.021621,0,0,0,0,0,0,0,0,0,0,0,1
2,1,-0.948376,0.977322,12.0,2.5,-0.169327,0,0,0,0,0,1,0,0,0,0,0,0
3,1,1.865969,-0.593118,15.2,1.8,-0.899865,0,0,0,0,0,0,0,0,0,0,0,1
4,1,-0.010261,2.045221,18.0,3.5,0.561211,1,0,0,0,0,0,0,0,0,0,0,0


# Setting Up Data

## Define the features and target variable

In [38]:
# Feature matrix: every column of the processed frame except the two targets.
X = df_model.drop(columns=['noise_dB', 'gain_dB'])

# The two regression targets, one Series each.
y_noise, y_gain = df_model['noise_dB'], df_model['gain_dB']

# Keep the ordered feature names; the final prediction function needs them.
feature_columns = X.columns.to_list()

print("Features (X) and Targets (y_noise, y_gain) have been defined.")
print("Shape of feature matrix X:", X.shape)
print("Shape of noise target y:", y_noise.shape)
print("Shape of gain target y:", y_gain.shape)

Features (X) and Targets (y_noise, y_gain) have been defined.
Shape of feature matrix X: (512, 16)
Shape of noise target y: (512,)
Shape of gain target y: (512,)


## Split the Data into Training and Testing Sets

In [39]:
# A single call splits the features and both targets together, so every model
# is trained and scored on exactly the same rows.
# test_size=0.2 holds out 20% of the rows; random_state=42 makes the split
# identical on every run.
(
    X_train, X_test,
    y_noise_train, y_noise_test,
    y_gain_train, y_gain_test,
) = train_test_split(X, y_noise, y_gain, test_size=0.2, random_state=42)

print("Data has been split into training and testing sets.")
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

Data has been split into training and testing sets.
X_train shape: (409, 16)
X_test shape: (103, 16)


# Train and Evaluate the Three Models

## Imports

In [40]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Collects one entry per model name; each model cell below adds its metrics.
model_scores = dict()

## Model 1: Linear Regression

In [41]:
# Build and train one Linear Regression per target. fit() hands back the
# fitted estimator, so construction and training are chained.
print("Training Linear Regression models...")
lr_model_noise = LinearRegression().fit(X_train, y_noise_train)
lr_model_gain = LinearRegression().fit(X_train, y_gain_train)
print("Training complete.")

# --- Predict on the held-out test rows ---
y_noise_pred_lr, y_gain_pred_lr = (
    lr_model_noise.predict(X_test),
    lr_model_gain.predict(X_test),
)

# --- Score each target: mean absolute error and R² ---
lr_noise_mae, lr_noise_r2 = (
    mean_absolute_error(y_noise_test, y_noise_pred_lr),
    r2_score(y_noise_test, y_noise_pred_lr),
)
lr_gain_mae, lr_gain_r2 = (
    mean_absolute_error(y_gain_test, y_gain_pred_lr),
    r2_score(y_gain_test, y_gain_pred_lr),
)

# Record the metrics under this model's name for the later comparison table.
model_scores['Linear Regression'] = {
    'Noise MAE': lr_noise_mae, 'Noise R²': lr_noise_r2,
    'Gain MAE': lr_gain_mae, 'Gain R²': lr_gain_r2,
}

# --- Report: one print call, one line per item ---
print(
    "\n--- Linear Regression Performance ---",
    f"Noise Model MAE: {lr_noise_mae:.4f} dB",
    f"Noise Model R²:  {lr_noise_r2:.4f}",
    "-----------------------------------",
    f"Gain Model MAE:  {lr_gain_mae:.4f} dB",
    f"Gain Model R²:   {lr_gain_r2:.4f}",
    sep="\n",
)

Training Linear Regression models...
Training complete.

--- Linear Regression Performance ---
Noise Model MAE: 0.4141 dB
Noise Model R²:  0.8288
-----------------------------------
Gain Model MAE:  2.7693 dB
Gain Model R²:   0.3945


## Model 2: Random Forest

In [42]:
# Build and train one Random Forest per target. n_estimators=100 grows 100
# trees; random_state=42 fixes the seed. fit() hands back the fitted
# estimator, so construction and training are chained.
print("Training Random Forest models...")
rf_model_noise = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_noise_train)
rf_model_gain = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_gain_train)
print("Training complete.")

# --- Predict on the held-out test rows ---
y_noise_pred_rf, y_gain_pred_rf = (
    rf_model_noise.predict(X_test),
    rf_model_gain.predict(X_test),
)

# --- Score each target: mean absolute error and R² ---
rf_noise_mae, rf_noise_r2 = (
    mean_absolute_error(y_noise_test, y_noise_pred_rf),
    r2_score(y_noise_test, y_noise_pred_rf),
)
rf_gain_mae, rf_gain_r2 = (
    mean_absolute_error(y_gain_test, y_gain_pred_rf),
    r2_score(y_gain_test, y_gain_pred_rf),
)

# Record the metrics under this model's name for the later comparison table.
model_scores['Random Forest'] = {
    'Noise MAE': rf_noise_mae, 'Noise R²': rf_noise_r2,
    'Gain MAE': rf_gain_mae, 'Gain R²': rf_gain_r2,
}

# --- Report: one print call, one line per item ---
print(
    "\n--- Random Forest Performance ---",
    f"Noise Model MAE: {rf_noise_mae:.4f} dB",
    f"Noise Model R²:  {rf_noise_r2:.4f}",
    "---------------------------------",
    f"Gain Model MAE:  {rf_gain_mae:.4f} dB",
    f"Gain Model R²:   {rf_gain_r2:.4f}",
    sep="\n",
)

Training Random Forest models...
Training complete.

--- Random Forest Performance ---
Noise Model MAE: 0.3811 dB
Noise Model R²:  0.8359
---------------------------------
Gain Model MAE:  2.2727 dB
Gain Model R²:   0.5837


## Model 3: Gradient Boosting

In [43]:
# Build and train one Gradient Boosting regressor per target (100 boosting
# stages, seed fixed at 42). fit() hands back the fitted estimator, so
# construction and training are chained.
print("Training Gradient Boosting models...")
gb_model_noise = GradientBoostingRegressor(n_estimators=100, random_state=42).fit(X_train, y_noise_train)
gb_model_gain = GradientBoostingRegressor(n_estimators=100, random_state=42).fit(X_train, y_gain_train)
print("Training complete.")

# --- Predict on the held-out test rows ---
y_noise_pred_gb, y_gain_pred_gb = (
    gb_model_noise.predict(X_test),
    gb_model_gain.predict(X_test),
)

# --- Score each target: mean absolute error and R² ---
gb_noise_mae, gb_noise_r2 = (
    mean_absolute_error(y_noise_test, y_noise_pred_gb),
    r2_score(y_noise_test, y_noise_pred_gb),
)
gb_gain_mae, gb_gain_r2 = (
    mean_absolute_error(y_gain_test, y_gain_pred_gb),
    r2_score(y_gain_test, y_gain_pred_gb),
)

# Record the metrics under this model's name for the later comparison table.
model_scores['Gradient Boosting'] = {
    'Noise MAE': gb_noise_mae, 'Noise R²': gb_noise_r2,
    'Gain MAE': gb_gain_mae, 'Gain R²': gb_gain_r2,
}

# --- Report: one print call, one line per item ---
print(
    "\n--- Gradient Boosting Performance ---",
    f"Noise Model MAE: {gb_noise_mae:.4f} dB",
    f"Noise Model R²:  {gb_noise_r2:.4f}",
    "-------------------------------------",
    f"Gain Model MAE:  {gb_gain_mae:.4f} dB",
    f"Gain Model R²:   {gb_gain_r2:.4f}",
    sep="\n",
)

Training Gradient Boosting models...
Training complete.

--- Gradient Boosting Performance ---
Noise Model MAE: 0.3348 dB
Noise Model R²:  0.8783
-------------------------------------
Gain Model MAE:  2.2761 dB
Gain Model R²:   0.6092


# Comparing Models to select the Best

In [44]:
# Turn the nested scores dict into a table. Transposing puts one model per
# row and one metric per column, which is easier to read.
performance_df = pd.DataFrame(model_scores).transpose()

print("\n--- Model Performance Comparison ---", performance_df, sep="\n")


--- Model Performance Comparison ---
                   Noise MAE  Noise R²  Gain MAE   Gain R²
Linear Regression   0.414067  0.828830  2.769330  0.394502
Random Forest       0.381149  0.835903  2.272682  0.583702
Gradient Boosting   0.334782  0.878266  2.276097  0.609208


# Saving the Best Models

In [45]:
import joblib

# --- Persist the chosen models (Gradient Boosting), one .pkl file per target ---
for _fitted_model, _pkl_path in (
    (gb_model_noise, 'gb_model_noise.pkl'),
    (gb_model_gain, 'gb_model_gain.pkl'),
):
    joblib.dump(_fitted_model, _pkl_path)

print("Champion models have been saved to .pkl files.")

Champion models have been saved to .pkl files.
