# Energy Efficiency Correlation & Prediction based on Network KPIs
Author: Fatih E. NAR

In [None]:
%pip install -r requirements.txt

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

# Read the 5G NetOps sample data (pandas infers the xz compression from the
# file extension by default).
file_path = 'data/5G_netops_data_100K.csv.xz'
df = pd.read_csv(file_path)

# Network KPI columns used as model inputs.
features = [
    'Cell Availability (%)',
    'MTTR (hours)',
    'Throughput (Mbps)',
    'Latency (ms)',
    'Packet Loss Rate (%)',
    'Call Drop Rate (%)',
    'Handover Success Rate (%)',
    'Alarm Count',
    'Critical Alarm Count',
    'Temperature (°C)',
    'Humidity (%)',
]

# The regression target. The dataset has no direct energy-efficiency column
# in use here, so the fault occurrence rate stands in for it; the
# visualization cell later converts it to an efficiency rate as 100 - value.
target = 'Fault Occurrence Rate (%)'

# Design matrix and response vector.
X = df[features]
y = df[target]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler statistics are learned from the
# training rows only and then applied to both splits, so no information
# from the test set leaks into the scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Step 4: Model Training
# Fit a linear regression on the standardized training features.
model = LinearRegression()
model.fit(X_train_scaled, y_train)

## Step 5: Model Evaluation
# Predictions on both splits so train and test errors can be compared.
y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

# Error metrics on the training split.
mae_train = mean_absolute_error(y_train, y_pred_train)
mse_train = mean_squared_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)

# Error metrics on the held-out test split.
mae_test = mean_absolute_error(y_test, y_pred_test)
mse_test = mean_squared_error(y_test, y_pred_test)
r2_test = r2_score(y_test, y_pred_test)

# Report the metrics; previously they were computed but never shown.
print(f"Train -> MAE: {mae_train:.4f} | MSE: {mse_train:.4f} | R2: {r2_train:.4f}")
print(f"Test  -> MAE: {mae_test:.4f} | MSE: {mse_test:.4f} | R2: {r2_test:.4f}")

# Coefficients of the regression model. Because the inputs are standardized,
# the coefficient magnitudes are comparable across features.
coefficients = model.coef_

# Rank features by the absolute size of their coefficient: a large negative
# coefficient is as influential as a large positive one, so sorting on the
# signed value would push strong negative effects to the bottom. The
# 'Importance' column still holds the signed coefficient.
feature_importance = pd.DataFrame({
    'Feature': features,
    'Importance': coefficients
}).sort_values(by='Importance', key=np.abs, ascending=False)

print(feature_importance.to_string(index=False))

## Step 7: Save the Model and Scaler
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
import os
os.makedirs('models', exist_ok=True)

# Persist the fitted regression model and the scaler that must be applied
# to any input before calling the model.
joblib.dump(model, 'models/energy_efficiency_model.pkl')
# NOTE(review): the scaler file name is spelled 'effficiency' (three f's).
# It is left unchanged here because other code may load this exact path;
# rename it together with every consumer.
joblib.dump(scaler, 'models/energy_effficiency_scaler.pkl')

In [None]:
## Result Visualization
# Columns of interest for the visualization (not read below; kept because
# it is a notebook-level name that other cells may reference).
visualization_features = ['Predicted', 'Fault Occurrence Rate (%)', 'Critical Alarm Count', 'Packet Loss Rate (%)', 'Call Drop Rate (%)']

# KPI columns plotted on the x-axis of both panels.
kpi_columns = ['Critical Alarm Count', 'Packet Loss Rate (%)', 'Call Drop Rate (%)']

# Build the plotting frame from an explicit copy of the test-set KPI columns.
# Assigning new columns to a plain column subset can raise pandas'
# SettingWithCopyWarning; working on a copy avoids that.
combined_df = X_test[kpi_columns].copy()
combined_df.insert(0, 'Predicted', y_pred_test)
# y_test shares X_test's index, so this assignment aligns row by row.
combined_df['Fault Occurrence Rate (%)'] = y_test

# Energy efficiency rate is taken as the complement (100 - rate) of the
# fault occurrence rate, for both the observed and the predicted values.
combined_df['Real Energy Efficiency Rate (%)'] = 100 - combined_df['Fault Occurrence Rate (%)']
combined_df['Predicted Energy Efficiency Rate (%)'] = 100 - combined_df['Predicted']

# Reshape to long form: one row per (sample, KPI) pair, so seaborn can draw
# one line per KPI via `hue`.
melted_real_df = combined_df.melt(id_vars='Real Energy Efficiency Rate (%)', value_vars=kpi_columns,
                                  var_name='Feature', value_name='Value')
melted_pred_df = combined_df.melt(id_vars='Predicted Energy Efficiency Rate (%)', value_vars=kpi_columns,
                                  var_name='Feature', value_name='Value')

sns.set(style="whitegrid")

fig, axs = plt.subplots(2, 1, figsize=(10, 12))

# The two panels differ only in data source, y column and title, so they are
# drawn by the same loop: observed rate on top, predicted rate below.
panels = [
    (axs[0], melted_real_df, 'Real Energy Efficiency Rate (%)',
     'Real Energy Efficiency Rate vs Feature Values'),
    (axs[1], melted_pred_df, 'Predicted Energy Efficiency Rate (%)',
     'Predicted Energy Efficiency Rate vs Feature Values'),
]
for ax, panel_df, rate_column, panel_title in panels:
    sns.lineplot(x='Value', y=rate_column, hue='Feature', data=panel_df, ax=ax, errorbar=None)
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('Feature Values', fontsize=14)
    ax.set_ylabel(rate_column, fontsize=14)
    ax.legend(title='Feature')

plt.tight_layout()
plt.show()