In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib

In [2]:
# Load the pre-split Amazon "UL_static" train/test features and targets.
# X_test previously pointed at the "DL_static" feature file, which does not
# belong with the "UL_static" targets and training data loaded alongside it.
X_test = pd.read_csv(r'../Training_And_Test/Amazon/X_test_UL_static.csv')
y_test = pd.read_csv(r'../Training_And_Test/Amazon/y_test_UL_static.csv')
X_train = pd.read_csv(r'../Training_And_Test/Amazon/X_train_UL_static.csv')
y_train = pd.read_csv(r'../Training_And_Test/Amazon/y_train_UL_static.csv')

### Linear Regression ###

In [3]:
from sklearn.linear_model import LinearRegression as LR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [4]:
# Two-stage pipeline: standardise the features, then fit a linear regression.
linear_steps = [
    ('scaler', StandardScaler()),
    ('model', LR()),
]
pipeline = Pipeline(steps=linear_steps)


In [5]:
# Fit the scaler and the regressor on the training split in one call.
pipeline.fit(X=X_train, y=y_train)

In [6]:
# Keep a descriptive handle on the fitted pipeline and persist it with joblib.
linear_model = pipeline
# NOTE(review): the training data comes from the "*_UL_static.csv" files but
# this path is under Static/Dowload — confirm the destination is intended.
joblib.dump(
    value=linear_model,
    filename=r'../Models/Amazon/Static/Dowload/linear.pkl',
)

['../Models/Amazon/Static/Dowload/linear.pkl']

### Random Forest Regressor  ###

In [22]:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

#### Randomized Search ####

In [23]:
# Tune the number of trees of a random forest with a randomized search.
# Fixed seeds make the sampled candidates, the fitted forests and therefore the
# selected `n_estimators` reproducible after a kernel restart.
rf_reg = RandomForestRegressor(random_state=42)

# Candidate forest sizes are drawn from the integers in [100, 2000).
param_dist = {
    'n_estimators': randint(100, 2000)
}

random_search = RandomizedSearchCV(
    estimator=rf_reg,
    param_distributions=param_dist,
    n_iter=12,  # number of sampled candidates
    cv=2,  # 2-fold cross-validation per candidate
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=5,
    random_state=42,  # seeds the sampling from `param_dist`
)

# The search is the final pipeline step, so the scaler is fitted once on the
# full training set and the search then runs on the scaled features.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('random_search', random_search),
])

pipeline.fit(X_train, y_train)



Fitting 2 folds for each of 12 candidates, totalling 24 fits


[CV] END ...................................n_estimators=397; total time=  13.7s
[CV] END ...................................n_estimators=397; total time=  13.7s
[CV] END ...................................n_estimators=779; total time=  26.8s
[CV] END ...................................n_estimators=779; total time=  26.9s
[CV] END ..................................n_estimators=1664; total time=  57.4s
[CV] END ..................................n_estimators=1664; total time=  57.5s
[CV] END ..................................n_estimators=1419; total time=  49.1s
[CV] END ...................................n_estimators=770; total time=  26.9s
[CV] END ..................................n_estimators=1419; total time=  49.0s
[CV] END ...................................n_estimators=767; total time=  26.9s
[CV] END ...................................n_estimators=770; total time=  27.3s
[CV] END ...................................n_estimators=767; total time=  27.4s
[CV] END ...................

In [24]:
# Report the hyper-parameters selected by the fitted randomized-search step.
fitted_random_search = pipeline.named_steps['random_search']
print("Best Parameters:", fitted_random_search.best_params_)


Best Parameters: {'n_estimators': 770}


In [25]:
# Persist the whole pipeline (scaler + fitted randomized search) with joblib.
rf_reg_random_search = pipeline
# NOTE(review): the training data comes from the "*_UL_static.csv" files but
# this path is under Driving/Upload — confirm the destination is intended.
joblib.dump(
    value=rf_reg_random_search,
    filename=r'../Models/Amazon/Driving/Upload/random_forest_resgressor_random_search.pkl',
)

['../Models/Amazon/Driving/Upload/random_forest_resgressor_random_search.pkl']

#### Grid Search ####

In [26]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor


# Exhaustively evaluate a fixed list of forest sizes with a grid search.
# Seeding the forest makes the cross-validated ranking of the candidates (and
# hence the selected `n_estimators`) reproducible after a kernel restart.
rf_reg = RandomForestRegressor(random_state=42)

param_grid = {
    'n_estimators': [100, 150, 200, 300, 400, 500, 700, 1000, 1200, 1500, 1800, 2000]
}

grid_search = GridSearchCV(
    estimator=rf_reg,
    param_grid=param_grid,
    cv=2,  # 2-fold cross-validation per candidate
    scoring='neg_mean_squared_error',
    n_jobs=5,
    verbose=2,
)

# The search is the final pipeline step, so the scaler is fitted once on the
# full training set and the search then runs on the scaled features.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('grid_search', grid_search),
])


pipeline.fit(X_train, y_train)

Fitting 2 folds for each of 12 candidates, totalling 24 fits
[CV] END ...................................n_estimators=100; total time=   3.5s
[CV] END ...................................n_estimators=100; total time=   3.5s
[CV] END ...................................n_estimators=150; total time=   5.2s
[CV] END ...................................n_estimators=150; total time=   5.3s
[CV] END ...................................n_estimators=200; total time=   6.9s
[CV] END ...................................n_estimators=200; total time=   6.9s
[CV] END ...................................n_estimators=300; total time=  10.3s
[CV] END ...................................n_estimators=300; total time=  10.3s
[CV] END ...................................n_estimators=400; total time=  13.7s
[CV] END ...................................n_estimators=400; total time=  13.8s
[CV] END ...................................n_estimators=500; total time=  17.1s
[CV] END ...................................n_es

In [27]:
# Report the hyper-parameters selected by the fitted grid-search step.
fitted_grid_search = pipeline.named_steps['grid_search']
print("Best Parameters:", fitted_grid_search.best_params_)


Best Parameters: {'n_estimators': 1000}


In [28]:
# Persist the whole pipeline (scaler + fitted grid search) with joblib.
rf_reg_grid_search = pipeline
# NOTE(review): the training data comes from the "*_UL_static.csv" files but
# this path is under Driving/Upload — confirm the destination is intended.
joblib.dump(
    value=rf_reg_grid_search,
    filename=r'../Models/Amazon/Driving/Upload/random_forest_resgressor_grid_search.pkl',
)

['../Models/Amazon/Driving/Upload/random_forest_resgressor_grid_search.pkl']

### Support Vector Machine ###

In [6]:
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### Grid Search ####

In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

# Support vector regressor with fixed C and gamma; only the kernel is searched.
svm_reg = SVR(gamma=0.0001, C=1000)

# Kernels compared by the grid search.
param_grid = {'kernel': ['rbf', 'poly']}

grid_search = GridSearchCV(
    svm_reg,
    param_grid,
    scoring='neg_mean_squared_error',
    cv=2,  # 2-fold cross-validation per kernel
    verbose=2,
)

# Scale the features first, then run the kernel search on the scaled data.
svr_steps = [
    ('scaler', StandardScaler()),
    ('grid_search', grid_search),
]
pipeline = Pipeline(steps=svr_steps)

# The target is passed as the single 'UL_bitrate' column of y_train.
ul_bitrate_target = y_train['UL_bitrate']
pipeline.fit(X_train, ul_bitrate_target)

Fitting 2 folds for each of 2 candidates, totalling 4 fits
[CV] END .........................................kernel=rbf; total time=   6.5s
[CV] END .........................................kernel=rbf; total time=   6.5s
[CV] END ........................................kernel=poly; total time=   4.6s
[CV] END ........................................kernel=poly; total time=   4.6s


In [8]:
# Pull the winning hyper-parameters out of the fitted grid-search step.
fitted_svr_search = pipeline.named_steps['grid_search']
best_params = fitted_svr_search.best_params_
print("Best Parameters:", best_params)

Best Parameters: {'kernel': 'poly'}


In [9]:
# Persist the whole pipeline (scaler + fitted SVR grid search) with joblib.
svr_grid_search = pipeline
# NOTE(review): the training data comes from the "*_UL_static.csv" files but
# this path is under Driving/Upload — confirm the destination is intended.
joblib.dump(
    value=svr_grid_search,
    filename=r'../Models/Amazon/Driving/Upload/suport_vector.pkl',
)

['../Models/Amazon/Driving/Upload/suport_vector.pkl']

### Neural Network ###

#### Feedforward Neural Network (FNN) ####

In [14]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

2023-08-30 13:13:39.156402: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-30 13:13:39.191386: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-30 13:13:39.417090: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-30 13:13:39.419554: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [17]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling (and therefore the reported losses) are reproducible.
tf.keras.utils.set_random_seed(42)

# Standardise the input features before feeding them to the network.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Build and compile the feed-forward network: two hidden ReLU layers and a
# single linear output unit for regression.
n_features = X_train_scaled.shape[1]
model = tf.keras.Sequential([
    # `shape` must be a tuple; a bare int is rejected by newer Keras releases.
    tf.keras.layers.Input(shape=(n_features,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Output layer for regression
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model and capture the training history; the last 20% of the
# training rows are held out by Keras as the validation set.
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=2)

# Print the training and validation loss (MSE) recorded for each epoch.
train_losses = history.history['loss']
val_losses = history.history['val_loss']
n_epochs = len(train_losses)
for epoch, (train_mse, val_mse) in enumerate(zip(train_losses, val_losses), start=1):
    print(f"Epoch {epoch}/{n_epochs} - Train MSE: {train_mse:.4f} - Val MSE: {val_mse:.4f}")


Epoch 1/50
612/612 - 1s - loss: 227460384.0000 - val_loss: 175857984.0000 - 863ms/epoch - 1ms/step
Epoch 2/50
612/612 - 1s - loss: 130192280.0000 - val_loss: 97950232.0000 - 602ms/epoch - 983us/step
Epoch 3/50
612/612 - 0s - loss: 100313224.0000 - val_loss: 91097944.0000 - 312ms/epoch - 509us/step
Epoch 4/50
612/612 - 1s - loss: 96867872.0000 - val_loss: 89718296.0000 - 526ms/epoch - 859us/step
Epoch 5/50
612/612 - 0s - loss: 95953672.0000 - val_loss: 89191240.0000 - 358ms/epoch - 586us/step
Epoch 6/50
612/612 - 1s - loss: 95501088.0000 - val_loss: 88920432.0000 - 646ms/epoch - 1ms/step
Epoch 7/50
612/612 - 1s - loss: 95241560.0000 - val_loss: 88690112.0000 - 553ms/epoch - 904us/step
Epoch 8/50
612/612 - 0s - loss: 95062904.0000 - val_loss: 88583416.0000 - 409ms/epoch - 668us/step
Epoch 9/50
612/612 - 1s - loss: 94959784.0000 - val_loss: 88494528.0000 - 559ms/epoch - 913us/step
Epoch 10/50
612/612 - 0s - loss: 94893056.0000 - val_loss: 88457528.0000 - 307ms/epoch - 502us/step
Epoch 11/

In [60]:
# Persist the trained Keras model with joblib.
neural_fnn = model
# NOTE(review): only the network is written here; the StandardScaler fitted on
# X_train is not saved with it, so inference code needs an identically fitted
# scaler — confirm how downstream loaders handle this. Also confirm the
# Static/Dowload destination for a model trained on the "*_UL_static.csv" data.
joblib.dump(
    value=neural_fnn,
    filename=r'../Models/Amazon/Static/Dowload/fnn.pkl',
)

['../Models/Amazon/Static/Dowload/fnn.pkl']

In [10]:
import tensorflow as tf
from sklearn.preprocessing import StandardScaler


# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling (and therefore the reported losses) are reproducible.
tf.keras.utils.set_random_seed(42)

# Standardize the input data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Build and compile the MLP model: three hidden ReLU layers and a single
# linear output unit for regression.
n_features = X_train_scaled.shape[1]
model = tf.keras.Sequential([
    # `shape` must be a tuple; a bare int is rejected by newer Keras releases.
    tf.keras.layers.Input(shape=(n_features,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Output layer for regression
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model and capture the training history; the last 20% of the
# training rows are held out by Keras as the validation set.
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=2)

# Print the training and validation loss (MSE) recorded for each epoch.
train_losses = history.history['loss']
val_losses = history.history['val_loss']
n_epochs = len(train_losses)
for epoch, (train_mse, val_mse) in enumerate(zip(train_losses, val_losses), start=1):
    print(f"Epoch {epoch}/{n_epochs} - Train MSE: {train_mse:.4f} - Val MSE: {val_mse:.4f}")


2023-08-30 13:29:45.270739: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-30 13:29:45.304127: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-30 13:29:45.533732: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-30 13:29:45.535585: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50


2023-08-30 13:29:46.825327: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-30 13:29:46.825828: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


661/661 - 1s - loss: 151853744.0000 - val_loss: 119465552.0000 - 971ms/epoch - 1ms/step
Epoch 2/50
661/661 - 1s - loss: 119407104.0000 - val_loss: 118576504.0000 - 586ms/epoch - 886us/step
Epoch 3/50
661/661 - 1s - loss: 118842104.0000 - val_loss: 118156696.0000 - 583ms/epoch - 882us/step
Epoch 4/50
661/661 - 1s - loss: 118584824.0000 - val_loss: 118034104.0000 - 751ms/epoch - 1ms/step
Epoch 5/50
661/661 - 1s - loss: 118528088.0000 - val_loss: 118011576.0000 - 752ms/epoch - 1ms/step
Epoch 6/50
661/661 - 1s - loss: 118515712.0000 - val_loss: 118004896.0000 - 604ms/epoch - 913us/step
Epoch 7/50
661/661 - 1s - loss: 118512008.0000 - val_loss: 118003416.0000 - 749ms/epoch - 1ms/step
Epoch 8/50
661/661 - 1s - loss: 118510376.0000 - val_loss: 118001832.0000 - 504ms/epoch - 763us/step
Epoch 9/50
661/661 - 1s - loss: 118510032.0000 - val_loss: 118001544.0000 - 654ms/epoch - 990us/step
Epoch 10/50
661/661 - 1s - loss: 118509856.0000 - val_loss: 118001296.0000 - 724ms/epoch - 1ms/step
Epoch 11/5

In [11]:
# Persist the trained Keras model with joblib.
neural_mlp = model
# NOTE(review): only the network is written here; the StandardScaler fitted on
# X_train is not saved with it, so inference code needs an identically fitted
# scaler — confirm how downstream loaders handle this. Also confirm the
# Driving/Upload destination for a model trained on the "*_UL_static.csv" data.
joblib.dump(
    value=neural_mlp,
    filename=r'../Models/Amazon/Driving/Upload/mlp.pkl',
)

['../Models/Amazon/Driving/Upload/mlp.pkl']