In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib

In [2]:
X_test = pd.read_csv(r'../Training_And_Test/Amazon/X_test_DL_static.csv')
y_test = pd.read_csv(r'../Training_And_Test/Amazon/y_test_UL_static.csv')
X_train = pd.read_csv(r'../Training_And_Test/Amazon/X_train_UL_static.csv')
y_train = pd.read_csv(r'../Training_And_Test/Amazon/y_train_UL_static.csv')

### Random Forest Regressor  ###

In [22]:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

#### Randomized Search ####

In [23]:
rf_reg = RandomForestRegressor()


param_dist = {
    'n_estimators': randint(100, 2000)  
}

random_search = RandomizedSearchCV(
    estimator=rf_reg,
    param_distributions=param_dist,
    n_iter=12,
    cv=2,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs= 5
)

pipeline = Pipeline([
    ('scaler', StandardScaler()),  
    ('random_search', random_search),
])

pipeline.fit(X_train, y_train)



Fitting 2 folds for each of 12 candidates, totalling 24 fits


[CV] END ...................................n_estimators=397; total time=  13.7s
[CV] END ...................................n_estimators=397; total time=  13.7s
[CV] END ...................................n_estimators=779; total time=  26.8s
[CV] END ...................................n_estimators=779; total time=  26.9s
[CV] END ..................................n_estimators=1664; total time=  57.4s
[CV] END ..................................n_estimators=1664; total time=  57.5s
[CV] END ..................................n_estimators=1419; total time=  49.1s
[CV] END ...................................n_estimators=770; total time=  26.9s
[CV] END ..................................n_estimators=1419; total time=  49.0s
[CV] END ...................................n_estimators=767; total time=  26.9s
[CV] END ...................................n_estimators=770; total time=  27.3s
[CV] END ...................................n_estimators=767; total time=  27.4s
[CV] END ...................

In [24]:
print("Best Parameters:", pipeline.named_steps['random_search'].best_params_)


Best Parameters: {'n_estimators': 770}


In [25]:
rf_reg_random_search = pipeline
joblib.dump(rf_reg_random_search, r'../Models/Amazon/Driving/Upload/random_forest_resgressor_random_search.pkl')

['../Models/Amazon/Driving/Upload/random_forest_resgressor_random_search.pkl']

#### Grid Search ####

In [26]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor


rf_reg = RandomForestRegressor()

param_grid = {
    'n_estimators': [100, 150, 200, 300, 400 ,500, 700, 1000,1200, 1500, 1800, 2000]
}

grid_search = GridSearchCV(
    estimator=rf_reg, 
    param_grid=param_grid, 
    cv=2, 
    scoring='neg_mean_squared_error',
    n_jobs= 5,
    verbose=2,
)


pipeline = Pipeline([
    ('scaler', StandardScaler()),  
    ('grid_search', grid_search),
])


pipeline.fit(X_train, y_train)

Fitting 2 folds for each of 12 candidates, totalling 24 fits
[CV] END ...................................n_estimators=100; total time=   3.5s
[CV] END ...................................n_estimators=100; total time=   3.5s
[CV] END ...................................n_estimators=150; total time=   5.2s
[CV] END ...................................n_estimators=150; total time=   5.3s
[CV] END ...................................n_estimators=200; total time=   6.9s
[CV] END ...................................n_estimators=200; total time=   6.9s
[CV] END ...................................n_estimators=300; total time=  10.3s
[CV] END ...................................n_estimators=300; total time=  10.3s
[CV] END ...................................n_estimators=400; total time=  13.7s
[CV] END ...................................n_estimators=400; total time=  13.8s
[CV] END ...................................n_estimators=500; total time=  17.1s
[CV] END ...................................n_es

In [27]:
print("Best Parameters:", pipeline.named_steps['grid_search'].best_params_)


Best Parameters: {'n_estimators': 1000}


In [28]:
rf_reg_grid_search = pipeline
joblib.dump(rf_reg_grid_search , r'../Models/Amazon/Driving/Upload/random_forest_resgressor_grid_search.pkl')

['../Models/Amazon/Driving/Upload/random_forest_resgressor_grid_search.pkl']

### Suporte Vector Machine ###

In [6]:
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### Grid Search ####

In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

svm_reg = SVR(C = 1000,gamma = 0.0001 )

param_grid = {
    'kernel': [ 'rbf', 'poly']   
}


grid_search = GridSearchCV(
    estimator= svm_reg, 
    param_grid=param_grid, 
    cv=2, 
    scoring='neg_mean_squared_error',
    verbose=2,
)


pipeline = Pipeline([
    ('scaler', StandardScaler()),  
    ('grid_search', grid_search),
])


pipeline.fit(X_train, y_train['UL_bitrate'])

Fitting 2 folds for each of 2 candidates, totalling 4 fits
[CV] END .........................................kernel=rbf; total time=   6.5s
[CV] END .........................................kernel=rbf; total time=   6.5s
[CV] END ........................................kernel=poly; total time=   4.6s
[CV] END ........................................kernel=poly; total time=   4.6s


In [8]:
best_params = pipeline.named_steps['grid_search'].best_params_
print("Best Parameters:", best_params)

Best Parameters: {'kernel': 'poly'}


In [9]:
svr_grid_search = pipeline
joblib.dump(svr_grid_search , r'../Models/Amazon/Driving/Upload/suport_vector.pkl')

['../Models/Amazon/Driving/Upload/suport_vector.pkl']

#### Feedforward Neural Network (FNN) ####

In [1]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [17]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Build and compile your model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=X_train_scaled.shape[1]),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Output layer for regression
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model and capture the training history
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=2)

# Extract and print the training and validation loss (MSE) for each epoch
for epoch in range(len(history.history['loss'])):
    train_mse = history.history['loss'][epoch]
    val_mse = history.history['val_loss'][epoch]
    print(f"Epoch {epoch + 1}/{len(history.history['loss'])} - Train MSE: {train_mse:.4f} - Val MSE: {val_mse:.4f}")


Epoch 1/50
612/612 - 1s - loss: 227460384.0000 - val_loss: 175857984.0000 - 863ms/epoch - 1ms/step
Epoch 2/50
612/612 - 1s - loss: 130192280.0000 - val_loss: 97950232.0000 - 602ms/epoch - 983us/step
Epoch 3/50
612/612 - 0s - loss: 100313224.0000 - val_loss: 91097944.0000 - 312ms/epoch - 509us/step
Epoch 4/50
612/612 - 1s - loss: 96867872.0000 - val_loss: 89718296.0000 - 526ms/epoch - 859us/step
Epoch 5/50
612/612 - 0s - loss: 95953672.0000 - val_loss: 89191240.0000 - 358ms/epoch - 586us/step
Epoch 6/50
612/612 - 1s - loss: 95501088.0000 - val_loss: 88920432.0000 - 646ms/epoch - 1ms/step
Epoch 7/50
612/612 - 1s - loss: 95241560.0000 - val_loss: 88690112.0000 - 553ms/epoch - 904us/step
Epoch 8/50
612/612 - 0s - loss: 95062904.0000 - val_loss: 88583416.0000 - 409ms/epoch - 668us/step
Epoch 9/50
612/612 - 1s - loss: 94959784.0000 - val_loss: 88494528.0000 - 559ms/epoch - 913us/step
Epoch 10/50
612/612 - 0s - loss: 94893056.0000 - val_loss: 88457528.0000 - 307ms/epoch - 502us/step
Epoch 11/

In [60]:
neural_fnn = model
joblib.dump(neural_fnn , r'../Models/Amazon/Static/Dowload/fnn.pkl')

['../Models/Amazon/Static/Dowload/fnn.pkl']

Collecting tensorflowNote: you may need to restart the kernel to use updated packages.

  Downloading tensorflow-2.13.0-cp310-cp310-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.13.0
  Downloading tensorflow_intel-2.13.0-cp310-cp310-win_amd64.whl (276.5 MB)
     -------------------------------------- 276.5/276.5 MB 4.0 MB/s eta 0:00:00
Collecting absl-py>=1.0.0
  Downloading absl_py-1.4.0-py3-none-any.whl (126 kB)
     ---------------------------------------- 126.5/126.5 kB ? eta 0:00:00
Collecting grpcio<2.0,>=1.24.3
  Downloading grpcio-1.58.0-cp310-cp310-win_amd64.whl (4.3 MB)
     ---------------------------------------- 4.3/4.3 MB 11.0 MB/s eta 0:00:00
Collecting wrapt>=1.11.0
  Downloading wrapt-1.15.0-cp310-cp310-win_amd64.whl (36 kB)
Collecting flatbuffers>=23.1.21
  Downloading flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3
  Downloading protobuf-4.24.3-cp310-abi3-win_amd64.wh


[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: C:\Users\IvoAg\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [6]:
import tensorflow as tf
from sklearn.preprocessing import StandardScaler


# Standardize the input data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Build and compile your MLP model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=X_train_scaled.shape[1]),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Output layer for regression
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model and capture the training history
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=2)

# Extract and print the training and validation loss (MSE) for each epoch
for epoch in range(len(history.history['loss'])):
    train_mse = history.history['loss'][epoch]
    val_mse = history.history['val_loss'][epoch]
    print(f"Epoch {epoch + 1}/{len(history.history['loss'])} - Train MSE: {train_mse:.4f} - Val MSE: {val_mse:.4f}")


Epoch 1/50
612/612 - 2s - loss: 133812960.0000 - val_loss: 101838968.0000 - 2s/epoch - 3ms/step
Epoch 2/50
612/612 - 1s - loss: 101663736.0000 - val_loss: 101167784.0000 - 782ms/epoch - 1ms/step
Epoch 3/50
612/612 - 1s - loss: 101283280.0000 - val_loss: 101007576.0000 - 746ms/epoch - 1ms/step
Epoch 4/50
612/612 - 1s - loss: 101186624.0000 - val_loss: 100961192.0000 - 766ms/epoch - 1ms/step
Epoch 5/50
612/612 - 1s - loss: 101155632.0000 - val_loss: 100945552.0000 - 837ms/epoch - 1ms/step
Epoch 6/50
612/612 - 1s - loss: 101144656.0000 - val_loss: 100940352.0000 - 861ms/epoch - 1ms/step
Epoch 7/50
612/612 - 1s - loss: 101140352.0000 - val_loss: 100937360.0000 - 756ms/epoch - 1ms/step
Epoch 8/50
612/612 - 1s - loss: 101138616.0000 - val_loss: 100936432.0000 - 735ms/epoch - 1ms/step
Epoch 9/50
612/612 - 1s - loss: 101137888.0000 - val_loss: 100936704.0000 - 760ms/epoch - 1ms/step
Epoch 10/50
612/612 - 1s - loss: 101137512.0000 - val_loss: 100935872.0000 - 727ms/epoch - 1ms/step
Epoch 11/50


In [11]:
neural_mlp = model
joblib.dump(neural_mlp , r'../Models/Amazon/Driving/Upload/mlp.pkl')

['../Models/Amazon/Driving/Upload/mlp.pkl']