In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib

In [10]:
X_test = pd.read_csv(r'../Training_And_Test/Amazon/X_test_DL_static.csv')
y_test = pd.read_csv(r'../Training_And_Test/Amazon/y_test_DL_static.csv')
X_train = pd.read_csv(r'../Training_And_Test/Amazon/X_train_DL_static.csv')
y_train = pd.read_csv(r'../Training_And_Test/Amazon/y_train_DL_static.csv')

### Linear Regression ###

In [3]:
from sklearn.linear_model import LinearRegression as LR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [4]:
# Create a pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Data scaling step
    ('model', LR())  # Linear Regression model step
])


In [5]:
pipeline.fit(X_train, y_train)

In [11]:
linear_model = pipeline
joblib.dump(linear_model, r'../Models/Amazon/Static/Dowload/linear.pkl')

['../Models/Amazon/Static/Dowload/linear.pkl']

### Random Forest Regressor  ###

In [7]:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

#### Randomized Search ####

In [8]:
rf_reg = RandomForestRegressor()


param_dist = {
    'n_estimators': randint(100, 2000)  
}

random_search = RandomizedSearchCV(
    estimator=rf_reg,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=2
)

pipeline = Pipeline([
    ('scaler', StandardScaler()),  
    ('random_search', random_search),
])

pipeline.fit(X_train, y_train)



Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV] END ...................................n_estimators=518; total time=  16.7s
[CV] END ...................................n_estimators=518; total time=  16.8s
[CV] END ...................................n_estimators=518; total time=  16.5s
[CV] END ...................................n_estimators=518; total time=  16.4s
[CV] END ...................................n_estimators=518; total time=  16.4s
[CV] END ...................................n_estimators=851; total time=  27.2s
[CV] END ...................................n_estimators=851; total time=  27.2s
[CV] END ...................................n_estimators=851; total time=  27.2s
[CV] END ...................................n_estimators=851; total time=  27.0s
[CV] END ...................................n_estimators=851; total time=  27.0s
[CV] END ...................................n_estimators=769; total time=  24.5s
[CV] END ...................................n_e

In [10]:
print("Best Parameters:", pipeline.named_steps['random_search'].best_params_)


Best Parameters: {'n_estimators': 473}


In [13]:
rf_reg_random_search = pipeline
joblib.dump(rf_reg_random_search, r'../Models/Amazon/Static/Dowload/random_forest_resgressor_random_search.pkl')

['../Models/Amazon/Static/Dowload/random_forest_resgressor_random_search.pkl']

#### Grid Search ####

In [18]:
from sklearn.model_selection import GridSearchCV


rf_reg = RandomForestRegressor()

param_grid = {
    'n_estimators': [100, 150, 200, 300, 400 ,500, 700, 1000,1200, 1500, 1800, 2000]
}

grid_search = GridSearchCV(
    estimator=rf_reg, 
    param_grid=param_grid, 
    cv=5, 
    scoring='neg_mean_squared_error',
    verbose=2
)


pipeline = Pipeline([
    ('scaler', StandardScaler()),  
    ('grid_search', grid_search),
])


pipeline.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV] END ...................................n_estimators=100; total time=   3.2s
[CV] END ...................................n_estimators=100; total time=   3.2s
[CV] END ...................................n_estimators=100; total time=   3.2s
[CV] END ...................................n_estimators=100; total time=   3.1s
[CV] END ...................................n_estimators=100; total time=   3.1s
[CV] END ...................................n_estimators=150; total time=   4.7s
[CV] END ...................................n_estimators=150; total time=   4.7s
[CV] END ...................................n_estimators=150; total time=   4.7s
[CV] END ...................................n_estimators=150; total time=   4.7s
[CV] END ...................................n_estimators=150; total time=   4.7s
[CV] END ...................................n_estimators=200; total time=   6.3s
[CV] END ...................................n_es

In [19]:
print("Best Parameters:", pipeline.named_steps['grid_search'].best_params_)


Best Parameters: {'n_estimators': 1200}


In [20]:
rf_reg_grid_search = pipeline
joblib.dump(rf_reg_grid_search , r'../Models/Amazon/Static/Dowload/random_forest_resgressor_rgrid_search.pkl')

['../Models/Amazon/Static/Dowload/random_forest_resgressor_rgrid_search.pkl']

### Suporte Vector Machine ###

In [7]:
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### Grid Search ####

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

svm_reg = SVR()

param_grid = {
    'C': [0.01, 0.1, 1, 10, 100, 1000],           # Range for C (regularization parameter)
    'kernel': ['linear', 'rbf', 'poly'],      # Kernel types to consider
    'gamma': [1e-4, 1e-3, 1e-2],  # Range for gamma (kernel coefficient)
}


grid_search = GridSearchCV(
    estimator= svm_reg, 
    param_grid=param_grid, 
    cv=5, 
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=8
)


pipeline = Pipeline([
    ('scaler', StandardScaler()),  
    ('grid_search', grid_search),
])


pipeline.fit(X_train, y_train['DL_bitrate'])

Fitting 5 folds for each of 54 candidates, totalling 270 fits
[CV] END ................C=0.01, gamma=0.0001, kernel=linear; total time=   9.5s
[CV] END ................C=0.01, gamma=0.0001, kernel=linear; total time=   9.5s
[CV] END ................C=0.01, gamma=0.0001, kernel=linear; total time=   9.6s
[CV] END ................C=0.01, gamma=0.0001, kernel=linear; total time=   9.7s
[CV] END ................C=0.01, gamma=0.0001, kernel=linear; total time=  10.5s
[CV] END ...................C=0.01, gamma=0.0001, kernel=rbf; total time=  12.4s
[CV] END ...................C=0.01, gamma=0.0001, kernel=rbf; total time=  12.4s
[CV] END ...................C=0.01, gamma=0.0001, kernel=rbf; total time=  12.5s
[CV] END ..................C=0.01, gamma=0.0001, kernel=poly; total time=  10.0s
[CV] END ..................C=0.01, gamma=0.0001, kernel=poly; total time=  10.0s
[CV] END ..................C=0.01, gamma=0.0001, kernel=poly; total time=  10.6s
[CV] END ...................C=0.01, gamma=0.000

In [6]:
best_params = pipeline.named_steps['grid_search'].best_params_
print("Best Parameters:", best_params)

Best Parameters: {'C': 1000, 'gamma': 0.0001, 'kernel': 'linear'}


In [8]:
svr_grid_search = pipeline
joblib.dump(svr_grid_search , r'../Models/Amazon/Static/Dowload/supoert_vector_regressor_grid_search.pkl')

['../Models/Amazon/Static/Dowload/supoert_vector_regressor_grid_search.pkl']

### Neural Network ###

#### Feedforward Neural Network (FNN) ####

In [13]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

2023-08-24 13:29:15.101120: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-24 13:29:15.102565: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-24 13:29:15.125654: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-24 13:29:15.126865: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [17]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Build and compile your model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=X_train_scaled.shape[1]),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Output layer for regression
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model and capture the training history
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=2)

# Extract and print the training and validation loss (MSE) for each epoch
for epoch in range(len(history.history['loss'])):
    train_mse = history.history['loss'][epoch]
    val_mse = history.history['val_loss'][epoch]
    print(f"Epoch {epoch + 1}/{len(history.history['loss'])} - Train MSE: {train_mse:.4f} - Val MSE: {val_mse:.4f}")


Epoch 1/50
612/612 - 1s - loss: 227460384.0000 - val_loss: 175857984.0000 - 863ms/epoch - 1ms/step
Epoch 2/50
612/612 - 1s - loss: 130192280.0000 - val_loss: 97950232.0000 - 602ms/epoch - 983us/step
Epoch 3/50
612/612 - 0s - loss: 100313224.0000 - val_loss: 91097944.0000 - 312ms/epoch - 509us/step
Epoch 4/50
612/612 - 1s - loss: 96867872.0000 - val_loss: 89718296.0000 - 526ms/epoch - 859us/step
Epoch 5/50
612/612 - 0s - loss: 95953672.0000 - val_loss: 89191240.0000 - 358ms/epoch - 586us/step
Epoch 6/50
612/612 - 1s - loss: 95501088.0000 - val_loss: 88920432.0000 - 646ms/epoch - 1ms/step
Epoch 7/50
612/612 - 1s - loss: 95241560.0000 - val_loss: 88690112.0000 - 553ms/epoch - 904us/step
Epoch 8/50
612/612 - 0s - loss: 95062904.0000 - val_loss: 88583416.0000 - 409ms/epoch - 668us/step
Epoch 9/50
612/612 - 1s - loss: 94959784.0000 - val_loss: 88494528.0000 - 559ms/epoch - 913us/step
Epoch 10/50
612/612 - 0s - loss: 94893056.0000 - val_loss: 88457528.0000 - 307ms/epoch - 502us/step
Epoch 11/

In [18]:
neural_fnn = model
joblib.dump(neural_fnn , r'../Models/Amazon/Static/Dowload/fnn.pkl')

['../Models/Amazon/Static/Dowload/fnn.pkl']