In [93]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

df = pd.read_csv('time_series_data.csv')

## Prophet

In [94]:
def create_prophet_model(df, column_name):
    # Rename the columns as 'ds' and 'y' for Prophet
    print(df.columns)
    df = df[['ds', column_name]].rename(columns={'ds': 'ds', column_name: 'y'})
    print(df.columns)

    # Initialize and fit the Prophet model
    model = Prophet()
    model.fit(df)

    return model

In [95]:
# Function to evaluate and print metrics for each model
def evaluate_prophet_models(test_df, models):
    for column, model in models.items():
        future = model.make_future_dataframe(periods=len(test_df))  # Forecast for the length of the test set
        forecast = model.predict(future)
        
        y_true = test_df[column]
        y_pred = forecast['yhat'][-len(test_df):]  # Get predictions for the test set period

        mae = mean_absolute_error(y_true, y_pred)
        mape = mean_absolute_percentage_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))

        print(f"Column: {column}")
        print(f"Mean Absolute Error (MAE): {mae}")
        print(f"Mean Absolute Percentage Error (MAPE): {mape * 100:.2f}%")
        print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
        print("\n")

In [96]:
prophet_df = df.copy()
prophet_models = {}

# Split the data into training and test sets
train_df, test_df = train_test_split(prophet_df, test_size=0.2, shuffle=False)  # Adjust the test_size as needed

# Iterate through the columns in the DataFrame
for column in df.columns:
    if column not in ['ds', 'failure', 'hour']:
        model = create_prophet_model(train_df, column)
        prophet_models[column] = model

16:11:47 - cmdstanpy - INFO - Chain [1] start processing
16:11:47 - cmdstanpy - INFO - Chain [1] done processing
16:11:47 - cmdstanpy - INFO - Chain [1] start processing
16:11:47 - cmdstanpy - INFO - Chain [1] done processing
16:11:47 - cmdstanpy - INFO - Chain [1] start processing


Index(['ds', 'hour', 'failure', 'bleedFavTmFbk-1b', 'bleedPrsovClPosStatus-1a',
       'bleedHprsovOpPosStatus-1b', 'bleedHprsovOpPosStatus-2a',
       'bleedOnStatus-1b'],
      dtype='object')
Index(['ds', 'y'], dtype='object')
Index(['ds', 'hour', 'failure', 'bleedFavTmFbk-1b', 'bleedPrsovClPosStatus-1a',
       'bleedHprsovOpPosStatus-1b', 'bleedHprsovOpPosStatus-2a',
       'bleedOnStatus-1b'],
      dtype='object')
Index(['ds', 'y'], dtype='object')
Index(['ds', 'hour', 'failure', 'bleedFavTmFbk-1b', 'bleedPrsovClPosStatus-1a',
       'bleedHprsovOpPosStatus-1b', 'bleedHprsovOpPosStatus-2a',
       'bleedOnStatus-1b'],
      dtype='object')
Index(['ds', 'y'], dtype='object')


16:11:48 - cmdstanpy - INFO - Chain [1] done processing
16:11:48 - cmdstanpy - INFO - Chain [1] start processing
16:11:48 - cmdstanpy - INFO - Chain [1] done processing
16:11:48 - cmdstanpy - INFO - Chain [1] start processing
16:11:48 - cmdstanpy - INFO - Chain [1] done processing


Index(['ds', 'hour', 'failure', 'bleedFavTmFbk-1b', 'bleedPrsovClPosStatus-1a',
       'bleedHprsovOpPosStatus-1b', 'bleedHprsovOpPosStatus-2a',
       'bleedOnStatus-1b'],
      dtype='object')
Index(['ds', 'y'], dtype='object')
Index(['ds', 'hour', 'failure', 'bleedFavTmFbk-1b', 'bleedPrsovClPosStatus-1a',
       'bleedHprsovOpPosStatus-1b', 'bleedHprsovOpPosStatus-2a',
       'bleedOnStatus-1b'],
      dtype='object')
Index(['ds', 'y'], dtype='object')


In [97]:
evaluate_prophet_models(test_df, prophet_models)

Column: bleedFavTmFbk-1b
Mean Absolute Error (MAE): 0.38394719252810117
Mean Absolute Percentage Error (MAPE): 12163.62%
Root Mean Squared Error (RMSE): 0.40


Column: bleedPrsovClPosStatus-1a
Mean Absolute Error (MAE): 0.23985586395789515
Mean Absolute Percentage Error (MAPE): 14719566127678746.00%
Root Mean Squared Error (RMSE): 0.28


Column: bleedHprsovOpPosStatus-1b
Mean Absolute Error (MAE): 0.31450257461292697
Mean Absolute Percentage Error (MAPE): 8490252914530495.00%
Root Mean Squared Error (RMSE): 0.37


Column: bleedHprsovOpPosStatus-2a
Mean Absolute Error (MAE): 0.29564799381914036
Mean Absolute Percentage Error (MAPE): 15609047287048710.00%
Root Mean Squared Error (RMSE): 0.35


Column: bleedOnStatus-1b
Mean Absolute Error (MAE): 0.2355954859845281
Mean Absolute Percentage Error (MAPE): 9819424402249172.00%
Root Mean Squared Error (RMSE): 0.27




## Pycaret | classificador

In [99]:
from pycaret.classification import *
classifier_df = pd.read_csv('classifier_data.csv')
s = setup(classifier_df, target = 'failure', session_id = 123)
best_classifier = compare_models()

Unnamed: 0,Description,Value
0,Session id,123
1,Target,failure
2,Target type,Binary
3,Original data shape,"(3124, 6)"
4,Transformed data shape,"(3124, 6)"
5,Transformed train set shape,"(2186, 6)"
6,Transformed test set shape,"(938, 6)"
7,Numeric features,5
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9538,0.9809,0.9671,0.9424,0.9544,0.9076,0.9081,0.065
rf,Random Forest Classifier,0.9478,0.9826,0.9643,0.9339,0.9487,0.8957,0.8964,0.071
xgboost,Extreme Gradient Boosting,0.9428,0.9837,0.9579,0.93,0.9437,0.8856,0.8862,0.039
dt,Decision Tree Classifier,0.9286,0.9286,0.9451,0.9154,0.9298,0.8572,0.8581,0.012
gbc,Gradient Boosting Classifier,0.9204,0.9773,0.9287,0.9142,0.9211,0.8408,0.8415,0.055
knn,K Neighbors Classifier,0.9067,0.9612,0.9369,0.8841,0.9096,0.8133,0.8151,0.142
ada,Ada Boost Classifier,0.8339,0.9335,0.807,0.8534,0.8292,0.6678,0.6693,0.035
qda,Quadratic Discriminant Analysis,0.5851,0.6459,0.8809,0.5855,0.675,0.1702,0.2381,0.01
lda,Linear Discriminant Analysis,0.5837,0.6458,0.8087,0.5588,0.6597,0.1675,0.189,0.01
ridge,Ridge Classifier,0.5832,0.0,0.8243,0.5564,0.664,0.1666,0.1909,0.009


Processing:   0%|          | 0/69 [00:00<?, ?it/s]

In [None]:
model = create_model("et")  
save_model(model, "classifier.pkl")

## Pycaret | regressão por coluna

In [None]:

time_series_df = pd.read_csv('time_series_data.csv')

from pycaret.time_series import *

# Initialize a dictionary to store time series models for each column
time_series_models = {}

# Loop through each column in df except the "hour" column
for column in time_series_df.columns:
    if column not in ['ds', 'failure', 'hour']:
        # Subset the data for the current column
        column_data = time_series_df[["hour", column]]

        # Initialize PyCaret time series setup for the current column
        setup_id = f"setup_{column}"
        s = setup(column_data, session_id=123, target=column, fh=50)

        # Create a time series model for the current column
        best = compare_models()
        # Store the model in the dictionary with the column name as the key
        time_series_models[column] = best

# You can now access each time series model using column names as keys in time_series_models dictionary
for column, model in time_series_models.items():
    print(f"Time Series Model for Column '{column}':")
    print("===================================")
    print(model)

## Pycaret | regressão completa para target

In [None]:
time_series_df = pd.read_csv('time_series_data.csv')
time_series_df.drop('ds', inplace=True, axis=1)
from pycaret.time_series import *

s = setup(time_series_df, session_id=123, target='failure', fh=50)

# Create a time series model for the current column
best_time_series = compare_models()

In [None]:
save_model(best_time_series, "time_series.pkl")