In [4]:
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
import pandas as pd
import psycopg2
import warnings
import joblib
import h2o
import time

# Suppress all Python warnings for the rest of the session.
warnings.simplefilter("ignore")
# Connect to (or start) the local H2O cluster before any H2O model is loaded.
# NOTE(review): nthreads=-1 presumably lets H2O use every available core — confirm in the H2O docs.
localH2o = h2o.init(nthreads = -1)

def get_best_models(area, quant):
    """Fetch the best-scoring models registered for an area.

    Queries ``public.metrics`` for rows whose ``area`` matches and returns
    the ``quant`` rows with the highest ``f1-score``.

    Args:
        area: Area name used to filter the ``area`` column.
        quant: Maximum number of rows to return (converted with ``int``).

    Returns:
        A pandas DataFrame with the columns ``model``, ``algorithm``,
        ``accuracy``, ``precision``, ``recall`` and ``f1-score``, ordered by
        descending ``f1-score``. If the database cannot be reached or the
        query fails, the error is printed and an empty DataFrame with the
        same columns is returned.
    """
    columns = ["model", "algorithm", "accuracy", "precision", "recall", "f1-score"]

    # Placeholders let the driver quote the values, so `area` can never
    # alter the structure of the statement.
    query = """
            SELECT "model", "algorithm", "accuracy", "precision", "recall", "f1-score" FROM public.metrics
            WHERE "area" = %s
            ORDER BY "f1-score" DESC LIMIT %s;
            """
    params = (str(area), int(quant))

    # Fallback returned when the database is unavailable.
    df_modelos = pd.DataFrame(columns=columns)
    connection = None

    try:
        # NOTE(review): credentials are hard-coded; consider loading them
        # from the environment or a configuration file.
        connection = psycopg2.connect(
            database="testy",
            user="postgres",
            password="SENHA",
            host="127.0.0.1",
            port="5432"
        )

        # The cursor context manager closes the cursor even if the query fails.
        with connection.cursor() as cursor:
            cursor.execute(query, params)

            # Fetch the rows as a list of tuples and label them with the
            # column names reported by the cursor.
            results = cursor.fetchall()
            df_modelos = pd.DataFrame(results, columns=[desc[0] for desc in cursor.description])

    except psycopg2.Error as error:
        print("Erro ao conectar ao PostgreSQL:", error)

    finally:
        # Always release the database connection.
        if connection is not None:
            connection.close()

    # Returning outside `finally` keeps unrelated exceptions from being
    # silently discarded.
    return df_modelos
    
def get_prevision(row, entrada):
    """Run the predictor that matches one row of the model ranking.

    Args:
        row: Sequence-like record whose item 0 is the model file name and
            item 1 is the algorithm name (as produced by iterating the
            DataFrame returned by ``get_best_models``).
        entrada: Input features forwarded unchanged to the predictor.

    Returns:
        Whatever the selected ``prev*`` function returns for ``entrada``.

    Raises:
        ValueError: If the algorithm name has no matching predictor.
    """
    modelo = row[1]
    # Build the model path once; every predictor loads from the same folder.
    caminho = 'C:/Users/milen/OneDrive/Documentos/GitHub/Chat_ibict/Progressao/static/Modelos/' + row[0]
    print(caminho)

    if modelo == "Random Forest":
        return prevRF(entrada, caminho)
    elif modelo == 'AdaBoost':
        return prevADA(entrada, caminho)
    elif modelo == 'XGBoost':
        return prevXGB(entrada, caminho)
    elif modelo == 'CatBoost':
        return prevCAT(entrada, caminho)
    elif modelo == 'Decision Tree':
        return prevDT(entrada, caminho)
    elif modelo == 'GaussianNB':
        return prevGNB(entrada, caminho)
    elif modelo == 'Logistic Regression':
        return prevLOG(entrada, caminho)
    elif modelo == 'SVC':
        return prevSVC(entrada, caminho)

    # Fail with an explicit message instead of an unbound-variable error.
    raise ValueError(f"Algoritmo não suportado: {modelo!r}")

def prevH2O(abstract):
    """Predict with the saved H2O deep-learning model.

    Args:
        abstract: Data converted to an ``H2OFrame`` and passed to the model.

    Returns:
        The value at row 0, column 0 of the prediction frame.
    """
    dl_model = h2o.load_model('C:/Users/milen/OneDrive/Documentos/GitHub/Chat_ibict/Progressao/static/Modelos/DeepLearning_model_R_1670582405235_1')
    frame = h2o.H2OFrame(abstract)
    predictions = dl_model.predict(frame)
    del dl_model
    return predictions[0, 0]

def prevXGB(abstract, path_model):
    """Predict with a saved XGBoost bundle and decode the first result.

    Args:
        abstract: Feature rows passed straight to the model's ``predict``.
        path_model: Path of a joblib file holding a 4-item sequence; item 0
            is the fitted model and item 1 the values used to fit the label
            encoder (presumably the training class labels — confirm against
            the training script).

    Returns:
        The decoded label of the first prediction.
    """
    classifier, label_values, _, _ = joblib.load(path_model)

    encoded = classifier.predict(abstract)

    # Rebuild the encoder from the stored values so the model's encoded
    # predictions can be mapped back to the original labels.
    decoder = LabelEncoder().fit(label_values)
    decoded = decoder.inverse_transform(encoded)

    del classifier, label_values
    return decoded[0]

def prevADA(abstract, path_model):
    """Predict with a saved AdaBoost bundle and decode the first result.

    Args:
        abstract: Feature rows passed straight to the model's ``predict``.
        path_model: Path of a joblib file holding a 4-item sequence; item 0
            is the fitted model and item 1 the values used to fit the label
            encoder (presumably the training class labels — confirm against
            the training script).

    Returns:
        The decoded label of the first prediction.
    """
    classifier, label_values, _, _ = joblib.load(path_model)

    encoded = classifier.predict(abstract)

    # Rebuild the encoder from the stored values so the model's encoded
    # predictions can be mapped back to the original labels.
    decoder = LabelEncoder().fit(label_values)
    decoded = decoder.inverse_transform(encoded)

    del classifier, label_values
    return decoded[0]

def prevCAT(abstract, path_model):
    """Predict with a saved CatBoost bundle.

    Args:
        abstract: Feature rows passed straight to the model's ``predict``.
        path_model: Path of a joblib file holding a 2-item sequence whose
            first item is the fitted model.

    Returns:
        Element ``[0][0]`` of the prediction output, i.e. the first entry
        of the first prediction row.
    """
    classifier, _ = joblib.load(path_model)

    predictions = classifier.predict(abstract)
    del classifier

    first_row = predictions[0]
    return first_row[0]

def prevDT(abstract, path_model):
    """Predict with a saved Decision Tree bundle and decode the first result.

    Args:
        abstract: Feature rows passed straight to the model's ``predict``.
        path_model: Path of a joblib file holding a 4-item sequence; item 0
            is the fitted model and item 1 the values used to fit the label
            encoder (presumably the training class labels — confirm against
            the training script).

    Returns:
        The decoded label of the first prediction.
    """
    classifier, label_values, _, _ = joblib.load(path_model)

    encoded = classifier.predict(abstract)

    # Rebuild the encoder from the stored values so the model's encoded
    # predictions can be mapped back to the original labels.
    decoder = LabelEncoder().fit(label_values)
    decoded = decoder.inverse_transform(encoded)

    del classifier, label_values
    return decoded[0]

def prevGNB(abstract, path_model):
    """Predict with a saved Gaussian Naive Bayes bundle and decode the first result.

    Args:
        abstract: Feature rows passed straight to the model's ``predict``.
        path_model: Path of a joblib file holding a 3-item sequence; item 0
            is the fitted model and item 1 the values used to fit the label
            encoder (presumably the training class labels — confirm against
            the training script).

    Returns:
        The decoded label of the first prediction.
    """
    classifier, label_values, _ = joblib.load(path_model)

    encoded = classifier.predict(abstract)

    # Rebuild the encoder from the stored values so the model's encoded
    # predictions can be mapped back to the original labels.
    decoder = LabelEncoder().fit(label_values)
    decoded = decoder.inverse_transform(encoded)

    del classifier, label_values
    return decoded[0]

def prevLOG(abstract, path_model):
    """Predict with a saved Logistic Regression bundle and decode the first result.

    Args:
        abstract: Feature rows passed straight to the model's ``predict``.
        path_model: Path of a joblib file holding a 4-item sequence; item 0
            is the fitted model and item 1 the values used to fit the label
            encoder (presumably the training class labels — confirm against
            the training script).

    Returns:
        The decoded label of the first prediction.
    """
    classifier, label_values, _, _ = joblib.load(path_model)

    encoded = classifier.predict(abstract)

    # Rebuild the encoder from the stored values so the model's encoded
    # predictions can be mapped back to the original labels.
    decoder = LabelEncoder().fit(label_values)
    decoded = decoder.inverse_transform(encoded)

    del classifier, label_values
    return decoded[0]

def prevSVC(abstract, path_model):
    """Predict with a saved SVC bundle and decode the first result.

    Args:
        abstract: Feature rows passed straight to the model's ``predict``.
        path_model: Path of a joblib file holding a 4-item sequence; item 0
            is the fitted model and item 1 the values used to fit the label
            encoder (presumably the training class labels — confirm against
            the training script).

    Returns:
        The decoded label of the first prediction.
    """
    classifier, label_values, _, _ = joblib.load(path_model)

    encoded = classifier.predict(abstract)

    # Rebuild the encoder from the stored values so the model's encoded
    # predictions can be mapped back to the original labels.
    decoder = LabelEncoder().fit(label_values)
    decoded = decoder.inverse_transform(encoded)

    del classifier, label_values
    return decoded[0]

def prevRF(abstract, path_model):
    """Predict with a saved Random Forest bundle.

    Args:
        abstract: Feature rows passed straight to the model's ``predict``.
        path_model: Path of a joblib file holding a 2-item sequence whose
            first item is the fitted model.

    Returns:
        The first element of the prediction output.
    """
    classifier, _ = joblib.load(path_model)

    predictions = classifier.predict(abstract)
    del classifier

    return predictions[0]

def prev(entrada, qnt_models = 5):
    """Predict the area and sub-area of an input using a weighted vote.

    The area comes from the H2O model. The best models registered for that
    area each predict a sub-area, and every prediction is weighted by the
    model's ``f1-score``; the sub-area with the largest total weight wins.

    Args:
        entrada: Input features forwarded to every predictor.
        qnt_models: Number of models to consult; capped at 8.

    Returns:
        A two-item list: the string ``c("<sub-area>", "<area>")`` followed
        by the list of individual model predictions.

    Raises:
        ValueError: If no model is available for the predicted area.
    """
    start = time.time()

    area = prevH2O(entrada)

    models = get_best_models(area, min(qnt_models, 8))

    results = [get_prevision(row, entrada) for _, row in models.iterrows()]

    if not results:
        raise ValueError(f"Nenhum modelo disponível para a área {area!r}")

    # Pair predictions with their weights positionally, so the vote does not
    # depend on the DataFrame having a default 0..n-1 index.
    votes = {}
    for result, weight in zip(results, models['f1-score']):
        votes[result] = votes.get(result, 0) + weight

    # Ties resolve to the sub-area that was predicted first.
    sub = max(votes, key=votes.get)

    ans = [f"""c("{sub}", "{area}")""", results]

    end = time.time()
    print(f"""tempo total para previsão {end - start}s""")

    return ans

# Notebook driver: load the occurrences dataset and classify its last row.
path_df = 'C:/Users/milen/OneDrive/Documentos/df.100x1x100.Ocorrencias.csv'
df_id = 'eef474adc4c2d494dca53fa6b3bd8211'

df = pd.read_csv(path_df)

# Quick look at the first record.
df.head(1)

# Use the last record as input, without its 'Status' column.
entrada = df.tail(1).drop(columns=['Status'])

entrada

# Final cell expression: its value is what the notebook displays.
prev(entrada)

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,1 hour 9 mins
H2O_cluster_timezone:,America/Sao_Paulo
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.38.0.1
H2O_cluster_version_age:,"1 year, 4 months and 5 days !!!"
H2O_cluster_name:,H2O_from_python_milen_dyd6o2
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,2.953 Gb
H2O_cluster_total_cores:,16
H2O_cluster_allowed_cores:,16


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
deeplearning prediction progress: |██████████████████████████████████████████████| (done) 100%
C:/Users/milen/OneDrive/Documentos/GitHub/Chat_ibict/Progressao/static/Modelos/Engenharias_rf_comp.pkl
C:/Users/milen/OneDrive/Documentos/GitHub/Chat_ibict/Progressao/static/Modelos/Engenharias_xgboost_comp.pkl
C:/Users/milen/OneDrive/Documentos/GitHub/Chat_ibict/Progressao/static/Modelos/Engenharias_svc_comp.pkl
C:/Users/milen/OneDrive/Documentos/GitHub/Chat_ibict/Progressao/static/Modelos/Engenharias_catboost_comp.pkl
C:/Users/milen/OneDrive/Documentos/GitHub/Chat_ibict/Progressao/static/Modelos/Engenharias_logistic_regression_comp.pkl
tempo total para previsão 33.05903148651123s


['c("ENGENHARIA DE PRODUCAO", "ENGENHARIAS")',
 ['ENGENHARIA DE PRODUCAO',
  'ENGENHARIA DE PRODUCAO',
  'ENGENHARIA DE PRODUCAO',
  'ENGENHARIA DE PRODUCAO',
  'ENGENHARIA DE PRODUCAO']]