In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Carregar o dataframe inicial
User = pd.read_excel('EVIO_history_01-02-2023_29-02-2024.xlsx')
User = User[['Start date','Stop date','Duration (min)', 'Total Energy (kWh)', 'Nº cartão EVIO']]

# Remover valores de carregamento de energia inferiores a 1 kWh
User = User[User['Total Energy (kWh)'] >= 1]


# Remover valores de duração de carregamento inferiores a 5 minutos
User = User[User['Duration (min)'] >= 5]

#User = User[['Start date','Stop date','Duration (min)', 'Total Energy (kWh)']]

# Convertendo as colunas de data para o formato de data especificado
User['Start date'] = pd.to_datetime(User['Start date'], format='%m/%d/%Y | %H:%M')
User['Stop date'] = pd.to_datetime(User['Stop date'], format='%m/%d/%Y | %H:%M')

# Criando uma nova coluna 'Weekday' que contém o dia da semana
User['Weekday'] = User['Start date'].dt.day_name()

weekday_mapping = {
    'Monday': 1,
    'Tuesday': 2,
    'Wednesday': 3,
    'Thursday': 4,
    'Friday': 5,
    'Saturday': 6,
    'Sunday': 7
}

User['Weekday'] = User['Weekday'].map(weekday_mapping)

# Criar colunas para cada hora do dia (8h às 20h)
hours = range(8, 21)
for hour in hours:
    User[f'Charging_{hour}h'] = 0

# Preencher as colunas com valores binários (1 ou 0)
for index, row in User.iterrows():
    start_hour = row['Start date'].hour
    stop_hour = row['Stop date'].hour
    for hour in range(8, 21):
        if hour >= start_hour and hour <= stop_hour:
            User.at[index, f'Charging_{hour}h'] = 1

# Ordenar o dataframe pela coluna 'Start date' para garantir que os dados estejam em ordem temporal
User.sort_values(by='Start date', inplace=True)

# Calcular a diferença entre o stop date do último carregamento e o start date da linha seguinte em dias
User['Days_since_last_charge'] = (User['Start date'] - User['Stop date'].shift(1)).dt.days
User['Days_since_last_charge'].fillna(0, inplace=True)

# Calcular a diferença entre o 'Start date' da linha atual e o 'Stop date' da linha anterior em horas
User['Hours_since_last_charge'] = (User['Start date'] - User['Stop date'].shift(1)).dt.total_seconds() / 3600
User['Hours_since_last_charge'].fillna(0, inplace=True)

# Gerando um índice de datas para o intervalo específico
start_date = "2023-02-14"
end_date = "2024-02-29"
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
full_dates_df = pd.DataFrame(date_range, columns=['Date'])

# Converter a coluna 'Start date' do User para o mesmo formato de data
User['Date'] = User['Start date'].dt.normalize()

# Unir este índice ao dataset existente
full_dataset = full_dates_df.merge(User, on='Date', how='left')

# Preencher campos com zero para dias sem carregamento
columns_to_fill = ['Duration (min)', 'Total Energy (kWh)', 'Weekday', 'Charging_8h', 'Charging_9h', 
                   'Charging_10h', 'Charging_11h', 'Charging_12h', 'Charging_13h', 'Charging_14h', 
                   'Charging_15h', 'Charging_16h', 'Charging_17h', 'Charging_18h', 'Charging_19h', 
                   'Charging_20h', 'Days_since_last_charge', 'Hours_since_last_charge']

full_dataset[columns_to_fill] = full_dataset[columns_to_fill].fillna(0)

# Corrigir a coluna Weekday
full_dataset['Weekday'] = full_dataset['Date'].dt.day_name().map(weekday_mapping)

# Preencher as colunas 'Days_since_last_charge' e 'Hours_since_last_charge' corretamente
for i in range(1, len(full_dataset)):
    if full_dataset.loc[i, 'Total Energy (kWh)'] == 0:
        full_dataset.loc[i, 'Days_since_last_charge'] = full_dataset.loc[i-1, 'Days_since_last_charge'] + 1
        full_dataset.loc[i, 'Hours_since_last_charge'] = full_dataset.loc[i-1, 'Hours_since_last_charge'] + 24
    else:
        full_dataset.loc[i, 'Days_since_last_charge'] = 0
        full_dataset.loc[i, 'Hours_since_last_charge'] = 0

# Verificar se há pelo menos dois valores diferentes em cada coluna de destino
for hour in hours:
    col_name = f'Charging_{hour}h'
    if full_dataset[col_name].nunique() < 2:
        print(f"A coluna {col_name} não contém pelo menos duas classes distintas. Removendo esta coluna.")
        full_dataset.drop(columns=[col_name], inplace=True)

# Verificar se ainda temos colunas de destino após a remoção
target_columns = [f'Charging_{hour}h' for hour in hours if f'Charging_{hour}h' in full_dataset.columns]
if not target_columns:
    raise ValueError("Não há colunas de destino com pelo menos duas classes distintas. Não é possível treinar o modelo.")
    
print(full_dataset)

           Date          Start date           Stop date  Duration (min)  \
0    2023-02-14 2023-02-14 08:38:00 2023-02-14 12:48:00         250.700   
1    2023-02-14 2023-02-14 08:40:00 2023-02-14 10:00:00          79.867   
2    2023-02-14 2023-02-14 08:45:00 2023-02-14 18:58:00         613.733   
3    2023-02-14 2023-02-14 08:46:00 2023-02-14 16:07:00         440.783   
4    2023-02-14 2023-02-14 08:55:00 2023-02-14 13:09:00         254.583   
...         ...                 ...                 ...             ...   
1646 2024-02-29 2024-02-29 08:57:00 2024-02-29 10:11:00          74.783   
1647 2024-02-29 2024-02-29 09:02:00 2024-02-29 14:55:00         352.383   
1648 2024-02-29 2024-02-29 09:42:00 2024-02-29 10:12:00          30.283   
1649 2024-02-29 2024-02-29 10:44:00 2024-02-29 11:24:00          39.483   
1650 2024-02-29 2024-02-29 14:17:00 2024-02-29 16:12:00         114.433   

      Total Energy (kWh) Nº cartão EVIO  Weekday  Charging_8h  Charging_9h  \
0                 10.

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Carregar o dataframe inicial
User = pd.read_excel('EVIO_history_01-02-2023_29-02-2024.xlsx')
User = User[['Start date', 'Stop date', 'Duration (min)', 'Total Energy (kWh)', 'Nº cartão EVIO']]

# Remover valores de carregamento de energia inferiores a 1 kWh
User = User[User['Total Energy (kWh)'] >= 1]

# Remover valores de duração de carregamento inferiores a 5 minutos
User = User[User['Duration (min)'] >= 5]

# Convertendo as colunas de data para o formato de data especificado
User['Start date'] = pd.to_datetime(User['Start date'], format='%m/%d/%Y | %H:%M')
User['Stop date'] = pd.to_datetime(User['Stop date'], format='%m/%d/%Y | %H:%M')

# Criando uma nova coluna 'Weekday' que contém o dia da semana
User['Weekday'] = User['Start date'].dt.day_name()

weekday_mapping = {
    'Monday': 1,
    'Tuesday': 2,
    'Wednesday': 3,
    'Thursday': 4,
    'Friday': 5,
    'Saturday': 6,
    'Sunday': 7
}

User['Weekday'] = User['Weekday'].map(weekday_mapping)

# Criar colunas para cada hora do dia (8h às 20h)
hours = range(8, 21)
for hour in hours:
    User[f'Charging_{hour}h'] = 0

# Preencher as colunas com valores binários (1 ou 0)
for index, row in User.iterrows():
    start_hour = row['Start date'].hour
    stop_hour = row['Stop date'].hour
    for hour in range(8, 21):
        if hour >= start_hour and hour <= stop_hour:
            User.at[index, f'Charging_{hour}h'] = 1

# Ordenar o dataframe pela coluna 'Start date' para garantir que os dados estejam em ordem temporal
User.sort_values(by='Start date', inplace=True)

# Calcular a diferença entre o stop date do último carregamento e o start date da linha seguinte em dias
User['Days_since_last_charge'] = (User['Start date'] - User['Stop date'].shift(1)).dt.days
User['Days_since_last_charge'].fillna(0, inplace=True)

# Calcular a diferença entre o 'Start date' da linha atual e o 'Stop date' da linha anterior em horas
User['Hours_since_last_charge'] = (User['Start date'] - User['Stop date'].shift(1)).dt.total_seconds() / 3600
User['Hours_since_last_charge'].fillna(0, inplace=True)

# Converter a coluna 'Start date' para o formato de data para agrupar por dia
User['Date'] = User['Start date'].dt.normalize()

# Gerar um índice de datas para o intervalo específico
start_date = "2023-02-14"
end_date = "2024-02-29"
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
full_dates_df = pd.DataFrame(date_range, columns=['Date'])

# Função para criar colunas dinâmicas de carregamento por dia
def create_dynamic_columns(df):
    max_charges_per_day = df.groupby('Date').size().max()
    dynamic_columns = []
    for i in range(max_charges_per_day):
        for hour in hours:
            dynamic_columns.append(f'Charging_{hour}h_{i+1}')
    return dynamic_columns

# Agrupar por dia e criar colunas dinâmicas
dynamic_columns = create_dynamic_columns(User)
daily_data = pd.DataFrame(columns=['Date', 'Weekday'] + dynamic_columns)

for date, group in User.groupby('Date'):
    new_row = {'Date': date, 'Weekday': group['Weekday'].iloc[0]}
    for i, (_, row) in enumerate(group.iterrows()):
        for hour in hours:
            new_row[f'Charging_{hour}h_{i+1}'] = row[f'Charging_{hour}h']
    daily_data = daily_data.append(new_row, ignore_index=True)

# Preencher campos NaN com 0
daily_data.fillna(0, inplace=True)

# Gerar o conjunto de dados final
full_dataset = full_dates_df.merge(daily_data, on='Date', how='left')
full_dataset.fillna(0, inplace=True)

print(full_dataset)

# A partir daqui, pode-se usar `full_dataset` para treinamento do modelo, conforme mostrado anteriormente
X = full_dataset[['Weekday'] + [f'Hours_since_last_charge_{i+1}' for i in range(1, len(dynamic_columns)//len(hours) + 1)]].values
y = full_dataset[[f'Charging_{hour}h_{i+1}' for i in range(1, len(dynamic_columns)//len(hours) + 1) for hour in hours]]

# Dividir os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# Treinar o modelo de regressão logística multinomial
model = MultiOutputClassifier(LogisticRegression(max_iter=1000))
model.fit(X_train, y_train)

# Fazer previsões para o conjunto de teste
predictions = model.predict(X_test)

# Avaliar a precisão do modelo comparando as previsões com y_test
accuracy = accuracy_score(y_test.values.ravel(), predictions.ravel())
print("Acurácia do modelo:", accuracy)

# Criar uma tabela de previsões com 0s e 1s
predictions_table = pd.DataFrame(predictions, columns=[f'Charging_{hour}h_{i+1}' for i in range(1, len(dynamic_columns)//len(hours) + 1) for hour in hours])
predictions_table.replace({0: 'Não Carregar', 1: 'Carregar'}, inplace=True)

# Exibir as previsões
print("Previsões:")
print(predictions_table)

# Calcular a matriz de confusão geral
overall_confusion_matrix = confusion_matrix(y_test.values.ravel(), predictions.ravel())

# Extrair os verdadeiros positivos e verdadeiros negativos da matriz de confusão geral
overall_true_positives = overall_confusion_matrix[1, 1]
overall_true_negatives = overall_confusion_matrix[0, 0]

# Calcular o total de exemplos positivos e negativos
total_positives = overall_confusion_matrix[:, 1].sum()  # Soma da segunda coluna
total_negatives = overall_confusion_matrix[:, 0].sum()  # Soma da primeira coluna

# Calcular as percentagens de verdadeiros positivos e verdadeiros negativos
percentage_true_positives = overall_true_positives / total_positives * 100 if total_positives > 0 else 0
percentage_true_negatives = overall_true_negatives / total_negatives * 100 if total_negatives > 0 else 0

# Imprimir os resultados
print(f'Percentagem de True Positives: {percentage_true_positives:.2f}%')
print(f'Percentagem de True Negatives: {percentage_true_negatives:.2f}%')


  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_dat

  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_dat

  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_dat

  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_dat

  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_dat

  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_dat

  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_dat

  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_dat

  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)
  daily_data = daily_data.append(new_row, ignore_index=True)


          Date  Weekday  Charging_8h_1  Charging_9h_1  Charging_10h_1  \
0   2023-02-14      2.0            1.0            1.0             1.0   
1   2023-02-15      3.0            1.0            1.0             1.0   
2   2023-02-16      4.0            1.0            1.0             1.0   
3   2023-02-17      5.0            1.0            1.0             1.0   
4   2023-02-18      0.0            0.0            0.0             0.0   
..         ...      ...            ...            ...             ...   
376 2024-02-25      7.0            0.0            0.0             1.0   
377 2024-02-26      1.0            1.0            1.0             1.0   
378 2024-02-27      2.0            1.0            1.0             1.0   
379 2024-02-28      3.0            1.0            1.0             1.0   
380 2024-02-29      4.0            1.0            1.0             1.0   

     Charging_11h_1  Charging_12h_1  Charging_13h_1  Charging_14h_1  \
0               1.0             1.0             0.0 

KeyError: "['Hours_since_last_charge_2', 'Hours_since_last_charge_3', 'Hours_since_last_charge_4', 'Hours_since_last_charge_5', 'Hours_since_last_charge_6', 'Hours_since_last_charge_7', 'Hours_since_last_charge_8', 'Hours_since_last_charge_9', 'Hours_since_last_charge_10', 'Hours_since_last_charge_11', 'Hours_since_last_charge_12', 'Hours_since_last_charge_13', 'Hours_since_last_charge_14', 'Hours_since_last_charge_15'] not in index"

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Supondo que o dataframe full_dataset já exista

# Agregar os dados por dia, somando os valores de carregamento
daily_data = full_dataset.groupby('Date')[[f'Charging_{hour}h' for hour in range(8, 21)]].sum().reset_index()

# Usar 'Weekday' e 'Hours_since_last_charge' como variáveis de entrada
daily_data['Weekday'] = daily_data['Date'].dt.day_name().map({
    'Monday': 1,
    'Tuesday': 2,
    'Wednesday': 3,
    'Thursday': 4,
    'Friday': 5,
    'Saturday': 6,
    'Sunday': 7
})

# Calcular 'Hours_since_last_charge' como a média das horas desde o último carregamento dos carregamentos do dia
daily_data['Hours_since_last_charge'] = full_dataset.groupby('Date')['Hours_since_last_charge'].mean().reset_index(drop=True)

# Definir X e y
X = daily_data[['Weekday', 'Hours_since_last_charge']].values
y = daily_data[[f'Charging_{hour}h' for hour in range(8, 21)]].values

# Dividir os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# Treinar o modelo de regressão logística multinomial
model = MultiOutputClassifier(LogisticRegression(max_iter=1000))
model.fit(X_train, y_train)

# Fazer previsões para o conjunto de teste
predictions = model.predict(X_test)

# Avaliar a precisão do modelo comparando as previsões com y_test
accuracy = accuracy_score(y_test.ravel(), predictions.ravel())
print("Acurácia do modelo:", accuracy)

# Criar uma tabela de previsões com 0s e 1s
predictions_table = pd.DataFrame(predictions, columns=[f'Charging_{hour}h' for hour in range(8, 21)])
predictions_table.replace({0: 'Não Carregar', 1: 'Carregar'}, inplace=True)

# Exibir as previsões
print("Previsões:")
print(predictions_table)

# Calcular a matriz de confusão geral
overall_confusion_matrix = confusion_matrix(y_test.ravel(), predictions.ravel())

# Extrair os verdadeiros positivos e verdadeiros negativos da matriz de confusão geral
overall_true_positives = overall_confusion_matrix[1, 1]
overall_true_negatives = overall_confusion_matrix[0, 0]

# Calcular o total de exemplos positivos e negativos
total_positives = overall_confusion_matrix[:, 1].sum()  # Soma da segunda coluna
total_negatives = overall_confusion_matrix[:, 0].sum()  # Soma da primeira coluna

# Calcular as percentagens de verdadeiros positivos e verdadeiros negativos
percentage_true_positives = overall_true_positives / total_positives * 100 if total_positives > 0 else 0
percentage_true_negatives = overall_true_negatives / total_negatives * 100 if total_negatives > 0 else 0

# Imprimir os resultados
print(f'Percentagem de True Positives: {percentage_true_positives:.2f}%')
print(f'Percentagem de True Negatives: {percentage_true_negatives:.2f}%')


Acurácia do modelo: 0.5514485514485514
Previsões:
     Charging_8h   Charging_9h  Charging_10h  Charging_11h  Charging_12h  \
0   Não Carregar  Não Carregar           3.0      Carregar      Carregar   
1   Não Carregar  Não Carregar  Não Carregar  Não Carregar  Não Carregar   
2   Não Carregar  Não Carregar  Não Carregar  Não Carregar  Não Carregar   
3            2.0           2.0           2.0           2.0           2.0   
4            2.0           3.0           3.0           2.0           2.0   
..           ...           ...           ...           ...           ...   
72  Não Carregar  Não Carregar  Não Carregar  Não Carregar  Não Carregar   
73           2.0           2.0           2.0           2.0           2.0   
74           2.0           3.0           3.0           2.0           2.0   
75           2.0           3.0           3.0           2.0           2.0   
76           2.0           3.0           3.0           2.0           2.0   

    Charging_13h  Charging_14h  Charg

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Supondo que o dataframe full_dataset já exista

# Usar 'Weekday' e 'Hours_since_last_charge' como variáveis de entrada
X = full_dataset[['Weekday', 'Hours_since_last_charge']].values
y = full_dataset[[f'Charging_{hour}h' for hour in range(8, 21)]].values

# Dividir os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# Treinar o modelo de regressão logística multinomial
model = MultiOutputClassifier(LogisticRegression(max_iter=1000))
model.fit(X_train, y_train)

# Fazer previsões para o conjunto de teste
predictions = model.predict(X_test)

# Avaliar a precisão do modelo comparando as previsões com y_test
accuracy = accuracy_score(y_test.ravel(), predictions.ravel())
print("Acurácia do modelo:", accuracy)

# Criar uma tabela de previsões com 0s e 1s
predictions_table = pd.DataFrame(predictions, columns=[f'Charging_{hour}h' for hour in range(8, 21)])
predictions_table.replace({0: 'Não Carregar', 1: 'Carregar'}, inplace=True)

# Exibir as previsões
print("Previsões:")
print(predictions_table)

# Calcular a matriz de confusão geral
overall_confusion_matrix = confusion_matrix(y_test.ravel(), predictions.ravel())

# Extrair os verdadeiros positivos e verdadeiros negativos da matriz de confusão geral
overall_true_positives = overall_confusion_matrix[1, 1]
overall_true_negatives = overall_confusion_matrix[0, 0]

# Calcular o total de exemplos positivos e negativos
total_positives = overall_confusion_matrix[:, 1].sum()  # Soma da segunda coluna
total_negatives = overall_confusion_matrix[:, 0].sum()  # Soma da primeira coluna

# Calcular as percentagens de verdadeiros positivos e verdadeiros negativos
percentage_true_positives = overall_true_positives / total_positives * 100 if total_positives > 0 else 0
percentage_true_negatives = overall_true_negatives / total_negatives * 100 if total_negatives > 0 else 0

# Imprimir os resultados
print(f'Percentagem de True Positives: {percentage_true_positives:.2f}%')
print(f'Percentagem de True Negatives: {percentage_true_negatives:.2f}%')


Acurácia do modelo: 0.7062514524750174
Previsões:
      Charging_8h Charging_9h Charging_10h Charging_11h  Charging_12h  \
0        Carregar    Carregar     Carregar     Carregar  Não Carregar   
1        Carregar    Carregar     Carregar     Carregar  Não Carregar   
2        Carregar    Carregar     Carregar     Carregar  Não Carregar   
3        Carregar    Carregar     Carregar     Carregar  Não Carregar   
4        Carregar    Carregar     Carregar     Carregar  Não Carregar   
..            ...         ...          ...          ...           ...   
326  Não Carregar    Carregar     Carregar     Carregar  Não Carregar   
327  Não Carregar    Carregar     Carregar     Carregar  Não Carregar   
328  Não Carregar    Carregar     Carregar     Carregar  Não Carregar   
329  Não Carregar    Carregar     Carregar     Carregar  Não Carregar   
330  Não Carregar    Carregar     Carregar     Carregar  Não Carregar   

     Charging_13h  Charging_14h  Charging_15h  Charging_16h  Charging_17h