## Importando os dados:

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score

Dicionário de Dados:
* data é a data (dia do calendário) de cada leitura, dentro do intervalo de tempo que conseguimos coletar.
* horario é o horário da leitura, cobrindo o dia inteiro com marcos a cada 20 minutos.
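* dia é o dia da semana em formato numérico. A descrição original indica 1 para domingo e 7 para sábado; porém, nas amostras exibidas abaixo, 03/12/2024 (terça-feira) aparece como 2 e 25/12/2024 (quarta-feira) como 3, o que não corresponde a essa codificação — confirmar a codificação na origem dos dados.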
* sensor_presenca é o dado do sensor de presença (0 ou 1) recebido naquele instante de tempo.

In [None]:
# Load the presence-sensor readings. The file is comma-separated, which is
# already read_csv's default delimiter.
CAMINHO_CSV = '/content/sensor_presenca (1).csv'
df = pd.read_csv(CAMINHO_CSV)
df

Unnamed: 0,data,horario,dia,sensor_presenca
0,2024-12-03,00:00,2,0
1,2024-12-03,00:20,2,0
2,2024-12-03,00:40,2,0
3,2024-12-03,01:00,2,0
4,2024-12-03,01:20,2,0
...,...,...,...,...
1651,2024-12-25,22:20,3,0
1652,2024-12-25,22:40,3,0
1653,2024-12-25,23:00,3,0
1654,2024-12-25,23:20,3,0


In [None]:
# Merge the separate date and time strings into a single timestamp column,
# which is easier to work with than two text columns.
# errors='coerce' turns any value that fails to parse into NaT instead of raising.
carimbo_texto = df['data'] + ' ' + df['horario']
df['data_hora'] = pd.to_datetime(carimbo_texto, errors='coerce')

# Put the rows in chronological order and renumber the index from 0.
df = df.sort_values(by='data_hora', ignore_index=True)
df

Unnamed: 0,data,horario,dia,sensor_presenca,data_hora
0,2024-12-03,00:00,2,0,2024-12-03 00:00:00
1,2024-12-03,00:20,2,0,2024-12-03 00:20:00
2,2024-12-03,00:40,2,0,2024-12-03 00:40:00
3,2024-12-03,01:00,2,0,2024-12-03 01:00:00
4,2024-12-03,01:20,2,0,2024-12-03 01:20:00
...,...,...,...,...,...
1651,2024-12-25,22:20,3,0,2024-12-25 22:20:00
1652,2024-12-25,22:40,3,0,2024-12-25 22:40:00
1653,2024-12-25,23:00,3,0,2024-12-25 23:00:00
1654,2024-12-25,23:20,3,0,2024-12-25 23:20:00


In [None]:
# Flag sensor state changes: each row gets its reading minus the previous row's
# reading (for 0/1 readings: 1.0 when the sensor went 0 -> 1, -1.0 when it went
# 1 -> 0, 0.0 when unchanged). The first row has no predecessor, so its NaN is
# replaced by 0.
leitura = df['sensor_presenca']
df['mudanca_estado'] = leitura.diff(periods=1).fillna(0)
df

Unnamed: 0,data,horario,dia,sensor_presenca,data_hora,mudanca_estado
0,2024-12-03,00:00,2,0,2024-12-03 00:00:00,0.0
1,2024-12-03,00:20,2,0,2024-12-03 00:20:00,0.0
2,2024-12-03,00:40,2,0,2024-12-03 00:40:00,0.0
3,2024-12-03,01:00,2,0,2024-12-03 01:00:00,0.0
4,2024-12-03,01:20,2,0,2024-12-03 01:20:00,0.0
...,...,...,...,...,...,...
1651,2024-12-25,22:20,3,0,2024-12-25 22:20:00,0.0
1652,2024-12-25,22:40,3,0,2024-12-25 22:40:00,0.0
1653,2024-12-25,23:00,3,0,2024-12-25 23:00:00,0.0
1654,2024-12-25,23:20,3,0,2024-12-25 23:20:00,0.0


In [None]:
# Seconds elapsed between each reading and the one before it.
# The first row has no predecessor, so its missing value is replaced by 0.
intervalo = df['data_hora'].diff()
df['diferenca_tempo'] = intervalo.dt.total_seconds().fillna(0)
df

Unnamed: 0,data,horario,dia,sensor_presenca,data_hora,mudanca_estado,diferenca_tempo
0,2024-12-03,00:00,2,0,2024-12-03 00:00:00,0.0,0.0
1,2024-12-03,00:20,2,0,2024-12-03 00:20:00,0.0,1200.0
2,2024-12-03,00:40,2,0,2024-12-03 00:40:00,0.0,1200.0
3,2024-12-03,01:00,2,0,2024-12-03 01:00:00,0.0,1200.0
4,2024-12-03,01:20,2,0,2024-12-03 01:20:00,0.0,1200.0
...,...,...,...,...,...,...,...
1651,2024-12-25,22:20,3,0,2024-12-25 22:20:00,0.0,1200.0
1652,2024-12-25,22:40,3,0,2024-12-25 22:40:00,0.0,1200.0
1653,2024-12-25,23:00,3,0,2024-12-25 23:00:00,0.0,1200.0
1654,2024-12-25,23:20,3,0,2024-12-25 23:20:00,0.0,1200.0


In [None]:
# On rows where the sensor has just switched from 0 to 1 (mudanca_estado == 1),
# copy the elapsed seconds from diferenca_tempo; every other row gets 0.
houve_entrada = df['mudanca_estado'] == 1
df['tempo_ocupacao'] = np.where(houve_entrada, df['diferenca_tempo'], 0)

# Inspect a slice of rows around a 0 -> 1 transition to check the result.
df.iloc[17:38]

Unnamed: 0,data,horario,dia,sensor_presenca,data_hora,mudanca_estado,diferenca_tempo,tempo_ocupacao
17,2024-12-03,05:40,2,0,2024-12-03 05:40:00,0.0,1200.0,0.0
18,2024-12-03,06:00,2,0,2024-12-03 06:00:00,0.0,1200.0,0.0
19,2024-12-03,06:20,2,0,2024-12-03 06:20:00,0.0,1200.0,0.0
20,2024-12-03,06:40,2,0,2024-12-03 06:40:00,0.0,1200.0,0.0
21,2024-12-03,07:00,2,1,2024-12-03 07:00:00,1.0,1200.0,1200.0
22,2024-12-03,07:20,2,1,2024-12-03 07:20:00,0.0,1200.0,0.0
23,2024-12-03,07:40,2,1,2024-12-03 07:40:00,0.0,1200.0,0.0
24,2024-12-03,08:00,2,1,2024-12-03 08:00:00,0.0,1200.0,0.0
25,2024-12-03,08:20,2,1,2024-12-03 08:20:00,0.0,1200.0,0.0
26,2024-12-03,08:40,2,1,2024-12-03 08:40:00,0.0,1200.0,0.0


In [None]:
# Build the target column: the sensor reading of the NEXT row.
# shift(-1) moves every value up by one position, so row i receives the reading
# of row i + 1. The last row has no successor; its NaN is filled with 0.
# NOTE(review): that last label is filled in rather than observed — consider
# dropping the final row before training.
df['ocupacao_futura'] = df['sensor_presenca'].shift(-1).fillna(0)

# The model cannot consume datetime values, so data_hora is converted to Unix
# epoch seconds (integer).
# The dtype guard keeps this cell idempotent: without it, re-running the cell
# would floor-divide the already-converted integers a second time and silently
# corrupt the feature. Subtracting the epoch and dividing by a one-second
# Timedelta does not depend on the resolution of the datetime column.
if pd.api.types.is_datetime64_any_dtype(df['data_hora']):
    epoca = pd.Timestamp('1970-01-01')
    df['data_hora'] = (df['data_hora'] - epoca) // pd.Timedelta(seconds=1)

# Split the dataset into model inputs and target
features = df[['sensor_presenca', 'data_hora', 'tempo_ocupacao', 'dia']]
target = df['ocupacao_futura']

features

Unnamed: 0,sensor_presenca,data_hora,tempo_ocupacao,dia
0,0,1733184000,0.0,2
1,0,1733185200,0.0,2
2,0,1733186400,0.0,2
3,0,1733187600,0.0,2
4,0,1733188800,0.0,2
...,...,...,...,...
1651,0,1735165200,0.0,3
1652,0,1735166400,0.0,3
1653,0,1735167600,0.0,3
1654,0,1735168800,0.0,3


## Random Forest Classifier

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so readings from the
# same time span end up in both sets. For time-ordered data a chronological
# split may give a more honest estimate — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit a random forest with default hyperparameters (fit returns the estimator itself)
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Score the predictions; 'binary' reports recall and F1 for the positive class only
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='binary')
f1 = f1_score(y_test, y_pred, average='binary')

print(f'Acurácia do modelo: {accuracy:.2f}')
print(f'Recall do modelo: {recall:.2f}')
print(f'F1 Score do modelo: {f1:.2f}')


Acurácia do modelo: 0.90
Recall do modelo: 0.85
F1 Score do modelo: 0.85


* o modelo acertou 90% das classificações do conjunto de teste (acurácia).
* o modelo identificou corretamente 85% dos casos que deveriam ser positivos (recall), ou seja, ainda deixa de detectar cerca de 15% das situações em que o espaço está ocupado.
* Com um F1 Score de 0.85, o modelo apresenta um bom equilíbrio entre precisão e recall, mas ainda há espaço para melhorar.