# ICU fluid balance: per-patient total, Mann-Kendall trend and slope

In [1]:
import numpy as np
import pandas as pd
import pymannkendall as mk

- [Time Series (Séries Temporais) com Python](https://maxwellpaparelli.medium.com/time-series-s%C3%A9ries-temporais-com-python-f4e74fd45b0b)
- [O teste de Mann-Kendall](https://medium.com/@duarte.jr105/o-teste-de-mann-kendall-28ff71e731c6)
- [Análise de tendência em série temporais](https://ivanildo-batista13.medium.com/an%C3%A1lise-de-tend%C3%AAncia-em-s%C3%A9rie-temporais-aa81c84354e0)

## ICUp5FBDiario

In [2]:
# Load the daily fluid-balance records and preview the first three rows.
df_fluid_balance = pd.read_csv(filepath_or_buffer='./data/input/ICUp5FBDiario.csv')
df_fluid_balance.head(n=3)

Unnamed: 0,subject_id,day,start_time,BHDiario
0,19031918,1,2116-08-06 09:22:31.000000 UTC,151984.7813
1,10750406,1,2178-01-11 16:31:55.000000 UTC,102043.4016
2,10780878,1,2114-11-12 18:27:05.000000 UTC,8067.1615


In [3]:
# Inspect column dtypes and non-null counts before coercing types.
df_fluid_balance.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62572 entries, 0 to 62571
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   subject_id  62572 non-null  int64  
 1   day         62572 non-null  int64  
 2   start_time  62572 non-null  object 
 3   BHDiario    62572 non-null  float64
dtypes: float64(1), int64(2), object(1)
memory usage: 1.9+ MB


In [4]:
# Target type for every fluid-balance column that is kept. The 'datetime'
# marker is not a real dtype: it flags columns to parse as timestamps.
data_types = dict(
    subject_id='int64',
    day='int64',
    start_time='datetime',
    BHDiario='float64',
)

In [5]:
# Timestamps in this file carry fractional seconds and a literal " UTC" suffix.
correct_format = '%Y-%m-%d %H:%M:%S.%f UTC'

# Coerce each declared column: cast plain dtypes, parse the 'datetime' ones.
for col in data_types:
    dtype = data_types[col]
    if dtype != 'datetime':
        df_fluid_balance[col] = df_fluid_balance[col].astype(dtype)
        continue
    df_fluid_balance[col] = pd.to_datetime(df_fluid_balance[col], format=correct_format)

# Keep only the declared columns, in the declared order.
df_fluid_balance = df_fluid_balance[list(data_types)]

df_fluid_balance.head(1)

Unnamed: 0,subject_id,day,start_time,BHDiario
0,19031918,1,2116-08-06 09:22:31,151984.7813


In [6]:
# Three largest daily fluid-balance values.
(
    df_fluid_balance
    .sort_values(by='BHDiario', ascending=False)
    .head(n=3)
)

Unnamed: 0,subject_id,day,start_time,BHDiario
650,13637699,1,2188-08-24 17:55:42,1587010.0
520,19246436,1,2185-01-14 04:25:44,1320920.0
5807,11536589,1,2116-05-21 00:57:28,1280959.0


## ICUpatients5

In [7]:
# Load the ICU patient/stay table and preview the first three rows.
df_patients = pd.read_csv(filepath_or_buffer='./data/input/ICUpatients5.csv')
df_patients.head(n=3)

Unnamed: 0,subject_id,hadm_id,stay_id,gender,anchor_age,admittime,intime,outtime,deathtime,intervaloAdmIntUTI,tempoUTI,race,pesoadm,unidpesoadm,altura,unidaltura,imc
0,12088486,29875103,39348271,M,18,2135-02-07 02:57:00 UTC,2135-02-07 03:03:00 UTC,2135-02-12 00:25:00 UTC,2135-02-11 00:01:00 UTC,0,117,UNABLE TO OBTAIN,71.8,kg,178.0,cm,-99.9
1,19850244,27972658,32398411,F,18,2162-06-04 17:38:00 UTC,2162-06-05 15:25:42 UTC,2162-06-09 20:22:03 UTC,,21,100,OTHER,57.7,kg,163.0,cm,-99.9
2,10562205,20149612,34317198,M,18,2149-04-29 23:19:00 UTC,2149-04-30 00:58:00 UTC,2149-05-09 20:54:46 UTC,,1,235,WHITE,98.0,kg,180.0,cm,-99.9


In [8]:
# Columns kept from the patient table and their target types. The 'datetime'
# marker is not a real dtype: it flags columns to parse as timestamps.
data_types = dict(
    subject_id='int64',
    intime='datetime',
    outtime='datetime',
)

In [9]:
# Patient timestamps have whole seconds followed by a literal " UTC" suffix.
correct_format = '%Y-%m-%d %H:%M:%S UTC'

# Coerce each declared column: cast plain dtypes, parse the 'datetime' ones.
for col in data_types:
    dtype = data_types[col]
    if dtype != 'datetime':
        df_patients[col] = df_patients[col].astype(dtype)
        continue
    df_patients[col] = pd.to_datetime(df_patients[col], format=correct_format)

# Keep only the declared columns, in the declared order.
df_patients = df_patients[list(data_types)]

In [10]:
# Placeholder result columns, overwritten by the apply() cells further down.
# total_rate and BHCorte later receive floating-point values (BHCorte may be
# NaN), so they start as floats: an integer placeholder would have to be
# upcast by the later `.loc[:, col] = ...` writes, which recent pandas
# deprecates. BHTendencia holds integer codes and stays an int.
df_patients.loc[:, 'total_rate'] = 0.0
df_patients.loc[:, 'BHTendencia'] = 0
df_patients.loc[:, 'BHCorte'] = 0.0

In [11]:
def get_total_rate(row: pd.Series, fluid_balance: pd.DataFrame = None) -> float:
    """Sum every daily fluid-balance value recorded for one patient.

    Parameters
    ----------
    row : pd.Series
        Patient record; only ``row.subject_id`` is read.
    fluid_balance : pd.DataFrame, optional
        Frame with ``subject_id`` and ``BHDiario`` columns. When omitted, the
        notebook-level ``df_fluid_balance`` is used, so the function can still
        be passed directly to ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    float
        Sum of ``BHDiario`` over the patient's rows (0 when there are none).
    """
    if fluid_balance is None:
        fluid_balance = df_fluid_balance

    patient_rows = fluid_balance.loc[fluid_balance['subject_id'] == row.subject_id]
    return patient_rows['BHDiario'].sum()

def get_tendency(row: pd.Series, fluid_balance: pd.DataFrame = None) -> int:
    """Classify the daily fluid-balance trend (BHTendencia) of one patient.

    The patient's ``BHDiario`` values are ordered by ``day`` and mapped to one
    of the following codes:

        0 - fewer than three records, too few to identify a trend;
        1 - the sign does not vary and stays negative the whole time;
        2 - the sign does not vary and stays positive the whole time;
        3 - every value is zero;
        4 - the sign varies and the trend is decreasing;
        5 - the sign varies and the trend is increasing;
        6 - the sign varies and there is no trend.

    Zeros do not count as a sign change: a series of zeros and positives is
    coded 2, and a series of zeros and negatives is coded 1. Codes 4-6 come
    from the ``trend`` field of ``mk.original_test`` (Mann-Kendall test).

    Parameters
    ----------
    row : pd.Series
        Patient record; only ``row.subject_id`` is read.
    fluid_balance : pd.DataFrame, optional
        Frame with ``subject_id``, ``day`` and ``BHDiario`` columns. When
        omitted, the notebook-level ``df_fluid_balance`` is used.

    Returns
    -------
    int
        One of the codes above. 0 is also returned if the test reports a
        trend label other than 'decreasing', 'increasing' or 'no trend'.
    """
    if fluid_balance is None:
        fluid_balance = df_fluid_balance

    patient_rows = fluid_balance.loc[fluid_balance['subject_id'] == row.subject_id]
    daily = patient_rows.sort_values('day')['BHDiario']

    if len(daily) < 3:
        return 0

    # Compute the sign once; the all-zero check must come first because an
    # all-zero series also satisfies the two checks that follow.
    signs = np.sign(daily)
    if (signs == 0).all():
        return 3
    if (signs >= 0).all():
        return 2
    if (signs <= 0).all():
        return 1

    # The sign varies: only the trend label of the test result is used here.
    trend = mk.original_test(daily).trend
    trend_codes = {'decreasing': 4, 'increasing': 5, 'no trend': 6}
    return trend_codes.get(trend, 0)

def get_cut(row: pd.Series, fluid_balance: pd.DataFrame = None) -> float:
    """Return the Mann-Kendall slope of one patient's daily fluid balance.

    The patient's ``BHDiario`` values are ordered by ``day`` and passed to
    ``mk.original_test``; its ``slope`` field (the slope of the line that best
    fits the time series) is returned.

    Parameters
    ----------
    row : pd.Series
        Patient record; only ``row.subject_id`` is read.
    fluid_balance : pd.DataFrame, optional
        Frame with ``subject_id``, ``day`` and ``BHDiario`` columns. When
        omitted, the notebook-level ``df_fluid_balance`` is used.

    Returns
    -------
    float
        The slope, or NaN when the patient has fewer than three records.
    """
    if fluid_balance is None:
        fluid_balance = df_fluid_balance

    patient_rows = fluid_balance.loc[fluid_balance['subject_id'] == row.subject_id]
    daily = patient_rows.sort_values('day')['BHDiario']

    # Same minimum series length as the trend classification.
    if len(daily) < 3:
        return np.nan

    return mk.original_test(daily).slope

In [12]:
# Plain column assignment replaces the placeholder column outright, so the
# float sums are not written in place into the integer placeholder (an
# upcasting set that recent pandas deprecates).
df_patients['total_rate'] = df_patients.apply(get_total_rate, axis=1)

In [13]:
# Integer trend code per patient (see get_tendency for the meaning of each code).
df_patients['BHTendencia'] = df_patients.apply(get_tendency, axis=1)

In [14]:
# Plain column assignment replaces the placeholder column outright, so the
# float/NaN slopes are not written in place into the integer placeholder (an
# upcasting set that recent pandas deprecates).
df_patients['BHCorte'] = df_patients.apply(get_cut, axis=1)

In [15]:
# Three patients with the largest accumulated fluid balance.
(
    df_patients
    .sort_values(by='total_rate', ascending=False)
    .head(n=3)
)

Unnamed: 0,subject_id,intime,outtime,total_rate,BHTendencia,BHCorte
177,14816494,2145-11-05 17:51:00,2146-01-23 19:00:25,6627710.0,2,-24.565018
65,13697731,2122-03-21 21:17:00,2122-06-07 15:03:37,5139326.0,6,279.140325
292,11345357,2189-08-04 12:44:00,2189-08-20 21:26:39,5037923.0,2,5362.876913


In [16]:
# Three rows with the highest trend code.
(
    df_patients
    .sort_values(by='BHTendencia', ascending=False)
    .head(n=3)
)

Unnamed: 0,subject_id,intime,outtime,total_rate,BHTendencia,BHCorte
5728,18672293,2173-08-27 10:13:54,2173-09-04 17:52:30,285482.1,6,1831.75
2302,14066173,2144-04-13 09:13:56,2144-04-29 18:36:15,1087140.0,6,327.201575
2184,15503922,2110-06-22 14:38:57,2110-07-02 17:11:38,865977.2,6,-15209.601963


In [17]:
# Three patients with the steepest positive slope.
(
    df_patients
    .sort_values(by='BHCorte', ascending=False)
    .head(n=3)
)

Unnamed: 0,subject_id,intime,outtime,total_rate,BHTendencia,BHCorte
3696,16304457,2169-05-27 14:10:21,2169-05-30 17:12:30,685227.4188,2,158073.41535
2583,13655592,2140-02-26 22:53:00,2140-03-01 08:18:06,754201.5127,2,109963.21175
1430,10980425,2189-02-19 16:35:20,2189-02-23 16:05:21,687645.5788,2,104452.2608


In [18]:
# Number of patients per trend code, listed in ascending code order.
(
    df_patients
    .value_counts(subset='BHTendencia')
    .reset_index()
    .sort_values(by='BHTendencia')
)

Unnamed: 0,BHTendencia,count
4,0,13
0,2,6301
2,4,215
3,5,33
1,6,931


## Finalizando

In [19]:
# Select the export columns in their output order and renumber rows from 0.
final_columns = ['subject_id', 'intime', 'outtime', 'total_rate', 'BHTendencia', "BHCorte"]
df_final = df_patients.loc[:, final_columns].reset_index(drop=True)
df_final.head()

Unnamed: 0,subject_id,intime,outtime,total_rate,BHTendencia,BHCorte
0,12088486,2135-02-07 03:03:00,2135-02-12 00:25:00,259602.1,2,-15963.906983
1,19850244,2162-06-05 15:25:42,2162-06-09 20:22:03,812564.0,2,-186219.128583
2,10562205,2149-04-30 00:58:00,2149-05-09 20:54:46,1013548.0,4,-33062.3842
3,17840418,2129-01-14 08:00:00,2129-01-17 16:52:18,304730.1,2,13381.903733
4,17590005,2134-03-03 03:47:56,2134-03-06 16:58:23,431688.9,2,27041.585267


In [20]:
# Write the per-patient results without the row index.
df_final.to_csv(path_or_buf='./data/output/ICUp5FBTendencia.csv', index=False)