# Imports

## Libraries

In [52]:
from datetime import datetime, timedelta
import polars as pl
import json
import sys
import pandas as pd
# Polars table rendering options (global for the kernel session):
# - show up to 15 columns before truncating the table,
# - print each column's dtype inline with its name,
# - hide the "shape: (rows, cols)" line,
# - hide the separator between the header and the dtype row.
pl.Config.set_tbl_cols(15)
pl.Config.set_tbl_column_data_type_inline(True)
pl.Config.set_tbl_hide_dataframe_shape(True)
pl.Config.set_tbl_hide_dtype_separator(True)

# pandas: never truncate the text shown inside a column.
pd.set_option("display.max_colwidth", None)


In [2]:
# Make the parent of the current working directory importable so that the
# project-local `config` package can be resolved from this notebook.
# The membership check keeps the cell idempotent: re-running it no longer
# appends another duplicate '..' entry to sys.path each time.
if '..' not in sys.path:
	sys.path.append('..')
from config.settings import Config

## Configurations

In [3]:
# Build the project settings object used below to resolve file locations.
# NOTE(review): "dev" presumably selects the development profile — confirm in config/settings.py.
config = Config("dev")

## Data

In [22]:
# PATHS
# Location of "mrn_cleaned.parquet" in the "cleaned" area, resolved through the project config.
morning_path = config.get_file("cleaned", "mrn_cleaned.parquet")

# DATA TABLES
# Load the cleaned parquet file into a polars DataFrame; later cells derive new frames from it.
df_cleaned = pl.read_parquet(morning_path)


In [59]:
# Columns inspected when comparing the two equivalent pipelines below.
wgt_list = [
	"day_date",
	"ttl_weight", "ttl_diff", "ttl_loss",
	"mus_weight", "mus_percentage", "mus_diff", "mus_loss",
	"fat_weight", "fat_percentage", "fat_diff", "fat_loss",
]

# Variant A: every derived column is spelled out explicitly.
df_cleaned2 = (
	df_cleaned
	.with_columns(
		# Expressions inside a single with_columns call are evaluated against
		# the incoming frame, so fat_weight below still reads the unscaled
		# fat_percentage (which is why it divides by 100 itself).
		(pl.col("fat_percentage") / 100).alias("fat_percentage"),
		(pl.col("mus_weight") / pl.col("ttl_weight")).round(3).alias("mus_percentage"),
		((pl.col("ttl_weight") * pl.col("fat_percentage")) / 100).round(2).alias("fat_weight"),
	)
	.with_columns(
		# Row-over-row change; the first row has no predecessor and gets 0.0.
		(pl.col("ttl_weight") - pl.col("ttl_weight").shift(1)).fill_null(0.0).alias("ttl_diff"),
		(pl.col("mus_weight") - pl.col("mus_weight").shift(1)).fill_null(0.0).alias("mus_diff"),
		(pl.col("fat_weight") - pl.col("fat_weight").shift(1)).fill_null(0.0).alias("fat_diff"),
	)
	.with_columns(
		# Boolean flag: True when the value dropped compared with the previous row.
		pl.when(pl.col("ttl_diff") < 0).then(True).otherwise(False).alias("ttl_loss"),
		pl.when(pl.col("mus_diff") < 0).then(True).otherwise(False).alias("mus_loss"),
		pl.when(pl.col("fat_diff") < 0).then(True).otherwise(False).alias("fat_loss"),
	)
)

# Variant B: same result, but the per-metric diff/loss expressions are
# generated from the column-name prefixes with list comprehensions.
df_cleaned3 = (
	df_cleaned
	.with_columns(
		(pl.col("fat_percentage") / 100).alias("fat_percentage"),
		(pl.col("mus_weight") / pl.col("ttl_weight")).round(3).alias("mus_percentage"),
		((pl.col("ttl_weight") * pl.col("fat_percentage")) / 100).round(2).alias("fat_weight"),
	)
	.with_columns([
		(pl.col(f"{prefix}_weight") - pl.col(f"{prefix}_weight").shift(1))
		.fill_null(0.0)
		.alias(f"{prefix}_diff")
		for prefix in ("ttl", "mus", "fat")
	])
	.with_columns([
		pl.when(pl.col(f"{prefix}_diff") < 0).then(True).otherwise(False).alias(f"{prefix}_loss")
		for prefix in ("ttl", "mus", "fat")
	])
)

# Commented-out reference snippet: another list comprehension that builds
# polars expressions (here, dtype casts).
#			self.dtype_exp = [
#				pl.col(column_config["name"]).cast(self.pl_dtype_dict[column_config["dtype"]])
#				for column_name, column_config in self.rename_columns_dict[table_id].items()
#			]

# Display the resulting DataFrame
#display(df_cleaned3.select(wgt_list))

# Element-wise comparison of both variants; an all-true frame means they agree.
df_cleaned3.select(wgt_list) == df_cleaned2.select(wgt_list)


day_date,ttl_weight,ttl_diff,ttl_loss,mus_weight,mus_percentage,mus_diff,mus_loss,fat_weight,fat_percentage,fat_diff,fat_loss
bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool
true,true,true,true,true,true,true,true,true,true,true,true
true,true,true,true,true,true,true,true,true,true,true,true
true,true,true,true,true,true,true,true,true,true,true,true
true,true,true,true,true,true,true,true,true,true,true,true
true,true,true,true,true,true,true,true,true,true,true,true
true,true,true,true,true,true,true,true,true,true,true,true
true,true,true,true,true,true,true,true,true,true,true,true
true,true,true,true,true,true,true,true,true,true,true,true
true,true,true,true,true,true,true,true,true,true,true,true
true,true,true,true,true,true,true,true,true,true,true,true


In [43]:
# NOTE(review): both imports repeat names already bound in the first cell, and
# `import datetime` rebinds `datetime` from the class (imported there via
# `from datetime import datetime, timedelta`) to the module. Neither name is
# used in this cell.
import polars as pl
import datetime

# Rebuild day_form_time, slp_fall and slp_raise as full datetimes: the date
# comes from day_date (shifted by the listed number of days) and the hour and
# minute come from the column's current value. Seconds are not carried over.
#   day_form_time -> day_date + 1 day
#   slp_fall      -> day_date
#   slp_raise     -> day_date + 1 day
df_transformed = df_cleaned.with_columns(
	[
		(
			pl.col("day_date").cast(pl.Datetime)
			+ pl.duration(
				days=day_offset,
				hours=pl.col(time_col).dt.hour(),
				minutes=pl.col(time_col).dt.minute(),
			)
		).alias(time_col)
		for time_col, day_offset in (
			("day_form_time", 1),
			("slp_fall", 0),
			("slp_raise", 1),
		)
	]
)

df_transformed.head(5)


timestamp,email_confirmation,day_date,day_form_time,day_humor,pho_time,slp_fall,slp_raise,slp_duration,slp_awake,slp_rem,slp_light,slp_deep,wgt_total,wgt_muscle,wgt_bf,wgt_rmr,wgt_cal,grn_priority,red_priority,blu_priority,ylw_priority,prp_priority,day_priority
str,str,date,datetime[μs],str,duration[μs],datetime[μs],datetime[μs],duration[μs],f64,f64,f64,f64,f64,f64,f64,i64,i64,str,str,str,str,str,list[str]
"""2023-11-08T11:…","""matheuschomem@…",2023-11-09,2023-11-10 07:42:00,"""Sol""",,2023-11-09 20:58:00,2023-11-10 04:50:00,7h 52m,1.0,11.5,75.0,11.5,73.05,55.95,19.3,1598,897,"""Priorizar""","""Praticar""","""Manter""","""Manter""","""Praticar""","[""◈◇◇◇◇ Leitura""]"
"""2023-11-08T11:…","""matheuschomem@…",2023-11-08,2023-11-09 06:58:00,"""Lua""",,2023-11-08 20:55:00,2023-11-09 04:50:00,7h 55m,0.0,17.0,65.0,17.0,73.6,56.22,19.5,1604,541,"""Praticar""","""Priorizar""","""Praticar""","""Manter""","""Manter""","[""◇◈◇◇◇ Atenção à Limitação""]"
"""2023-11-09T11:…","""matheuschomem@…",2023-11-10,2023-11-11 05:21:00,"""Sol""",,2023-11-10 21:39:00,2023-11-11 05:05:00,7h 26m,1.0,14.5,69.0,14.5,73.5,56.2,19.4,1603,609,"""Praticar""","""Manter""","""Manter""","""Praticar""","""Priorizar""","[""◈◇◇◇◇ Leitura""]"
"""2023-11-10T11:…","""matheuschomem@…",2023-11-11,2023-11-12 05:10:00,"""Sol""",,2023-11-11 20:47:00,2023-11-12 05:03:00,8h 16m,1.0,16.0,66.0,16.0,73.5,56.2,19.4,1603,829,"""Priorizar""","""Praticar""","""Manter""","""Manter""","""Praticar""","[""◈◇◇◇◇ Leitura""]"
"""2023-11-11T11:…","""matheuschomem@…",2023-11-12,2023-11-13 05:01:00,"""Sol""",,2023-11-12 21:37:00,2023-11-13 04:52:00,7h 15m,0.0,19.0,61.0,19.0,72.9,55.9,19.2,1594,1081,"""Priorizar""","""Manter""","""Praticar""","""Manter""","""Praticar""","[""◈◇◇◇◇ Leitura""]"


In [36]:
# NOTE(review): `import datetime` rebinds `datetime` from the class imported in
# the first cell (`from datetime import datetime, timedelta`) to the module;
# the name is not used in this cell.
import datetime

# Commented-out attempt: build the datetimes by adding the date column, the
# time column and a day offset directly.
#df_transformed = df_cleaned.with_columns([
#    pl.col("day_date"),
#    (pl.col("day_date") + pl.col("day_form_time") + pl.duration(days=1)).alias("day_form_time"),
#    (pl.col("day_date") + pl.col("slp_fall")).alias("slp_fall"),
#    (pl.col("day_date") + pl.col("slp_raise") + pl.duration(days=1)).alias("slp_raise")
#])

#df_cleaned[["day_date", "day_form_time", "slp_fall", "slp_raise"]]

# Scratch inspection. The first two expressions are evaluated but not shown;
# only the last expression of the cell (the hour of each day_form_time value)
# is rendered as output.
df_cleaned["day_date"][0] 
df_cleaned["day_form_time"][0].minute
df_cleaned.select(pl.col("day_form_time").dt.hour())

day_form_time
i8
7
6
5
5
5
9
4
5
5
7
