In [1]:
from datetime import datetime, timedelta
from config.settings import Config
import polars as pl
import json

In [2]:
config = Config("dev")

morning_path = config.get_file("data", "morning_routine_v2.xlsx")
night_path = config.get_file("data", "night_routine_v2.xlsx")
json_path = config.get_file("misc", "rename_columns.json")

pl_dtype_dict = {
	"list": pl.List(pl.Utf8),
	"string": pl.Utf8,
	"date": pl.Date,
	"float": pl.Float64,
	"int": pl.Int64,
	"timestamp": pl.Datetime,
	"timedelta": pl.Duration
}

# Function to convert string to microseconds
def convert_to_microseconds(time_str):
    if time_str is not None:
        time_datetime = datetime.strptime(time_str, "%H:%M")
        difference = time_datetime - datetime.strptime("00:00", "%H:%M")
        microseconds = difference.total_seconds() * 1e6
        return int(microseconds)
    else:
        return None

In [24]:
with open(json_path, encoding='utf-8') as f:
    rename_columns_dict = json.load(f)

df_sun_raw = pl.read_excel(morning_path)
df_moon_raw = pl.read_excel(night_path)

# Create a list of column expressions for renaming
rename_expressions = [
    pl.col(column_name).alias(column_config["name"])
    for column_name, column_config in rename_columns_dict["morning"].items()
]

dtype_expressions = [
    pl.col(column_config["name"]).cast(pl_dtype_dict[column_config["dtype"]])
    for column_name, column_config in rename_columns_dict["morning"].items()
]

df_sun = df_sun_raw.select(rename_expressions)

time_cols = ['day_form_time', 'slp_fall', 'pho_time', 'slp_raise', 'slp_duration']
df_sun = df_sun.select([
    pl.col(col_name).map_elements(convert_to_microseconds).alias(col_name)
    if col_name in time_cols
    else col_name
    for col_name in df_sun.columns
])

#df_sun = df_sun.select(dtype_expressions)

df_sun.select(time_cols)

day_form_time,slp_fall,pho_time,slp_raise,slp_duration
i64,i64,i64,i64,i64
27720000000,75480000000,,17400000000,28320000000
25080000000,75300000000,,17400000000,28500000000
19260000000,77940000000,,18300000000,26760000000
18600000000,74820000000,,18180000000,29760000000
18060000000,77820000000,,17520000000,26100000000
35940000000,82140000000,,29220000000,33480000000
17820000000,77160000000,,15600000000,24840000000
19140000000,75420000000,,18600000000,29580000000
19800000000,81780000000,7980000000,19080000000,23700000000
27480000000,420000000,9000000000,27000000000,26580000000


In [88]:
# Função para converter string em microssegundos
def converter_para_microssegundos(horario_str):
    if horario_str is not None:
        horario_datetime = datetime.strptime(horario_str, "%H:%M")
        diferenca = horario_datetime - datetime.strptime("00:00", "%H:%M")
        microssegundos = diferenca.total_seconds() * 1e6
        return int(microssegundos)
    else:
        return None

#df1 = df_sun.with_columns('pho_time', df_sun['pho_time'].map_elements(converter_para_microssegundos))


# Converter as strings para microssegundos e criar o DataFrame
#df = pl.DataFrame({
#    'horario': strings_horarios,
#    'microssegundos': [converter_para_microssegundos(h) for h in strings_horarios]
#})

#df = df1.select(pl.col("pho_time").map_elements(converter_para_microssegundos)).cast(pl.Duration)

# Mostrar o DataFrame
print(df_sun[time_cols])



shape: (76, 5)
┌───────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
│ day_form_time ┆ slp_fall     ┆ pho_time     ┆ slp_raise    ┆ slp_duration │
│ ---           ┆ ---          ┆ ---          ┆ ---          ┆ ---          │
│ duration[μs]  ┆ duration[μs] ┆ duration[μs] ┆ duration[μs] ┆ duration[μs] │
╞═══════════════╪══════════════╪══════════════╪══════════════╪══════════════╡
│ 7h 42m        ┆ 20h 58m      ┆ null         ┆ 4h 50m       ┆ 7h 52m       │
│ 6h 58m        ┆ 20h 55m      ┆ null         ┆ 4h 50m       ┆ 7h 55m       │
│ 5h 21m        ┆ 21h 39m      ┆ null         ┆ 5h 5m        ┆ 7h 26m       │
│ 5h 10m        ┆ 20h 47m      ┆ null         ┆ 5h 3m        ┆ 8h 16m       │
│ …             ┆ …            ┆ …            ┆ …            ┆ …            │
│ 5h 9m         ┆ 22h 59m      ┆ 8h 50m       ┆ 5h 2m        ┆ 6h 3m        │
│ 5h 12m        ┆ 21h 16m      ┆ 57m          ┆ 5h 12m       ┆ 7h 46m       │
│ 6h 36m        ┆ 19h 36m      ┆ 5h 25m       ┆ 6

In [84]:
import polars as pl

# Seu DataFrame (substitua isso pelo seu DataFrame real)
data = {
    "timestamp": [None, "2022-01-02", "2022-01-03"],
    "email_confirmation": ["email1@example.com", None, "email3@example.com"],
    "day_date": ["2022-01-01", "2022-01-02", None],
    # ... outras colunas ...
}

df = pl.DataFrame(data)

# Encontrar valores nulos em todos os campos
null_values = {col: df.filter(pl.col(col).is_null()).collect() for col in df.columns}

# Exibindo os valores nulos por coluna
for col, values in null_values.items():
    print(f"Column '{col}': {values}")


AttributeError: 'DataFrame' object has no attribute 'collect'

In [74]:
wgt_columns = ["day_date",
			   "wgt_total",
			   "wgt_muscle",
			   "wgt_bf",
			   "wgt_rmr",
			   "wgt_cal"]

print(df_sun.select(wgt_columns).dtypes)

df_sun.select(wgt_columns).head()

[String, Float64, String, String, Int64, Int64]


day_date,wgt_total,wgt_muscle,wgt_bf,wgt_rmr,wgt_cal
str,f64,str,str,i64,i64
"""11-08-23""",73.05,"""55.95""","""19.30""",1598,897
"""11-07-23""",73.6,"""56.22""","""19.50""",1604,541
"""11-09-23""",73.5,"""56.20""","""19.40""",1603,609
"""11-10-23""",73.5,"""56.20""","""19.40""",1603,829
"""11-11-23""",72.9,"""55.90""","""19.20""",1594,1081


In [7]:
config.get_file("misc", "rename_columns.json")

'c:\\Users\\mathe\\OneDrive\\Crescimento\\Crescimento Profissional\\compass_project\\files\\misc\\rename_columns.json'