In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import datetime

In [3]:
df = pd.read_csv("../data.csv").dropna()

In [4]:
df

Unnamed: 0,date,time_of_day,weight
0,2022-12-08,morning,93.6
1,2022-12-08,evening,94.3
2,2022-12-09,morning,92.4
3,2022-12-10,evening,94.4
4,2022-12-11,morning,93.6
5,2022-12-11,evening,95.6
6,2022-12-12,morning,93.9
7,2022-12-12,evening,94.8
8,2022-12-13,morning,93.5
9,2022-12-13,evening,94.6


In [5]:
def moving_average(arr, window_size):
    return np.convolve(arr, np.ones(window_size), "valid") / window_size

In [6]:
moving_average(df["weight"], 7)

array([93.97142857, 94.14285714, 94.02857143, 94.34285714, 94.21428571])

In [7]:
diet_start = datetime.date(2022, 12, 13)

In [8]:
df["date"] = pd.to_datetime(df["date"]).dt.date

In [9]:
df[df["date"] == diet_start]["weight"]

8    93.5
9    94.6
Name: weight, dtype: float64

In [10]:
start_avg_weight = (
    df[df["date"] == diet_start]["weight"].iloc[0]
    + df[df["date"] == diet_start]["weight"].iloc[1]
) / 2

In [11]:
start_avg_weight

94.05

In [12]:
x_start = diet_start
x_end = diet_start + datetime.timedelta(days=100)
x = pd.date_range(start=x_start, end=x_end)
weekly_coefficient = 0.5
daily_coefficient = 0.5 / 7
end_avg_weight = start_avg_weight - (daily_coefficient * len(x))

In [13]:
y = np.linspace(start_avg_weight, end_avg_weight, len(x))

In [14]:
y

array([94.05      , 93.97785714, 93.90571429, 93.83357143, 93.76142857,
       93.68928571, 93.61714286, 93.545     , 93.47285714, 93.40071429,
       93.32857143, 93.25642857, 93.18428571, 93.11214286, 93.04      ,
       92.96785714, 92.89571429, 92.82357143, 92.75142857, 92.67928571,
       92.60714286, 92.535     , 92.46285714, 92.39071429, 92.31857143,
       92.24642857, 92.17428571, 92.10214286, 92.03      , 91.95785714,
       91.88571429, 91.81357143, 91.74142857, 91.66928571, 91.59714286,
       91.525     , 91.45285714, 91.38071429, 91.30857143, 91.23642857,
       91.16428571, 91.09214286, 91.02      , 90.94785714, 90.87571429,
       90.80357143, 90.73142857, 90.65928571, 90.58714286, 90.515     ,
       90.44285714, 90.37071429, 90.29857143, 90.22642857, 90.15428571,
       90.08214286, 90.01      , 89.93785714, 89.86571429, 89.79357143,
       89.72142857, 89.64928571, 89.57714286, 89.505     , 89.43285714,
       89.36071429, 89.28857143, 89.21642857, 89.14428571, 89.07

In [15]:
fig = px.line(
    df,
    x="date",
    y="weight",
    color="time_of_day",
    line_shape="spline",
    markers=True,
    text="weight",
)
fig.update_traces(textposition="bottom right")
fig.add_vline(
    x=diet_start,
    line_width=1,
    line_dash="dash",
    line_color="green",
    # annotation="Diet start",
)
fig.add_trace(go.Scatter(x=x, y=y, mode="lines", name="Expected loss rate"))
fig.add_trace(
    go.Scatter(
        x=df["date"],
        y=df["avg_daily"],
        mode="lines+markers",
        connectgaps=True,
        line_shape="spline",
        name="Average daily weight",
    )
)
fig.update_layout(
    xaxis_range=[
        df["date"][0] - datetime.timedelta(days=1),
        df["date"][len(df) - 1] + datetime.timedelta(days=1),
    ],
    yaxis_range=[min(df["weight"] - 1), max(df["weight"]) + 1],
)
fig.show()

KeyError: 'avg_daily'

In [None]:
df.iloc[-1]

In [None]:
prev_day = None
avg_daily = []
for row in df.itertuples():
    curr_day = row.date
    curr_weight = row.weight
    if curr_day == prev_day:
        avg_daily.append((prev_weight + curr_weight) / 2)
    else:
        avg_daily.append(None)
    prev_day = curr_day
    prev_weight = curr_weight

    print(row)

In [None]:
pd.DataFrame([(1, 2), (3, 4)], columns=["a", "b"])

In [None]:
df["avg_daily"] = avg_daily

In [None]:
df

In [None]:
f = go.Figure(
    data=go.Scatter(
        x=df["date"], y=df["avg_daily"], mode="lines+markers", connectgaps=True
    )
)
f.show()

In [16]:
def get_avg_df(original_df: pd.DataFrame) -> pd.DataFrame:
    df_morning = original_df[original_df["time_of_day"] == "morning"]
    df_evening = original_df[original_df["time_of_day"] == "evening"]

    prev_day, prev_weight = None, None
    avg_daily = []
    for row in original_df.itertuples():
        curr_day = row.date
        curr_weight = row.weight
        if curr_day == prev_day:
            avg_daily.append((row.date, (prev_weight + curr_weight) / 2))
        prev_day = curr_day
        prev_weight = curr_weight

    return pd.DataFrame(avg_daily, columns=["date", "avg_weight"])

In [17]:
df_morning = df[df["time_of_day"] == "morning"]
df_evening = df[df["time_of_day"] == "evening"]

In [47]:
df_morning.join(
    df_evening, on="date", how="inner", lsuffix="_morning", rsuffix="_evening"
).drop(columns=["time_of_day_morning", "time_of_day_evening"])

Unnamed: 0,date,date_morning,weight_morning,date_evening,weight_evening


In [51]:
df_morning.set_index("date").join(
    df_evening.set_index("date"), lsuffix="_morning", rsuffix="_evening", how="outer"
).drop(columns=["time_of_day_morning", "time_of_day_evening"])

Unnamed: 0_level_0,weight_morning,weight_evening
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-08,93.6,94.3
2022-12-09,92.4,
2022-12-10,,94.4
2022-12-11,93.6,95.6
2022-12-12,93.9,94.8
2022-12-13,93.5,94.6
2022-12-14,93.5,


In [37]:
new_df = df_morning.join(
    df_evening, on="date", how="outer", lsuffix="_morning", rsuffix="_evening"
)

In [41]:
new_df

Unnamed: 0,date,date_morning,time_of_day_morning,weight_morning,date_evening,time_of_day_evening,weight_evening
0.0,2022-12-08,2022-12-08,morning,93.6,,,
2.0,2022-12-09,2022-12-09,morning,92.4,,,
4.0,2022-12-11,2022-12-11,morning,93.6,,,
6.0,2022-12-12,2022-12-12,morning,93.9,,,
8.0,2022-12-13,2022-12-13,morning,93.5,,,
10.0,2022-12-14,2022-12-14,morning,93.5,,,
,1,,,,2022-12-08,evening,94.3
,3,,,,2022-12-10,evening,94.4
,5,,,,2022-12-11,evening,95.6
,7,,,,2022-12-12,evening,94.8


In [39]:
pd.pivot_table(new_df, index="date")

TypeError: '<' not supported between instances of 'int' and 'datetime.date'

In [67]:
df = pd.read_csv("../data.csv").replace({"r": "morning", "w": "evening"}).dropna()
df["date"] = pd.to_datetime(df["date"]).dt.date
df_morning = df[df["time_of_day"] == "morning"]
df_evening = df[df["time_of_day"] == "evening"]

df = (
    df_morning.set_index("date")
    .join(
        df_evening.set_index("date"),
        lsuffix="_morning",
        rsuffix="_evening",
        how="outer",
    )
    .drop(columns=["time_of_day_morning", "time_of_day_evening"])
)

In [68]:
df

Unnamed: 0_level_0,weight_morning,weight_evening
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-08,93.6,94.3
2022-12-09,92.4,
2022-12-10,,94.4
2022-12-11,93.6,95.6
2022-12-12,93.9,94.8
2022-12-13,93.5,94.6
2022-12-14,93.5,


In [87]:
np.mean(df['weight_evening'] - df['weight_morning'])

1.1749999999999972

In [88]:
(df['weight_morning'] + df['weight_evening'] ) /2

date
2022-12-08    93.95
2022-12-09      NaN
2022-12-10      NaN
2022-12-11    94.60
2022-12-12    94.35
2022-12-13    94.05
2022-12-14      NaN
dtype: float64

In [118]:
def count_avg(row, coeff=5):
    print(row)
    print()
    if not(np.isnan(row['weight_morning']) or np.isnan(row['weight_evening'])):
        return (row['weight_morning'] + row['weight_evening']) / 2
    if np.isnan(row['weight_evening']):
        return row['weight_morning'] + coeff
    if np.isnan(row['weight_morning']):
        return row['weight_evening'] - coeff
    return None


In [120]:
df.apply(count_avg, axis=1, coeff=10)

weight_morning    93.6
weight_evening    94.3
Name: 2022-12-08, dtype: float64

weight_morning    92.4
weight_evening     NaN
Name: 2022-12-09, dtype: float64

weight_morning     NaN
weight_evening    94.4
Name: 2022-12-10, dtype: float64

weight_morning    93.6
weight_evening    95.6
Name: 2022-12-11, dtype: float64

weight_morning    93.9
weight_evening    94.8
Name: 2022-12-12, dtype: float64

weight_morning    93.5
weight_evening    94.6
Name: 2022-12-13, dtype: float64

weight_morning    93.5
weight_evening     NaN
Name: 2022-12-14, dtype: float64



date
2022-12-08     93.95
2022-12-09    102.40
2022-12-10     84.40
2022-12-11     94.60
2022-12-12     94.35
2022-12-13     94.05
2022-12-14    103.50
dtype: float64

In [None]:
df[]

In [74]:
fig = px.line(
    df,
    # x="date",
    # y="weight",
    # color="time_of_day",
    line_shape="spline",
    # connectgaps=True,

    markers=True,
    # text="weight",
)
fig.update_traces(connectgaps=True)

fig.show()