In [1]:
import numpy as np
import pandas as pd

from handle_reports import generate_report, append_report
from monitoring import cur_stats
from plotting import plot_data

Here you test something

Cols meaning:
- Day - day of the month
- DKind - day kind
- SIL - sport intensity level - has six levels from 0 - didn't exercise to 5 - hard exercise (More info in a separate message)
- OD - outdoor
- LE - left-eye reading
- JG - juggling
- GMG - gaming


(Separate message about SIL)
Exercise intensity notation:
- 0 - you didn't exercise
- 1 - easy
- 2 - between easy and moderate
- 3 - moderate
- 4 - between moderate and hard
- 5 - hard

In [3]:
# possible new name for modified prepare_data func
def prepare_initial_df(PATH, DAY_OFFS):
    """
    Summary:
        Prepare data for further operations
    Args:
        PATH (str, constant): path to your data
        DAY_OFFS (int list constant): days when you relax
    Returns:
        _type_: _description_
        month (str): month when the data was taken
        df (df): DataFrame
    """
    # Create list of colnames
    colnames = ["Day", "Math", "CS", "Eng", "Sport",
                "SIL", "OD", "LE", "JG", "GMG"]
    # Load data into df with sep equal to ':', '_', and '-'
    df = pd.read_csv(
        filepath_or_buffer=PATH, sep="[:_-]", names=colnames,
        header=None, engine="python")
    # Create var 'month' with df data from row 0 and col 0
    month = df.iloc[0][0]
    # Drop the row 0 where 'month' was
    df.drop(index=0, inplace=True)
    # Reset index, so it again starts with 0
    df = df.reset_index(drop=True)
    # in the cols below first two symbols are for easier
    # identification while writing the data. Many thanks
    # to https://stackoverflow.com/a/42349635/11749578
    cols_to_shorten = ["Sport", "OD", "LE", "JG", "GMG"]
    for col in cols_to_shorten:
        df[col] = df[col].str[2:]
    # Convert col dtypes to int64
    df[colnames] = df[colnames].astype("int64")
    # Create a new col with info about whether
    # you work or relax on a particular day
    df["DKind"] = df.apply(lambda row: categorise(row, DAY_OFFS), axis=1)
    # Rearrange order of cols, so col 'DKind'
    # goes after col 'Day'
    cols = df.columns.tolist()
    cols = [cols[0]] + [cols[-1]] + cols[1:-1]
    return month, df[cols]
    
# this small func is used in lambda 
# expression in prepare_initial_df function
def categorise(row, DAY_OFFS):
    if row["Day"] in DAY_OFFS:
        return "relax"
    return "work"

Unnamed: 0,Day,DKind,Math,CS,Eng,Sport,SIL,OD,LE,JG,GMG
0,17,work,90,85,131,9,3,60,5,0,240
1,18,work,80,110,70,10,3,60,5,0,65
2,19,work,70,90,65,10,0,70,0,10,260
3,20,work,70,70,35,11,3,40,5,0,150


In [4]:
def handle_day_offs(DAY_OFFS):
    # Create two day_offs vars for generate_report
    # func. With these two vars, reports will
    # provide a more detailed picture of the month
    day_offs_count = len(DAY_OFFS)
    if day_offs_count > 0:
        day_offs_str = ', '.join(map(str, DAY_OFFS))
    else:
        day_offs_str = "you studied every day"
    return day_offs_count, day_offs_str

In [6]:
def calc_study_data(df, DESIRED_MEAN_VALUE):
    """
    Summary:
        complete all the study data calculations 
        in the program. Study data cols are:
        Day, DKind, Math, CS, and Eng
        (Day and DKind are used to identify
        workdays and day offs)
    Args:
        df: pandas dataframe
    Returns:
        list of data
    """
    # get summed values from 'Math', 'CS', and 'Eng' cols
    total_per_subject = df[["Math", "CS", "Eng"]].sum(
        axis=0).div(60).round().astype(np.int64)
    math_hs, cs_hs, eng_hs = total_per_subject
    # create col 'Total' where each row is the sum
    # of 'Math', 'CS', and 'Eng' cols
    total_per_day = df[["Math", "CS", "Eng"]].sum(axis=1)
    df["MinTotal"] = total_per_day
    # with full data mean it's easier to compare
    # the change of your study time because
    # number of day offs per months differ
    # from month to month
    mean_full_data = round(total_per_day.mean())
    # find how many hours you studied this month
    total_hs = round(total_per_day.sum() / 60)
    # handle vacation days. Your mean and std
    # must not include data from vacation days.
    if "relax" in df["DKind"].values:
        df_removed_day_offs = df[~df["DKind"].str.contains("relax")]
        total_per_day = df_removed_day_offs["MinTotal"]
    mean = round(total_per_day.mean())
    std = round(total_per_day.std(ddof=0))
    min_to_study = calc_req_study_time(total_per_day, DESIRED_MEAN_VALUE)
    return [df, mean, std, min_to_study, math_hs, cs_hs, 
            eng_hs, total_hs, mean_full_data]

# small func to calculate how many more min you need to
# study to achieve your desired monthly mean
def calc_req_study_time(total_per_day, DESIRED_MEAN_VALUE):
    req_min = DESIRED_MEAN_VALUE * len(total_per_day)
    studied_min = sum(total_per_day)
    return req_min - studied_min


In [8]:
# at least, get here how much you exercised during the month
# then think about how to handle other nonstudy data
def handle_nonstudy_data(df):
    """
    Summary:
        Here you do operations with nonstudy data
    Args:
        df: df containing non-study data
    Returns:
        list of data
    """
    # get last value from the 'Sport' col
    sport = df["Sport"].iloc[-1]
    return sport

In [25]:
# this func is gonna be in the central file of your new package
# consenrned with data preparation
def prepare_data(PATH, DAY_OFFS, DESIRED_MEAN_VALUE):
    # in_df means initial_df; st_df means study_df
    month, in_df = prepare_initial_df(PATH, DAY_OFFS)
    st_df, mean, std, min_to_study, *report_data = calc_study_data(in_df[["Day", "DKind",
                                                                          "Math", "CS",
                                                                          "Eng"]],
                                                                   DESIRED_MEAN_VALUE)
    # combine st_df with nonstudy cols starting with 'Sport' col
    nonstudy_df = in_df.iloc[:, -6:]
    out_df = pd.concat([st_df, nonstudy_df], axis=1)
    day_offs_count, day_offs_str = handle_day_offs(DAY_OFFS)
    sport = handle_nonstudy_data(nonstudy_df)
    # add day offs vars and 'sport' var to report_data
    report_data += [day_offs_count, day_offs_str, sport]
    # get how many times you exercised that month
    pass
    # pack values before returning them
    wide_use_data = [month, mean, std, min_to_study, out_df]
    return report_data, wide_use_data

PATH = "C:/Users/San/Documents/inf/time monitoring/test_data.txt"
DAY_OFFS = []
DESIRED_MEAN_VALUE = 240
report_data, wide_use_data = prepare_data(PATH, DAY_OFFS, DESIRED_MEAN_VALUE)
month, mean, std, min_to_study, df = wide_use_data
nonstudy_df = df.iloc[:, -6:]
report_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["MinTotal"] = total_per_day


Unnamed: 0,Sport,SIL,OD,LE,JG,GMG
0,9,3,60,5,0,240
1,10,3,60,5,0,65
2,10,0,70,0,10,260
3,11,3,40,5,0,150


In [None]:
# maybe this func is gonna belong to 'monitoring' package
def sport_stats():
    """
    (Example)
    You exercised this month: 10 times
    Last time you got medium exercise: 3 days ago
    Last time you got hard exercise: 7 days ago
    """
    pass