In [29]:
import os
import pandas as pd
import numpy as np
import seaborn as sns

In [30]:
def merge_long(df, info_cols, col_dict):
    """Reshape several groups of wide columns to long format and join them.

    For every ``stub -> [columns]`` entry in ``col_dict`` the columns
    (named ``<stub>_<suffix>``) are melted with ``pd.wide_to_long`` so that
    the suffix ends up in a ``"teat"`` column and the values in a column
    named after the stub. The per-stub long frames are then inner-merged on
    ``info_cols + ["teat"]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table containing ``info_cols`` and every column listed in
        ``col_dict``. ``pd.wide_to_long`` requires the ``info_cols``
        combination to identify each row uniquely.
    info_cols : list of str
        Identifier columns kept as-is for every long row.
    col_dict : dict[str, list[str]]
        Mapping of stub name to the wide columns that belong to it
        (e.g. the output of ``get_col_dict``).

    Returns
    -------
    pandas.DataFrame
        Long table with ``info_cols``, ``"teat"`` and one value column per
        stub. Because the merge is an inner join, only the
        (info_cols, teat) combinations present for every stub are kept.

    Raises
    ------
    ValueError
        If ``col_dict`` is empty (nothing to reshape).
    """
    if not col_dict:
        # Previously this surfaced as an UnboundLocalError on the return line.
        raise ValueError("col_dict is empty: no wide column groups to reshape")

    id_cols = list(info_cols)
    merge_keys = id_cols + ["teat"]

    df_long = None
    for stub, wide_cols in col_dict.items():
        # Restrict to this stub's columns so that wide_to_long cannot pick up
        # columns belonging to another stub.
        stub_long = pd.wide_to_long(
            df[id_cols + list(wide_cols)],
            [stub],
            i=id_cols,
            j="teat",
            sep="_",
            suffix=r"\w+",
        ).reset_index()
        if df_long is None:
            df_long = stub_long
        else:
            df_long = pd.merge(df_long, stub_long, on=merge_keys)
    return df_long

def get_col_dict(df, teat_list):
    """Group the columns of ``df`` by stub name for the given suffixes.

    A column qualifies when its name has the form ``<stub>_<suffix>`` with
    ``<suffix>`` (the part after the last underscore) contained in
    ``teat_list`` and a non-empty ``<stub>``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column names are inspected.
    teat_list : list of str
        Accepted suffixes, e.g. ``["lf", "lr", "rr", "rf"]``.

    Returns
    -------
    dict[str, list[str]]
        Mapping of stub name to the matching column names, in the order
        the columns appear in ``df``.
    """
    col_dict = {}
    for col in df.columns:
        if not isinstance(col, str):
            # Non-string labels cannot carry a "<stub>_<suffix>" name.
            continue
        stub, sep, suffix = col.rpartition("_")
        # Require an underscore and a non-empty stub: a column named exactly
        # "lf" (or "_lf") would otherwise be filed under the empty key "",
        # which is not a usable stub name for pd.wide_to_long.
        if sep and stub and suffix in teat_list:
            col_dict.setdefault(stub, []).append(col)
    return col_dict

In [32]:
# Paths are resolved relative to the current working directory.
dirpath = os.getcwd()
data_path = os.path.join(dirpath, "..", "delpro_vms", "data")
feature_path = os.path.join(dirpath, "..", "udder_processing", "features_dict")
out_dir = "long_format_df"

# Read visit data
all_df = pd.read_csv(os.path.join(data_path, "delpro_visit_lactation.csv"))

# Read feature data and reverse the underscore-separated parts of every column
# name (e.g. "x_y" -> "y_x") so that the position label ends up as the suffix.
# Per the original note the resulting names include:
# 'eu_back', 'geo_back', 'eu_front', 'geo_front'
feature_df = pd.read_csv(os.path.join(feature_path, "gmfeature_table.csv"))
feature_df = feature_df.rename(columns = lambda col: "_".join(col.split("_")[::-1]) if "_" in col else col)

# Side-level kick-off flags: 0 when either teat on that side has a "U" in its
# kickoff entry, 1 otherwise. Computed column-wise instead of looping over row
# labels, so the result does not depend on all_df having a default RangeIndex.
for side, (teat_a, teat_b) in (("front", ("kickoff_lf", "kickoff_rf")),
                               ("back", ("kickoff_lr", "kickoff_rr"))):
    has_u = (
        all_df[teat_a].astype(str).str.contains("U", regex=False, na=False)
        | all_df[teat_b].astype(str).str.contains("U", regex=False, na=False)
    )
    all_df["ko_" + side] = (~has_u).astype(int)

In [33]:
# Kick-off data in long format: one row per visit and teat
info_cols = ["animal_number", "begin_time", "days_in_milk", "lactation_number"]
col_dict = get_col_dict(all_df, ["lf", "lr", "rr", "rf"])
ko_long = merge_long(all_df, info_cols, col_dict)
# Binary kick-off flag: 0 when the kickoff entry contains "U", 1 otherwise
ko_long["ko_bin"] = (~ko_long["kickoff"].astype(str).str.contains("U", regex=False, na=False)).astype(int)

# Feature data in long format: one row per cow and teat
info_cols = ["cow"]
col_dict = get_col_dict(feature_df, ["lf", "lb", "rb", "rf"])
ft_long = merge_long(feature_df, info_cols, col_dict)
# The feature table uses "b" where the visit table uses "r" ("lb"/"rb" vs
# "lr"/"rr"); relabel so both tables share the same teat codes for the merge.
ft_long["teat"] = ft_long["teat"].str.replace("b", "r", regex=False)

merged_long = pd.merge(ft_long, ko_long, left_on = ["cow", "teat"], right_on = ["animal_number", "teat"])
# Make sure the output folder exists; to_csv does not create directories.
os.makedirs(out_dir, exist_ok=True)
merged_long.to_csv(os.path.join(out_dir, "ko_ft_long_teat.csv"), index = False)

In [34]:
# Kick-off data in long format: one row per visit and side (front/back).
# get_col_dict picks up every all_df column ending in "_front"/"_back", which
# includes the ko_front / ko_back flags, so the long frame gets a "ko" column.
info_cols = ["animal_number", "begin_time", "days_in_milk", "lactation_number"]
col_dict = get_col_dict(all_df, ["front", "back"])
ko_long = merge_long(all_df, info_cols, col_dict)

# Feature data in long format: one row per cow and side
info_cols = ["cow"]
col_dict = get_col_dict(feature_df, ["front", "back"])
ft_long = merge_long(feature_df, info_cols, col_dict)

merged_long = pd.merge(ft_long, ko_long, left_on = ["cow", "teat"], right_on = ["animal_number", "teat"])
# merge_long always names the suffix column "teat"; here it holds the side.
merged_long = merged_long.rename(columns = {"teat": "side"})
# Make sure the output folder exists; to_csv does not create directories.
os.makedirs(out_dir, exist_ok=True)
merged_long.to_csv(os.path.join(out_dir, "ko_ft_long_side.csv"), index = False)

In [None]:
# Sorted array of the distinct cow identifiers present in the merged table
np.unique(merged_long["cow"])