In [1]:
#
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re
import os
import seaborn as sns
from datetime import datetime, timedelta
import warnings
# NOTE(review): this installs a blanket "ignore" filter, so warnings raised by
# the cells below (e.g. pandas chained-assignment warnings) are not shown.
warnings.filterwarnings('ignore')

In [2]:
# Load the animal register (semicolon-separated) and normalise its headers:
# spaces become underscores, names are lower-cased and hyphens are removed.
animaldf = pd.read_csv("animal_info.csv", sep = ";")
animaldf.columns = [col.replace(" ", "_").lower().replace("-", "") for col in animaldf.columns]

# One row per animal with its parsed last calving date ("fresh_date").
# Built as a single chain so the new column is added to a fresh frame instead of
# being assigned onto a slice of `animaldf` (which triggers pandas'
# chained-assignment warning). Rows lacking either the animal number or a
# calving date are dropped, as before.
fresh_dates = (
    animaldf[["animal_number", "last_calving_date"]]
    .assign(fresh_date=lambda frame: pd.to_datetime(frame["last_calving_date"], format = '%m/%d/%Y'))
    .drop(columns="last_calving_date")
    .dropna(subset=["fresh_date", "animal_number"])
)

In [3]:
# Read the one-year report (semicolon-separated), normalise its headers
# (spaces -> underscores, lower-case, hyphens removed) and parse the record date.
df = pd.read_csv(r"more_reports\oneyear_data.csv", sep=";")
df.columns = [name.replace(" ", "_").lower().replace("-", "") for name in df.columns]
df["date"] = pd.to_datetime(df["date"], format='%m/%d/%Y')

# Attach each animal's fresh date to its records (inner join on animal_number).
merged_df = fresh_dates.merge(df, on="animal_number")

# "dim" = whole days between the record date and the fresh date
# (presumably days in milk -- confirm with the data owner).
days_since_fresh = merged_df["date"] - merged_df["fresh_date"]
merged_df["dim"] = days_since_fresh.dt.days

# Keep only records dated on or after the fresh date.
on_or_after_fresh = merged_df["dim"] >= 0
df2 = merged_df[on_or_after_fresh]

# Teats not found daily

## Current lactation

In [4]:
# Per-cow DAILY "teats not found" (TNF) summary over all records in `df2`.
#
# `teats_not_found` holds a comma-separated list of labels (LR, LF, RR, RF, All).
# It is expanded into one indicator column per label in a single vectorised
# step; this replaces the previous row-by-row `.loc` loop, which wrote into a
# slice of `df2` and was slow on large frames.
teat_labels = ["LR", "LF", "RR", "RF", "All"]

tnf_flags = (
    df2["teats_not_found"]
    .str.replace(" ", "", regex=False)   # labels may be written "LR, LF"
    .str.get_dummies(sep=",")            # rows without a value become all zeros
    .astype(float)                       # keep the float counts the loop version produced
)
# A label that never occurs would otherwise be a missing column and break the
# arithmetic / aggregation below.
for label in teat_labels:
    if label not in tnf_flags.columns:
        tnf_flags[label] = 0.0

tnf_df = pd.concat([df2[["animal_number", "date", "dim"]], tnf_flags], axis=1)

# Collapse to one row per cow and day: number of flagged records per label.
tnf_dfg = tnf_df.groupby(["animal_number", "date", "dim"]).agg("sum").reset_index()
tnf_dfg["udder"] = tnf_dfg[["LR", "LF", "RR", "RF"]].sum(axis=1)

# Daily 0/1 indicators: was the udder / this quarter flagged at least once that day?
tnf_dfg["dudder"] = (tnf_dfg["udder"] > 0).astype(int)
for quarter in ("lr", "lf", "rf", "rr"):
    tnf_dfg["dtnf_" + quarter] = (tnf_dfg[quarter.upper()] > 0).astype(int)

# Per cow: number of flagged days per quarter, plus the largest dim observed.
# NOTE(review): "All" is summed as a raw per-record count, not as a daily 0/1
# indicator like the quarters -- confirm that this asymmetry is intended.
dtnf_dfg = (
    tnf_dfg.groupby("animal_number")
    .agg({"dudder": "sum", "dtnf_lr": "sum", "dtnf_lf": "sum", "dtnf_rf": "sum",
          "dtnf_rr": "sum", "All": "sum", "dim": "max"})
    .reset_index()
    .rename(columns={"dudder": "dudder_cl", "dtnf_lr": "dtnf_lr_cl", "dtnf_lf": "dtnf_lf_cl",
                     "dtnf_rf": "dtnf_rf_cl", "dtnf_rr": "dtnf_rr_cl", "All": "all_cl"})
)

# Flagged days as a percentage of the largest dim. A largest dim of 0 would
# divide by zero and yield inf, so it is treated as missing (NaN) instead.
max_dim = dtnf_dfg["dim"].where(dtnf_dfg["dim"] > 0)
rate_columns = [
    ("dudder_cl", "dtnf_udder_dim_cl"),
    ("dtnf_lf_cl", "dtnf_lf_dim_cl"),
    ("dtnf_lr_cl", "dtnf_lr_dim_cl"),
    ("dtnf_rf_cl", "dtnf_rf_dim_cl"),
    ("dtnf_rr_cl", "dtnf_rr_dim_cl"),
    ("all_cl", "dtnf_all_dim_cl"),
]
for count_col, rate_col in rate_columns:
    dtnf_dfg[rate_col] = (dtnf_dfg[count_col] / max_dim) * 100

tnf1 = dtnf_dfg

## Last 7 days

In [5]:
# Per-cow DAILY "teats not found" (TNF) summary restricted to a fixed 7-day window.
#
# The window bounds and the divisor used for the percentages are derived from
# the same two values, so changing the window cannot leave a stale "/7" behind.
window_start = pd.Timestamp("2023-11-10")
window_days = 7
window_end = window_start + pd.Timedelta(days=window_days)   # exclusive upper bound

in_window = df2[(df2["date"] >= window_start) & (df2["date"] < window_end)]

# Expand the comma-separated `teats_not_found` labels into indicator columns in
# one vectorised step (replaces the row-by-row `.loc` loop on a slice of df2).
teat_labels = ["LR", "LF", "RR", "RF", "All"]
tnf_flags = (
    in_window["teats_not_found"]
    .str.replace(" ", "", regex=False)   # labels may be written "LR, LF"
    .str.get_dummies(sep=",")            # rows without a value become all zeros
    .astype(float)                       # keep the float counts the loop version produced
)
# In a short window a label can easily be absent; make sure every expected
# column exists so the arithmetic / aggregation below cannot fail on it.
for label in teat_labels:
    if label not in tnf_flags.columns:
        tnf_flags[label] = 0.0

tnf_df = pd.concat([in_window[["animal_number", "date"]], tnf_flags], axis=1)

# Collapse to one row per cow and day: number of flagged records per label.
tnf_dfg = tnf_df.groupby(["animal_number", "date"]).agg("sum").reset_index()
tnf_dfg["udder"] = tnf_dfg[["LR", "LF", "RR", "RF"]].sum(axis=1)

# Daily 0/1 indicators: was the udder / this quarter flagged at least once that day?
tnf_dfg["dudder"] = (tnf_dfg["udder"] > 0).astype(int)
for quarter in ("lr", "lf", "rf", "rr"):
    tnf_dfg["dtnf_" + quarter] = (tnf_dfg[quarter.upper()] > 0).astype(int)

# Per cow: number of flagged days per quarter within the window.
dtnf_dfg = (
    tnf_dfg.groupby("animal_number")
    .agg({"dudder": "sum", "dtnf_lr": "sum", "dtnf_lf": "sum", "dtnf_rf": "sum",
          "dtnf_rr": "sum", "All": "sum"})
    .reset_index()
    .rename(columns={"dudder": "dudder_7d", "dtnf_lr": "dtnf_lr_7d", "dtnf_lf": "dtnf_lf_7d",
                     "dtnf_rf": "dtnf_rf_7d", "dtnf_rr": "dtnf_rr_7d", "All": "all_7d"})
)

# Flagged days as a percentage of the window length.
rate_columns = [
    ("dudder_7d", "dtnf_udder_dim_7d"),
    ("dtnf_lf_7d", "dtnf_lf_dim_7d"),
    ("dtnf_lr_7d", "dtnf_lr_dim_7d"),
    ("dtnf_rf_7d", "dtnf_rf_dim_7d"),
    ("dtnf_rr_7d", "dtnf_rr_dim_7d"),
    ("all_7d", "dtnf_all_dim_7d"),
]
for count_col, rate_col in rate_columns:
    dtnf_dfg[rate_col] = (dtnf_dfg[count_col] / window_days) * 100

tnf2 = dtnf_dfg

# Teats not found visits

## Current lactation

In [6]:
# Per-cow "teats not found" (TNF) summary counted per VISIT (one row of `df2`
# is treated as one visit) over all records in `df2`.
#
# Expand the comma-separated `teats_not_found` labels into 0/1 indicator columns
# in one vectorised step. Besides replacing the row-by-row `.loc` loop, this
# fixes a counting bug: the loop left NaN for every teat that was NOT listed,
# so `vtnf_lr + vtnf_lf + vtnf_rf + vtnf_rr` was NaN -- and `vudder` therefore
# 0 -- unless all four quarters were listed in the same visit.
teat_labels = ["LR", "LF", "RR", "RF", "All"]
tnf_flags = (
    df2["teats_not_found"]
    .str.replace(" ", "", regex=False)   # labels may be written "LR, LF"
    .str.get_dummies(sep=",")            # rows without a value become all zeros
    .astype(float)                       # keep the float sums the loop version produced
)
# A label that never occurs would otherwise be a missing column and break the
# rename / aggregation below.
for label in teat_labels:
    if label not in tnf_flags.columns:
        tnf_flags[label] = 0.0

tnf_df = pd.concat([df2[["animal_number", "date", "dim"]], tnf_flags], axis=1)
tnf_df = tnf_df.rename(columns={"LR": "vtnf_lr", "LF": "vtnf_lf", "RR": "vtnf_rr",
                                "RF": "vtnf_rf", "All": "vall"})

# vudder = 1 when at least one quarter was flagged in this visit.
tnf_df["udder"] = tnf_df[["vtnf_lr", "vtnf_lf", "vtnf_rf", "vtnf_rr"]].sum(axis=1)
tnf_df["vudder"] = (tnf_df["udder"] > 0).astype(int)

# Per cow: flagged visits per quarter, and the total number of visits
# (count of non-null dates).
vtnf_dfg = (
    tnf_df.groupby("animal_number")
    .agg({"vudder": "sum", "vtnf_lr": "sum", "vtnf_lf": "sum", "vtnf_rf": "sum",
          "vtnf_rr": "sum", "vall": "sum", "date": "count"})
    .reset_index()
    .rename(columns={"vudder": "vudder_cl", "vtnf_lr": "vtnf_lr_cl", "vtnf_lf": "vtnf_lf_cl",
                     "vtnf_rf": "vtnf_rf_cl", "vtnf_rr": "vtnf_rr_cl", "vall": "all_cl",
                     "date": "visits_cl"})
)

# Flagged visits as a percentage of all visits.
rate_columns = [
    ("vudder_cl", "vtnf_udder_nv_cl"),
    ("vtnf_lf_cl", "vtnf_lf_nv_cl"),
    ("vtnf_lr_cl", "vtnf_lr_nv_cl"),
    ("vtnf_rf_cl", "vtnf_rf_nv_cl"),
    ("vtnf_rr_cl", "vtnf_rr_nv_cl"),
    ("all_cl", "vtnf_all_nv_cl"),
]
for count_col, rate_col in rate_columns:
    vtnf_dfg[rate_col] = (vtnf_dfg[count_col] / vtnf_dfg["visits_cl"]) * 100

tnf3 = vtnf_dfg

## Last 7 days

In [7]:
# Per-cow "teats not found" (TNF) summary counted per VISIT (one row of `df2`
# is treated as one visit), restricted to a fixed 7-day window.
window_start = pd.Timestamp("2023-11-10")
window_days = 7
window_end = window_start + pd.Timedelta(days=window_days)   # exclusive upper bound

in_window = df2[(df2["date"] >= window_start) & (df2["date"] < window_end)]

# Expand the comma-separated `teats_not_found` labels into 0/1 indicator columns
# in one vectorised step. Besides replacing the row-by-row `.loc` loop, this
# fixes a counting bug: the loop left NaN for every teat that was NOT listed,
# so `vtnf_lr + vtnf_lf + vtnf_rf + vtnf_rr` was NaN -- and `vudder` therefore
# 0 -- unless all four quarters were listed in the same visit.
teat_labels = ["LR", "LF", "RR", "RF", "All"]
tnf_flags = (
    in_window["teats_not_found"]
    .str.replace(" ", "", regex=False)   # labels may be written "LR, LF"
    .str.get_dummies(sep=",")            # rows without a value become all zeros
    .astype(float)                       # keep the float sums the loop version produced
)
# In a short window a label can easily be absent; make sure every expected
# column exists so the rename / aggregation below cannot fail on it.
for label in teat_labels:
    if label not in tnf_flags.columns:
        tnf_flags[label] = 0.0

tnf_df = pd.concat([in_window[["animal_number", "date", "dim"]], tnf_flags], axis=1)
tnf_df = tnf_df.rename(columns={"LR": "vtnf_lr", "LF": "vtnf_lf", "RR": "vtnf_rr",
                                "RF": "vtnf_rf", "All": "vall"})

# vudder = 1 when at least one quarter was flagged in this visit.
tnf_df["udder"] = tnf_df[["vtnf_lr", "vtnf_lf", "vtnf_rf", "vtnf_rr"]].sum(axis=1)
tnf_df["vudder"] = (tnf_df["udder"] > 0).astype(int)

# Per cow: flagged visits per quarter, and the number of visits in the window
# (count of non-null dates).
vtnf_dfg = (
    tnf_df.groupby("animal_number")
    .agg({"vudder": "sum", "vtnf_lr": "sum", "vtnf_lf": "sum", "vtnf_rf": "sum",
          "vtnf_rr": "sum", "vall": "sum", "date": "count"})
    .reset_index()
    .rename(columns={"vudder": "vudder_7d", "vtnf_lr": "vtnf_lr_7d", "vtnf_lf": "vtnf_lf_7d",
                     "vtnf_rf": "vtnf_rf_7d", "vtnf_rr": "vtnf_rr_7d", "vall": "all_7d",
                     "date": "visits_7d"})
)

# Flagged visits as a percentage of the visits made within the window.
rate_columns = [
    ("vudder_7d", "vtnf_udder_nv_7d"),
    ("vtnf_lf_7d", "vtnf_lf_nv_7d"),
    ("vtnf_lr_7d", "vtnf_lr_nv_7d"),
    ("vtnf_rf_7d", "vtnf_rf_nv_7d"),
    ("vtnf_rr_7d", "vtnf_rr_nv_7d"),
    ("all_7d", "vtnf_all_nv_7d"),
]
for count_col, rate_col in rate_columns:
    vtnf_dfg[rate_col] = (vtnf_dfg[count_col] / vtnf_dfg["visits_7d"]) * 100

tnf4 = vtnf_dfg

# Merge and save 

In [8]:
# Join the four per-cow summaries onto the current-lactation daily table:
#   tnf1 = daily / current lactation, tnf2 = daily / 7-day window,
#   tnf3 = visits / current lactation, tnf4 = visits / 7-day window.
# The last join previously used tnf2 a second time, so the 7-day visit metrics
# (tnf4) never reached the output and the 7-day daily metrics were duplicated.
# `all_cl` and `all_7d` exist in both the daily and the visit tables, so pandas
# suffixes them: `_x` is the daily value, `_y` the visit value.
integrated_df = (
    tnf1
    .merge(tnf2, how='left', on = "animal_number")
    .merge(tnf3, how='left', on = "animal_number")
    .merge(tnf4, how='left', on = "animal_number")
)

# Output naming: "rr"/"lr" in column names become "rb"/"lb", and the key column
# is exported as "cow".
integrated_df.columns = [col.replace("rr", "rb").replace("lr", "lb").replace("animal_number", "cow") for col in integrated_df.columns]
integrated_df.to_csv(r"data_out\tnf_integrated.csv", index = False)