In [1]:
import math
import pandas as pd

# This notebook was used to develop the execution of ME_calculations and MP_calculations on a dataframe. A cleaned-up version of this can be found in demonstration.ipynb

In [2]:
# Load all the functions needed to run the ME and MP models
# The '*' wildcard import will import all functions in the script

from ME_functions import *
from MP_functions import *
from clean_data import clean_data

In [3]:
def clean_data(df):
    """Prepare a raw input frame for the ME and MP model functions.

    The raw dataset columns are renamed to the model's variable names,
    constant placeholder inputs are added, and two derived columns are
    computed. The work is done on the frame returned by ``DataFrame.rename``,
    so the frame passed in is left untouched.

    NOTE: this definition shadows the ``clean_data`` imported from the
    ``clean_data`` module in the imports cell.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``age_m`` column and a ``DMI`` column (or an existing
        ``Dt_DMIn`` column); otherwise a ``KeyError`` is raised when the
        derived columns are computed. Other columns named in the rename map
        are renamed only if present.

    Returns
    -------
    pandas.DataFrame
        A flat frame with the renamed, constant and derived columns. A nested
        (per-cow) layout like the R workflow was tried during development and
        dropped; no nesting is performed here.
    """
    # Raw dataset column -> model variable name
    model_names = {
        'lactation_number': 'An_Parity_rl',
        'days_in_milk': 'An_LactDay',
        'MY': 'Trg_MilkProd',
        'BW_smooth': 'An_BW',
        'DMI': 'Dt_DMIn',
        'Fat %': 'Trg_MilkFatp',
        'Protein %': 'Trg_MilkTPp',
        'days_preg': 'An_GestDay',
    }
    cleaned = df.rename(columns=model_names)

    # Default values applied to every row, added in this order
    constant_inputs = {
        'An_BW_mature': 700,
        'Trg_FrmGain': 0,
        'An_GestLength': 280,
        'Fet_BWbrth': 44.1,
        'Trg_MilkLacp': 4.85,
        'Trg_RsrvGain': 0,
    }
    for column, value in constant_inputs.items():
        cleaned[column] = value

    # 30.436875 converts months into days
    cleaned['An_AgeDay'] = cleaned['age_m'] * 30.436875

    # Assume the ration is 30% NDF on a DM basis and derive NDF intake from DMI.
    # This is temporary until real NDF intake can be added.
    cleaned['Dt_NDFIn'] = cleaned['Dt_DMIn'] * 0.30

    return cleaned


In [4]:
# Alternate calculate ME function
def dev_execute_ME_requirement(row):
    """Run ``calculate_ME_requirement`` on one row of the cleaned input frame.

    Intended for ``DataFrame.apply(..., axis=1)``: the values are read from
    ``row`` by label and forwarded positionally to ``calculate_ME_requirement``
    (brought into scope by the ME_functions wildcard import).

    Parameters
    ----------
    row : pandas.Series
        One row whose index contains every label in ``me_argument_columns``.

    Returns
    -------
    Whatever ``calculate_ME_requirement`` returns for this row.

    Raises
    ------
    ValueError
        If any required label is absent from ``row.index``. The message lists
        the missing labels in sorted order so it is reproducible between runs.
    """
    # Single source of truth: these labels are both the required columns and,
    # in this exact order, the positional arguments of calculate_ME_requirement.
    me_argument_columns = (
        "An_BW", "Dt_DMIn", "Trg_MilkProd", "An_BW_mature", "Trg_FrmGain",
        "An_GestDay", "An_GestLength", "An_AgeDay", "Fet_BWbrth", "An_LactDay",
        "An_Parity_rl", "Trg_MilkFatp", "Trg_MilkTPp", "Trg_MilkLacp",
        "Trg_RsrvGain",
    )

    # sorted() makes the error message deterministic; a raw set difference
    # would list the names in arbitrary order.
    missing_columns = sorted(set(me_argument_columns) - set(row.index))
    if missing_columns:
        raise ValueError(f"Required columns are missing: {missing_columns}")

    ##########################################################################
    # Calculate Metabolizable Energy
    ##########################################################################
    return calculate_ME_requirement(*(row[column] for column in me_argument_columns))


# Alternate calculate MP function
def dev_execute_MP_requirement(row):
    """Run ``calculate_MP_requirement`` on one row of the cleaned input frame.

    Intended for ``DataFrame.apply(..., axis=1)``: the values are read from
    ``row`` by label and forwarded positionally to ``calculate_MP_requirement``
    (brought into scope by the MP_functions wildcard import).

    Parameters
    ----------
    row : pandas.Series
        One row whose index contains every label in ``mp_argument_columns``.

    Returns
    -------
    Whatever ``calculate_MP_requirement`` returns for this row.

    Raises
    ------
    ValueError
        If any required label is absent from ``row.index``. The message lists
        the missing labels in sorted order so it is reproducible between runs.
    """
    # Single source of truth: these labels are both the required columns and,
    # in this exact order, the positional arguments of calculate_MP_requirement.
    # Note the order differs from the ME wrapper (NDF intake comes first).
    mp_argument_columns = (
        "Dt_NDFIn", "Dt_DMIn", "An_BW", "An_BW_mature", "Trg_FrmGain",
        "Trg_RsrvGain", "An_GestDay", "An_GestLength", "An_AgeDay",
        "Fet_BWbrth", "An_LactDay", "An_Parity_rl", "Trg_MilkProd",
        "Trg_MilkTPp",
    )

    # sorted() makes the error message deterministic; a raw set difference
    # would list the names in arbitrary order.
    missing_columns = sorted(set(mp_argument_columns) - set(row.index))
    if missing_columns:
        raise ValueError(f"Required columns are missing: {missing_columns}")

    ##########################################################################
    # Calculate Metabolizable Protein
    ##########################################################################
    return calculate_MP_requirement(*(row[column] for column in mp_argument_columns))



In [5]:
# Load the large dataset; .iloc[:, 1:] keeps every column except the first
input_data = (
    pd.read_csv('../test_files/merged_7day_ind_withDHI.csv')
    .iloc[:, 1:]
)

# Alternative input: the 10 cow test dataset
# input_data = pd.read_csv('../test_files/test_data.csv')

# Rename and add the columns the model functions need
input_data_clean = clean_data(input_data)


In [6]:
# Compute the ME and MP values row by row (axis=1 hands each row to the
# wrapper as a Series) and store them as new columns on the cleaned frame.
input_data_clean['ME'] = input_data_clean.apply(dev_execute_ME_requirement, axis=1)
input_data_clean['MP'] = input_data_clean.apply(dev_execute_MP_requirement, axis=1)


In [7]:
# Display the cleaned frame, which now carries the ME and MP columns assigned in the previous cell
input_data_clean

Unnamed: 0,cow_id,DIM_bins_w,An_Parity_rl,An_LactDay,Trg_MilkProd,weight,An_BW,BW_gain,asfed_intake,Dt_DMIn,...,An_BW_mature,Trg_FrmGain,An_GestLength,Fet_BWbrth,Trg_MilkLacp,Trg_RsrvGain,An_AgeDay,Dt_NDFIn,ME,MP
0,936,2,6.0,18.333333,28.460000,724.166667,725.790000,-0.429833,32.066667,14.430,...,700,0,280,44.1,4.85,0,2800.192500,4.3290,60.941536,2012.076139
1,936,3,6.0,24.666667,33.901667,722.833333,723.163333,-0.253167,39.833333,17.925,...,700,0,280,44.1,4.85,0,2800.192500,5.3775,68.543171,2365.327913
2,936,4,6.0,32.500000,35.456667,721.000000,721.442167,-0.204333,40.766667,18.345,...,700,0,280,44.1,4.85,0,2800.192500,5.5035,70.695078,2456.484701
3,936,5,6.0,36.500000,39.485000,720.500000,720.765000,-0.189500,48.300000,21.735,...,700,0,280,44.1,4.85,0,2800.192500,6.5205,76.322911,2728.973670
4,974,7,5.0,51.000000,43.790000,781.000000,796.191000,1.114000,44.100000,19.845,...,700,0,280,44.1,4.85,0,2282.765625,5.9535,77.977153,2466.496122
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3017,5102,4,1.0,32.500000,34.378333,653.166667,653.722833,0.175167,43.000000,19.350,...,700,0,280,44.1,4.85,0,700.048125,5.8050,60.041614,2107.922135
3018,5102,5,1.0,39.500000,38.843333,654.833333,651.498500,-0.925000,43.066667,19.380,...,700,0,280,44.1,4.85,0,700.048125,5.8140,65.254073,2311.084746
3019,5102,6,1.0,45.833333,37.866667,642.833333,648.132500,-0.553833,48.066667,21.630,...,700,0,280,44.1,4.85,0,700.048125,6.4890,64.028307,2302.677652
3020,5102,7,1.0,54.000000,38.452000,656.600000,650.286600,0.307000,49.880000,22.446,...,700,0,280,44.1,4.85,0,700.048125,6.7338,64.766540,2343.537579


In [None]:
# Attempts to pass a nested dataframe to dev_execute_ME_requirement()


# input_data_clean['ME'] = input_data_clean['data'].apply(calculate_ME_requirement)

# input_data_clean['ME'] = input_data_clean.iloc[:, 3:26].apply(lambda row: dev_execute_ME_requirement(*row), axis=1)
# Error because `*row` unpacks the row into many positional arguments, while 'dev_execute_ME_requirement' accepts a single row (a pandas Series)

# input_data_clean['ME'] = (input_data_clean['dfs'].iloc[1]).apply(lambda x: dev_execute_ME_requirement(x))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(x['An_BW'], x['Dt_DMIn'], x['Trg_MilkProd'], x['An_BW_mature'], x['Trg_FrmGain'], x['An_GestDay'], x['An_GestLength'], x['An_AgeDay'], x['Fet_BWbrth'], x['An_LactDay'], x['An_Parity_rl'], x['Trg_MilkFatp'], x['Trg_MilkTPp'], x['Trg_MilkLacp'], x['Trg_RsrvGain']))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(x['data']))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(*x.iloc[1]))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(x.iloc[1]))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(x['data'].iloc[1]))


In [8]:
# Find the rows where ME or MP came back as NaN

columns_to_check = ['ME', 'MP']
# Boolean mask: True where at least one of the checked columns is NaN
check_na = input_data_clean.loc[:, columns_to_check].isna().any(axis='columns')
cows_missing_data = input_data_clean.loc[check_na]

In [9]:
# Display the rows with NaN in ME or MP. In the saved output these rows also
# show NaN for An_AgeDay, so a missing age_m is the likely cause (TODO confirm).
cows_missing_data

Unnamed: 0,cow_id,DIM_bins_w,An_Parity_rl,An_LactDay,Trg_MilkProd,weight,An_BW,BW_gain,asfed_intake,Dt_DMIn,...,An_BW_mature,Trg_FrmGain,An_GestLength,Fet_BWbrth,Trg_MilkLacp,Trg_RsrvGain,An_AgeDay,Dt_NDFIn,ME,MP
15,4338,3,6.0,26.00,42.628000,796.800000,799.706400,-0.176000,50.920000,22.914000,...,700,0,280,44.1,4.85,0,,6.874200,,
18,4384,0,6.0,5.50,33.745000,823.000000,827.985000,-5.387250,34.575000,15.558750,...,700,0,280,44.1,4.85,0,,4.667625,,
19,4384,1,6.0,11.00,41.670000,800.857143,798.654429,-4.649000,36.528571,16.437857,...,700,0,280,44.1,4.85,0,,4.931357,,
28,4444,0,6.0,5.00,19.764000,865.400000,861.914800,-3.524600,43.920000,19.764000,...,700,0,280,44.1,4.85,0,,5.929200,,
29,4444,1,6.0,9.50,37.272500,844.500000,847.188250,-2.987750,46.350000,20.857500,...,700,0,280,44.1,4.85,0,,6.257250,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2946,5086,1,1.0,13.00,24.093333,573.666667,574.374667,-1.267667,28.400000,12.780000,...,700,0,280,44.1,4.85,0,,3.834000,,
2983,5093,2,1.0,16.75,30.645000,587.250000,582.641500,-1.367750,33.175000,14.928750,...,700,0,280,44.1,4.85,0,,4.478625,,
2990,5095,1,1.0,12.00,26.690000,563.800000,572.038000,-0.450600,32.840000,14.778000,...,700,0,280,44.1,4.85,0,,4.433400,,
3014,5102,1,1.0,13.00,21.035000,666.000000,666.255500,-1.831000,33.250000,14.962500,...,700,0,280,44.1,4.85,0,,4.488750,,
