In [1]:
import math
import pandas as pd

# This notebook was used to develop the execution of ME_calculations and MP_calculations on a dataframe. A cleaned-up version of this can be found in demonstration.ipynb.

In [13]:
# Load all the functions needed to run the ME and MP models
# The '*' wildcard import will import all functions in the script

from ME_functions import *
from MP_functions import *
from clean_data import clean_data

In [25]:
def clean_data(df):
    """Return a copy of ``df`` with the column names and extra inputs the models expect.

    The raw column names are mapped to the model's variable names, a set of
    constant default columns is added, and two columns are derived from
    existing ones. The frame passed in is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw input table. It must contain an ``age_m`` column and a ``DMI``
        column (renamed here to ``Dt_DMIn``); the other renamed columns are
        only renamed when present.

    Returns
    -------
    pandas.DataFrame
        A flat dataframe. Storing the data as a nested frame (as was done in
        R) was tried during development and dropped, so nothing is nested.
    """
    # Raw column name -> model variable name
    column_map = {
        'lactation_number': 'An_Parity_rl',
        'days_in_milk': 'An_LactDay',
        'MY': 'Trg_MilkProd',
        'BW_smooth': 'An_BW',
        'DMI': 'Dt_DMIn',
        'Fat %': 'Trg_MilkFatp',
        'Protein %': 'Trg_MilkTPp',
        'days_preg': 'An_GestDay',
    }

    # Columns filled with the same default value on every row
    constant_columns = {
        'An_BW_mature': 700,
        'Trg_FrmGain': 0,
        'An_GestLength': 280,
        'Fet_BWbrth': 44.1,
        'Trg_MilkLacp': 4.85,
        'Trg_RsrvGain': 0,
    }

    input_data_clean = df.rename(columns=column_map).assign(
        **constant_columns,
        # 30.436875 converts months into days
        An_AgeDay=lambda frame: frame['age_m'] * 30.436875,
        # Assume the ration is 30% NDF on a DM basis and use DMI to get an NDF
        # intake. This is temporary until real NDF intake can be added.
        Dt_NDFIn=lambda frame: frame['Dt_DMIn'] * 0.30,
    )
    return input_data_clean


In [34]:
# Alternate calculate ME function
def dev_execute_ME_requirement(row):
    """Calculate the metabolizable energy requirement for one dataframe row.

    Parameters
    ----------
    row : pandas.Series
        A single row whose index holds the column names, as passed by
        ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    Whatever ``calculate_ME_requirement`` returns for the values in ``row``.

    Raises
    ------
    ValueError
        If any required column is absent from ``row.index``. The missing
        names are listed in sorted order so the message is the same on
        every run.
    """
    # Listed in the exact positional order calculate_ME_requirement is called with
    ordered_inputs = ("An_BW", "Dt_DMIn", "Trg_MilkProd", "An_BW_mature", "Trg_FrmGain",
                      "An_GestDay", "An_GestLength", "An_AgeDay", "Fet_BWbrth", "An_LactDay",
                      "An_Parity_rl", "Trg_MilkFatp", "Trg_MilkTPp", "Trg_MilkLacp", "Trg_RsrvGain")

    # Check that the series contains all the required column names
    missing_columns = sorted(set(ordered_inputs) - set(row.index))
    if missing_columns:
        raise ValueError(f"Required columns are missing: {missing_columns}")

    ##########################################################################
    # Calculate Metabolizable Energy
    ##########################################################################
    Trg_MEuse = calculate_ME_requirement(*(row[name] for name in ordered_inputs))

    return Trg_MEuse


# Alternate calculate MP function
def dev_execute_MP_requirement(row):
    """Calculate the metabolizable protein requirement for one dataframe row.

    Parameters
    ----------
    row : pandas.Series
        A single row whose index holds the column names, as passed by
        ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    Whatever ``calculate_MP_requirement`` returns for the values in ``row``.

    Raises
    ------
    ValueError
        If any required column is absent from ``row.index``. The missing
        names are listed in sorted order so the message is the same on
        every run.
    """
    # Listed in the exact positional order calculate_MP_requirement is called with
    ordered_inputs = ("Dt_NDFIn", "Dt_DMIn", "An_BW", "An_BW_mature", "Trg_FrmGain",
                      "Trg_RsrvGain", "An_GestDay", "An_GestLength", "An_AgeDay",
                      "Fet_BWbrth", "An_LactDay", "An_Parity_rl", "Trg_MilkProd",
                      "Trg_MilkTPp")

    # Check that the series contains all the required column names
    missing_columns = sorted(set(ordered_inputs) - set(row.index))
    if missing_columns:
        raise ValueError(f"Required columns are missing: {missing_columns}")

    ##########################################################################
    # Calculate Metabolizable Protein
    ##########################################################################
    An_MPuse_g_Trg = calculate_MP_requirement(*(row[name] for name in ordered_inputs))

    return An_MPuse_g_Trg



In [37]:
# Load input data: read the large dataset, then drop its first column
input_data = pd.read_csv('../test_files/merged_7day_ind_withDHI.csv')
input_data = input_data.iloc[:, 1:]

# Alternative input: the 10 cow test dataset
# input_data = pd.read_csv('../test_files/test_data.csv')

# Clean input data (rename columns and add the extra model inputs)
input_data_clean = clean_data(input_data)


In [35]:
# Run both requirement functions once per row and store each result as a new column

input_data_clean['ME'] = input_data_clean.apply(dev_execute_ME_requirement, axis=1)
input_data_clean['MP'] = input_data_clean.apply(dev_execute_MP_requirement, axis=1)


In [None]:
# Attempts to pass a nested dataframe to dev_execute_ME_requirement()


# input_data_clean['ME'] = input_data_clean['data'].apply(calculate_ME_requirement)

# input_data_clean['ME'] = input_data_clean.iloc[:, 3:26].apply(lambda row: dev_execute_ME_requirement(*row), axis=1)
# Error because 'dev_execute_ME_requirement' takes a single row (Series), not the unpacked values of a row

# input_data_clean['ME'] = (input_data_clean['dfs'].iloc[1]).apply(lambda x: dev_execute_ME_requirement(x))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(x['An_BW'], x['Dt_DMIn'], x['Trg_MilkProd'], x['An_BW_mature'], x['Trg_FrmGain'], x['An_GestDay'], x['An_GestLength'], x['An_AgeDay'], x['Fet_BWbrth'], x['An_LactDay'], x['An_Parity_rl'], x['Trg_MilkFatp'], x['Trg_MilkTPp'], x['Trg_MilkLacp'], x['Trg_RsrvGain']))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(x['data']))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(*x.iloc[1]))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(x.iloc[1]))
# input_data_clean['ME'] = input_data_clean['dfs'].apply(lambda x: dev_execute_ME_requirement(x['data'].iloc[1]))


In [36]:
# Check for cows missing data: keep the rows where ME or MP is NA

columns_to_check = ['ME', 'MP']
check_na = input_data_clean.loc[:, columns_to_check].isna().any(axis='columns')
cows_missing_data = input_data_clean.loc[check_na]