# Plot for Summary Report: Regression Analysis And 2018 Projections

In [90]:
%run 'helpers-for-report-plot-generation.ipynb'

In [91]:
# Get Data
def get_data():
    """Load the raw tables via ``get_dataset`` and reduce each to its features.

    Returns:
        A 4-tuple ``(milk, birthdates, genetics, classification)`` where each
        element is the corresponding raw table after being passed through its
        ``select_*_features`` helper.
    """
    raw_milk = get_dataset('milk_volume')
    raw_birthdates = get_dataset('birthdates')
    raw_genetics = get_dataset('genetics')
    raw_classification = get_dataset('classification')

    return (
        select_milk_features(raw_milk),
        select_birthdate_features(raw_birthdates),
        select_genetics_features(raw_genetics),
        select_classification_features(raw_classification),
    )
    
def select_milk_features(milk):
    """Clean the raw milk table and keep only the selected feature columns.

    The ``date`` column is renamed to ``milk_date``, every row containing a
    missing value is discarded, and the remaining rows are ordered by
    ``milk_date`` then ``animal_id`` before the index is renumbered.

    Returns:
        A frame with the columns ``animal_id``, ``milk_date``,
        ``milk_weight`` and ``days_since_calving``.
    """
    wanted = ['animal_id', 'milk_date', 'milk_weight', 'days_since_calving']

    cleaned = (
        milk.rename(columns={'date': 'milk_date'})
        .dropna(axis=0, how='any')
        .sort_values(['milk_date', 'animal_id'])
        .reset_index()
    )
    return cleaned[wanted]

def select_birthdate_features(birthdates):
    """Order the birthdate table chronologically and keep its feature columns.

    Rows are sorted by ``birthdate`` and the index is renumbered.

    Returns:
        A frame with the columns ``animal_id`` and ``birthdate``.
    """
    ordered = birthdates.sort_values(['birthdate'])
    ordered = ordered.reset_index()
    return ordered[['animal_id', 'birthdate']]

def select_genetics_features(genetics):
    """Rename the ``milk`` column to ``milk_score`` and keep the feature columns.

    Returns:
        A frame with the columns ``animal_id``, ``milk_score`` and ``ctpi``;
        row order and index are left as they were in the input.
    """
    column_map = {'milk': 'milk_score'}
    renamed = genetics.rename(columns=column_map)
    return renamed[['animal_id', 'milk_score', 'ctpi']]

def select_classification_features(classifications):
    """Prepare and select classification features.

    Keeps only the most recent classification per animal (the last row per
    ``animal_id`` after sorting by ``animal_id`` and ``date``) and fills any
    missing numeric value with that column's mean across the retained rows.

    Returns:
        A frame with one row per animal and the columns ``animal_id``,
        ``dairy_form``, ``udder_score_aggregate``,
        ``dairy_strength_aggregate``, ``final_score`` and
        ``breed_age_average``.
    """
    classifications = classifications.sort_values(['animal_id', 'date'])
    classifications = classifications.drop_duplicates(
        subset=['animal_id'], keep='last'
    ).reset_index(drop=True)

    # Restrict the mean to numeric columns: without numeric_only=True, recent
    # pandas versions raise TypeError as soon as the frame carries any
    # non-numeric column (e.g. strings), instead of silently skipping it.
    column_means = classifications.mean(numeric_only=True)
    classifications = classifications.fillna(column_means)

    features = ['animal_id', 'dairy_form', 'udder_score_aggregate',
                'dairy_strength_aggregate', 'final_score', 'breed_age_average']
    return classifications[features]

In [99]:
def build_annual_performance_profiles(milk, birthdates):
    """Calculate each animal's milk performance profile by calendar year.

    One row is produced for every combination of a calendar year present in
    ``milk['milk_date']`` and an animal present in ``milk['animal_id']``;
    combinations without any milk record get zeros in every metric.

    Args:
        milk: Frame with ``animal_id``, ``milk_date`` (datetime-like) and
            ``milk_weight`` columns.
            NOTE(review): assumes no missing dates or animal ids - confirm
            against the caller's cleaning step.
        birthdates: Not used by the calculation; kept so existing callers
            do not need to change.

    Returns:
        A frame sorted by ``year`` then ``animal_id`` with a fresh index and
        the columns ``year``, ``animal_id``, ``days_milked`` (number of milk
        records), ``milkweight_per_day`` (mean ``milk_weight``) and
        ``total_milkweight`` (summed ``milk_weight``).
    """
    features = ['year', 'animal_id', 'days_milked', 'milkweight_per_day', 'total_milkweight']

    # Nothing to aggregate: hand back an empty frame with the expected columns
    # rather than failing further down.
    if milk.empty:
        return pd.DataFrame(columns=features)

    years = milk['milk_date'].dt.year.rename('year')
    animal_ids = milk['animal_id']

    # A single grouped pass replaces re-filtering the whole frame for every
    # (year, animal) pair.
    grouped = milk['milk_weight'].groupby([years, animal_ids])
    stats = pd.DataFrame({
        'days_milked': grouped.size(),
        'total_milkweight': grouped.sum(),
        'milkweight_per_day': grouped.mean(),
    })

    # Expand to the full year x animal grid so that animals without records in
    # a given year still appear, with zeroed metrics.
    all_pairs = pd.MultiIndex.from_product(
        [years.unique(), animal_ids.unique()], names=['year', 'animal_id']
    )
    stats = stats.reindex(all_pairs).fillna(0)

    # Reindexing introduces NaN and therefore floats; restore integer counts.
    stats['days_milked'] = stats['days_milked'].astype(int)

    stats = stats.sort_index().reset_index()
    return stats[features]

In [92]:
# Unpack the four feature tables returned by get_data() into notebook-level names.
milk, birthdates, genetics, classification = get_data()

In [100]:
# Build the per-animal, per-year performance profiles from the loaded milk data.
profiles = build_annual_performance_profiles(milk, birthdates)


Unnamed: 0,year,animal_id,days_milked,milkweight_per_day,total_milkweight
0,2015,4,180,58.399444,10511.9
1,2015,200,31,72.280645,2240.7
2,2015,201,0,0.000000,0.0
3,2015,478,181,76.753591,13892.4
4,2015,941,0,0.000000,0.0
5,2015,1295,181,87.464641,15831.1
6,2015,1351,61,66.436066,4052.6
7,2015,1418,136,84.247059,11457.6
8,2015,1508,159,61.246541,9738.2
9,2015,1528,142,85.002113,12070.3
