## Add growing degree days to raw tall data from seasons with days to flowering and/or days to flag leaf emergence

In [None]:
import datetime as datetime
import numpy as np
import pandas as pd

### Functions

In [None]:
def save_to_csv_without_timestamp(list_of_dfs, list_of_output_filenames):
    """Write each dataframe to its matching output path as a CSV file.

    Dataframes and filenames are paired by position, and the dataframe index
    is not written to the file.

    Parameters
    ----------
    list_of_dfs : iterable of pandas.DataFrame
        Dataframes to write.
    list_of_output_filenames : iterable of str or path-like
        Destination paths, one per dataframe, in the same order.

    Raises
    ------
    ValueError
        If the number of dataframes and the number of filenames differ.
    """
    dataframes = list(list_of_dfs)
    filenames = list(list_of_output_filenames)

    # zip() stops at the shorter input, which would silently skip writing
    # some dataframes; fail loudly before anything is written instead.
    if len(dataframes) != len(filenames):
        raise ValueError(
            f'Got {len(dataframes)} dataframes but {len(filenames)} output filenames; '
            'each dataframe needs exactly one output filename.'
        )

    for df, filename in zip(dataframes, filenames):
        df.to_csv(filename, index=False)

### MAC Season 4

In [None]:
# Load the raw MAC season 4 export. low_memory=False makes pandas read the
# file in one pass rather than in chunks when inferring column dtypes.
s4_0 = pd.read_csv('data/raw/mac_season_four_2020-04-22.csv', low_memory=False)
# Print (rows, columns) as a quick sanity check of what was loaded.
print(s4_0.shape)
# s4_0.head()

### Season 4 Days to Flowering

In [None]:
# Load the processed season 4 flowering table (used below for its `plot`,
# `treatment` and `gdd_to_flowering` columns).
fl_0 = pd.read_csv('data/processed/mac_season_4_days_gdd_to_flowering.csv')
# Print (rows, columns) as a quick sanity check of what was loaded.
print(fl_0.shape)
# fl_0.head()

In [None]:
# s4_0.columns

In [None]:
# Pull out only the site, location, cultivar and treatment columns of the
# raw season 4 data.
s4_sites = s4_0.loc[
    :,
    [
        'site_id',
        'sitename',
        'lat',
        'lon',
        'cultivar',
        'cultivar_id',
        'treatment',
        'treatment_id',
    ],
]
# s4_sites.head()

Get information from original raw data that was not included in the `days_to_flowering` dataset

In [None]:
# Collapse repeated site rows to one row per distinct combination and
# renumber the index from zero.
s4_sites_unique = s4_sites.drop_duplicates().reset_index(drop=True)
print(s4_sites_unique.shape)
# s4_sites_unique.head()

In [None]:
# Attach the site metadata to every flowering row by matching the flowering
# table's `plot` to the raw data's `sitename`. A left join keeps every
# flowering row, including any whose plot has no match.
fl_sites = pd.merge(
    fl_0,
    s4_sites_unique,
    how='left',
    left_on='plot',
    right_on='sitename',
)
print(fl_sites.shape)
# fl_sites.head()

In [None]:
# Remove the 'Unnamed: 0' column from the raw data before appending new rows.
s4_1 = s4_0.drop(columns='Unnamed: 0')
# s4_1.head(2)

In [None]:
# Column values for the season 4 `gdd_to_flowering` trait rows. Entries that
# are arrays come from the merged flowering/site table, one value per row;
# the rest are constants or NaN placeholders.
# NOTE(review): 'treatment' is taken from `treatment_x` here, while the
# flag leaf emergence cell takes it from `treatment_y` - confirm which side
# of the merge is intended.
data = {
    'checked': 0,
    'result_type': 'traits',
    'id': np.nan,
    'citation_id': np.nan,
    'site_id': fl_sites['site_id'].values,
    'treatment_id': fl_sites['treatment_id'].values,
    'sitename': fl_sites['plot'].values,
    'city': 'Maricopa',
    'lat': fl_sites['lat'].values,
    'lon': fl_sites['lon'].values,
    'scientificname': 'Sorghum bicolor',
    'commonname': 'sorghum',
    'genus': 'Sorghum',
    'species_id': 2588,
    'cultivar_id': fl_sites['cultivar_id'].values,
    'author': np.nan,
    'citation_year': np.nan,
    'treatment': fl_sites['treatment_x'].values,
    'date': np.nan,
    'time': np.nan,
    'raw_date': np.nan,
    'month': np.nan,
    'year': np.nan,
    'dateloc': np.nan,
    'trait': 'gdd_to_flowering',
    'trait_description': 'accumulated growing degree days to flowering',
    'mean': fl_sites['gdd_to_flowering'].values,
    'units': 'C',
    'n': np.nan,
    'statname': np.nan,
    'stat': np.nan,
    'notes': np.nan,
    'access_level': np.nan,
    'cultivar': fl_sites['cultivar'].values,
    'entity': np.nan,
    'method_name': np.nan,
    'view_url': np.nan,
    'edit_url': np.nan,
}

The `np.nan` values do not need to be specified explicitly, since `pd.concat()` will fill in missing columns with `NaN` as needed.

In [None]:
# Build the flowering trait rows as a dataframe from the `data` dict.
gdd_flowering_s4 = pd.DataFrame(data)
print(gdd_flowering_s4.shape)
# gdd_flowering_s4.tail(3)

In [None]:
# gdd_flowering_s4.isnull().sum()

Add `gdd_to_flowering` dataframe to raw season 4 data

In [None]:
# Append the flowering trait rows below the raw season 4 rows, renumbering
# the index of the combined frame.
tall_s4 = pd.concat(
    objs=[s4_1, gdd_flowering_s4],
    ignore_index=True,
)
print(tall_s4.shape)
tall_s4.tail(3)

### Season 4 Days to Flag Leaf Emergence

In [None]:
# Load the processed season 4 flag leaf emergence table (used below for its
# `plot` and `gdd_to_flag_leaf_emergence` columns).
fle_0 = pd.read_csv('data/processed/mac_season_4_days_gdd_to_flag_leaf_emergence.csv')
# Print (rows, columns) as a quick sanity check of what was loaded.
print(fle_0.shape)
# fle_0.head()

In [None]:
# Re-check the size of the de-duplicated site table before it is merged again.
print(s4_sites_unique.shape)
# s4_sites.tail()

In [None]:
# Attach the site metadata to every flag leaf emergence row by matching
# `plot` to the raw data's `sitename`. A left join keeps every emergence
# row, including any whose plot has no match.
fle_sites = pd.merge(
    fle_0,
    s4_sites_unique,
    how='left',
    left_on='plot',
    right_on='sitename',
)
print(fle_sites.shape)
# fle_sites.head()

In [None]:
# fle_sites.columns

In [None]:
# Keep only the trait value plus the site, cultivar and treatment columns
# needed to build the new trait rows.
fle_sites_1 = fle_sites.loc[
    :,
    [
        'gdd_to_flag_leaf_emergence',
        'site_id',
        'sitename',
        'lat',
        'lon',
        'cultivar',
        'cultivar_id',
        'treatment_y',
        'treatment_id',
    ],
]
print(fle_sites_1.shape)
# fle_sites_1.head()

In [None]:
# Column values for the season 4 `gdd_to_flag_leaf_emergence` trait rows.
# Entries that are arrays come from `fle_sites_1`, one value per row; the
# rest are constants or NaN placeholders.
fle_data = {
    'checked': 0,
    'result_type': 'traits',
    'id': np.nan,
    'citation_id': np.nan,
    'site_id': fle_sites_1['site_id'].values,
    'treatment_id': fle_sites_1['treatment_id'].values,
    'sitename': fle_sites_1['sitename'].values,
    'city': 'Maricopa',
    'lat': fle_sites_1['lat'].values,
    'lon': fle_sites_1['lon'].values,
    'scientificname': 'Sorghum bicolor',
    'commonname': 'sorghum',
    'genus': 'Sorghum',
    'species_id': 2588,
    'cultivar_id': fle_sites_1['cultivar_id'].values,
    'author': np.nan,
    'citation_year': np.nan,
    'treatment': fle_sites_1['treatment_y'].values,
    'date': np.nan,
    'time': np.nan,
    'raw_date': np.nan,
    'month': np.nan,
    'year': np.nan,
    'dateloc': np.nan,
    'trait': 'gdd_to_flag_leaf_emergence',
    'trait_description': 'accumulated growing degree days to flag leaf emergence',
    'mean': fle_sites_1['gdd_to_flag_leaf_emergence'].values,
    'units': 'C',
    'n': np.nan,
    'statname': np.nan,
    'stat': np.nan,
    'notes': np.nan,
    'access_level': np.nan,
    'cultivar': fle_sites_1['cultivar'].values,
    'entity': np.nan,
    'method_name': np.nan,
    'view_url': np.nan,
    'edit_url': np.nan,
}

In [None]:
# Build the flag leaf emergence trait rows as a dataframe from `fle_data`.
fle_s4 = pd.DataFrame(fle_data)
print(fle_s4.shape)
# fle_s4.head()

In [None]:
# Append the flag leaf emergence rows below the season 4 data that already
# holds the flowering rows, renumbering the index of the combined frame.
fl_fle_tall_s4 = pd.concat(
    objs=[tall_s4, fle_s4],
    ignore_index=True,
)
print(fl_fle_tall_s4.shape)
fl_fle_tall_s4.tail(3)

### MAC Season 6
Does not contain any added growing degree day traits

### KSU

In [None]:
# Load the raw KSU export.
ksu = pd.read_csv('data/raw/ksu_data_2020-06-11.csv')
# Print (rows, columns) as a quick sanity check of what was loaded.
print(ksu.shape)
# ksu.head()

In [None]:
# Remove the 'Unnamed: 0' column from the raw KSU data before appending new rows.
ksu_1 = ksu.drop(columns='Unnamed: 0')

In [None]:
# Load the processed KSU flowering table (used below for its `sitename`,
# `cultivar`, `lat`, `lon` and `gdd` columns).
ksu_fl_0 = pd.read_csv('data/processed/ksu_flowering_2020-06-15T164738.csv')
# Print (rows, columns) as a quick sanity check of what was loaded.
print(ksu_fl_0.shape)
# ksu_fl_0.head()

In [None]:
# Pull out only the site and cultivar identifier columns of the raw KSU data.
ksu_sites = ksu_1.loc[:, ['site_id', 'sitename', 'cultivar', 'cultivar_id']]
print(ksu_sites.shape)
# ksu_sites.head(3)

In [None]:
# Keep the first occurrence of each distinct site/cultivar row; the
# surviving rows retain their original index labels.
ksu_sites_unique = ksu_sites[~ksu_sites.duplicated()]
print(ksu_sites_unique.shape)
# ksu_sites_unique.tail(3)

In [None]:
# Attach site and cultivar ids to every KSU flowering row by matching on
# both `sitename` and `cultivar`. A left join keeps every flowering row.
ksu_fl_sites = pd.merge(
    ksu_fl_0,
    ksu_sites_unique,
    how='left',
    on=['sitename', 'cultivar'],
)
print(ksu_fl_sites.shape)
# ksu_fl_sites.head(3)

In [None]:
# ksu_1.columns

In [None]:
# Column values for the KSU `gdd_to_flowering` trait rows. Entries that are
# arrays come from the merged KSU flowering/site table, one value per row;
# the rest are constants. Columns not listed here are left for pd.concat to
# fill when these rows are appended to the raw data.
ksu_fl_data = {
    'checked': 0,
    'result_type': 'traits',
    'site_id': ksu_fl_sites['site_id'].values,
    'treatment_id': 6000000022,
    'sitename': ksu_fl_sites['sitename'].values,
    'city': 'Ashland',
    'lat': ksu_fl_sites['lat'].values,
    'lon': ksu_fl_sites['lon'].values,
    'scientificname': 'Sorghum bicolor',
    'commonname': 'sorghum',
    'genus': 'Sorghum',
    'species_id': 2588,
    'cultivar_id': ksu_fl_sites['cultivar_id'].values,
    'treatment': 'KSU 2016 Observational',
    'trait': 'gdd_to_flowering',
    'trait_description': 'accumulated growing degree days to flowering',
    'mean': ksu_fl_sites['gdd'].values,
    'units': 'C',
    'cultivar': ksu_fl_sites['cultivar'].values,
}

In [None]:
# Build the KSU flowering trait rows as a dataframe from `ksu_fl_data`.
ksu_fl_df = pd.DataFrame(ksu_fl_data)
print(ksu_fl_df.shape)
# ksu_fl_df.head(3)

In [None]:
# Append the flowering trait rows below the raw KSU rows, renumbering the
# index of the combined frame.
tall_ksu_df = pd.concat(
    objs=[ksu_1, ksu_fl_df],
    ignore_index=True,
)
print(tall_ksu_df.shape)
tall_ksu_df.tail(3)

### Clemson

In [None]:
# Load the raw Clemson export.
clemson = pd.read_csv('data/raw/clemson_data_2020-06-01.csv')
# Print (rows, columns) as a quick sanity check of what was loaded.
print(clemson.shape)
# clemson.head(3)

In [None]:
# Remove the 'Unnamed: 0' column from the raw Clemson data before appending new rows.
clemson_1 = clemson.drop(columns='Unnamed: 0')

In [None]:
# Load the processed Clemson flowering table (used below for its `sitename`,
# `cultivar` and `gdd_to_flowering` columns).
clem_fl_0 = pd.read_csv('data/processed/clemson_days_to_flowering_2020-06-24T113715.csv')
# Print (rows, columns) as a quick sanity check of what was loaded.
print(clem_fl_0.shape)
# clem_fl_0.tail()

In [None]:
# Pull out only the site and cultivar identifier columns of the raw Clemson data.
clem_sites = clemson_1.loc[:, ['site_id', 'sitename', 'cultivar', 'cultivar_id']]
print(clem_sites.shape)
# clem_sites.head(3)

In [None]:
# Collapse repeated site/cultivar rows to one row per distinct combination
# and renumber the index from zero.
clem_sites_unique = clem_sites.drop_duplicates().reset_index(drop=True)
print(clem_sites_unique.shape)

In [None]:
# Attach site and cultivar ids to every Clemson flowering row by matching on
# both `sitename` and `cultivar`. A left join keeps every flowering row.
clem_fl_sites = pd.merge(
    clem_fl_0,
    clem_sites_unique,
    how='left',
    on=['sitename', 'cultivar'],
)
print(clem_fl_sites.shape)
# clem_fl_sites.head(3)

In [None]:
# clemson_1.columns

In [None]:
# Column values for the Clemson `gdd_to_flowering` trait rows. Entries that
# are arrays come from the merged Clemson flowering/site table, one value per
# row; the rest are constants. Columns not listed here are left for pd.concat
# to fill when these rows are appended to the raw data.
clem_fl_data = {
    'checked': 0,
    # Fixed from 'trait': every other constructed trait frame in this
    # notebook uses 'traits', and a mismatched value would exclude these
    # rows from any filter on result_type.
    'result_type': 'traits',
    'site_id': 6000025503,
    'treatment_id': 6000000042,
    'sitename': 'Clemson University Pee Dee Research and Education Center',
    'city': 'Florence',
    'lat': 34.289,
    'lon': -79.737,
    'scientificname': 'Sorghum bicolor',
    'commonname': 'sorghum',
    'genus': 'Sorghum',
    'species_id': 2588,
    'cultivar_id': clem_fl_sites.cultivar_id.values,
    'treatment': '2014 Clemson BAP Phenotyping Trials',
    'trait': 'gdd_to_flowering',
    # NOTE(review): the MAC and KSU cells describe this trait as
    # 'accumulated growing degree days to flowering' - confirm whether the
    # underscored wording here is intentional.
    'trait_description': 'growing_degree_days_to_flowering',
    'mean': clem_fl_sites.gdd_to_flowering.values,
    'units': 'C',
    'cultivar': clem_fl_sites.cultivar.values,
}


In [None]:
# Build the Clemson flowering trait rows as a dataframe from `clem_fl_data`.
clem_fl_df = pd.DataFrame(clem_fl_data)
print(clem_fl_df.shape)
# clem_fl_df.head(3)

In [None]:
# Append the flowering trait rows below the raw Clemson rows, renumbering
# the index of the combined frame.
tall_clem = pd.concat(
    objs=[clemson_1, clem_fl_df],
    ignore_index=True,
)
print(tall_clem.shape)
tall_clem.tail(3)

### Convert dataframes to `.csv` files

In [None]:
# Dataframes to export and their destination paths, paired by position.
list_of_dfs = [fl_fle_tall_s4, tall_ksu_df, tall_clem]
list_of_df_filepaths = [
    'data/interim/tall_season_four.csv',
    'data/interim/tall_ksu_data.csv',
    # Fixed typo: was 'data/iterim/...', which does not match the directory
    # used by the other two outputs.
    'data/interim/tall_clemson_data.csv',
]

save_to_csv_without_timestamp(list_of_dfs, list_of_df_filepaths)