## Nov 7.

- ```SW```: Seasonal Weather: temp. and precip.

On Nov. 6 Mike wanted to model cattle inventory using only ```NPP```/```SW``` and rangeland area for one year.

**Min's data are inconsistent:** Let us subset to the counties that are common to ```NPP```, ```SW```, and the cattle inventory.

#### Seasons in Tonsor are
- S1: Jan - Mar
- S2: Apr - Jul
- S3: Aug - Sep
- S4: Oct - Dec

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import os, os.path, pickle, sys

import matplotlib
import matplotlib.pyplot as plt

sys.path.append("/Users/hn/Documents/00_GitHub/Rangeland/Python_Codes/")
import rangeland_core as rc

In [None]:
# Root of the research data tree; every other directory hangs off this path.
data_dir_base = "/Users/hn/Documents/01_research_data/RangeLand/Data/"

census_population_dir = f"{data_dir_base}census/"
# Shannon_data_dir = data_dir_base + "Shannon_Data/"
# USDA_data_dir = data_dir_base + "/NASS_downloads/"
param_dir = f"{data_dir_base}parameters/"
Min_data_base = f"{data_dir_base}Min_Data/"

# Re-organized inputs and the seasonal-variable subfolder beneath them.
reOrganized_dir = f"{data_dir_base}reOrganized/"
seasonal_dir = f"{reOrganized_dir}seasonal_variables/02_merged_mean_over_county/"

In [None]:
# Load the 25-state county table and add a combined "state-county" key column.
Bhupi = pd.read_csv(param_dir + "Bhupi_25states_clean.csv")
Bhupi["SC"] = Bhupi["state"] + "-" + Bhupi["county"]

# Quick sanity check on how many states / county FIPS codes are present.
print(f"{len(Bhupi.state.unique()) = }")
print(f"{len(Bhupi.county_fips.unique()) = }")
Bhupi.head(2)

In [None]:
# States of interest (full names), one per line for easy diffing.
SoI = [
    "Alabama",
    "Arkansas",
    "California",
    "Colorado",
    "Florida",
    "Georgia",
    "Idaho",
    "Illinois",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "New Mexico",
    "North Dakota",
    "Oklahoma",
    "Oregon",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Virginia",
    "Wyoming",
]

# NOTE(review): unpickling executes arbitrary code; only load trusted files.
abb_dict = pd.read_pickle(param_dir + "state_abbreviations.sav")

# Map every full state name to its abbreviation, keeping the order of SoI.
full_2_abb = abb_dict["full_2_abb"]
SoI_abb = [full_2_abb[state_name] for state_name in SoI]

In [None]:
# Load the pickled USDA tables and pull out the cattle-inventory table.
USDA_data = pd.read_pickle(reOrganized_dir + "USDA_data.sav")
cattle_inventory = USDA_data["cattle_inventory"]

# Restrict to the 25 states of interest.
in_SoI = cattle_inventory["state"].isin(SoI)
cattle_inventory = cattle_inventory.loc[in_SoI].copy()

print(f"{cattle_inventory.data_item.unique() = }")
print(f"{cattle_inventory.commodity.unique() = }")
print(f"{cattle_inventory.year.unique() = }")

# Years present in the inventory table (taken before any further cleaning).
census_years = list(cattle_inventory["year"].unique())

# Keep only the columns needed downstream.
# cattle_inventory = cattle_inventory[["year", "county_fips", "cattle_cow_inventory"]]
needed_cols = ["year", "county_fips", "cattle_cow_beef_inventory"]
cattle_inventory = cattle_inventory[needed_cols]

print(f"{len(cattle_inventory.county_fips.unique()) = }")
cattle_inventory.head(2)

In [None]:
# Print the shape before and after cleaning to see how many rows are affected.
# NOTE(review): rc.clean_census lives in rangeland_core (not shown here);
# presumably it removes or fixes unusable entries in `col_` — confirm there.
print (cattle_inventory.shape)
cattle_inventory = rc.clean_census(df=cattle_inventory, col_="cattle_cow_beef_inventory")
print (cattle_inventory.shape)

### Min has an extra "1" as leading digit in FIPS!!

In [None]:
# county_annual_GPP_NPP_prod = pd.read_csv(reOrganized_dir + "county_annual_GPP_NPP_productivity.csv")
# county_annual_GPP_NPP_prod.rename(columns=lambda x: x.lower().replace(' ', '_'), inplace=True)

# county_annual_GPP_NPP_prod = county_annual_GPP_NPP_prod[["year", "county", "modis_npp"]].copy()
# county_annual_GPP_NPP_prod.dropna(how='any', inplace=True)
# county_annual_GPP_NPP_prod.sort_values(by=["year", "county"], inplace=True)
# county_annual_GPP_NPP_prod.reset_index(drop=True, inplace=True)
# county_annual_GPP_NPP_prod.head(2)

# NPP = pd.read_csv(reOrganized_dir + "county_annual_GPP_NPP_productivity.csv")

# Read Min's county-level annual MODIS NPP and give the value column an
# explicit name in a single chained expression.
NPP = (
    pd.read_csv(Min_data_base + "county_annual_MODIS_NPP.csv")
    .rename(columns={"NPP": "modis_npp"})
)
NPP.head(2)

In [None]:
# Keep only the NPP rows whose year is one of the census years, then
# renumber the index from 0.
is_census_year = NPP["year"].isin(census_years)
NPP = NPP[is_census_year].reset_index(drop=True)
NPP.head(2)

In [None]:
# Min's county lookup table, limited to the states of interest (by
# abbreviation), ordered by state then county, with a fresh 0..n-1 index.
county_id_name_fips = (
    pd.read_csv(Min_data_base + "county_id_name_fips.csv")
    .loc[lambda tbl: tbl["STATE"].isin(SoI_abb)]
    .sort_values(by=["STATE", "county"])
    .reset_index(drop=True)
)
county_id_name_fips.head(2)

In [None]:
# Number of distinct county identifiers in NPP before any county filtering.
print (f"{len(NPP.county.unique()) = }")

In [None]:
# Keep only NPP rows whose county id appears in the (state-filtered) lookup
# table; print the shape before and after to show how many rows are dropped.
print(NPP.shape)
counties_in_lookup = list(county_id_name_fips["county"].unique())
NPP = NPP.loc[NPP["county"].isin(counties_in_lookup)].copy()
print(NPP.shape)
NPP.head(2)

In [None]:
# Compare coverage: years in NPP, and distinct counties in NPP vs. cattle inventory.
# Note NPP still uses the `county` column here, cattle inventory uses `county_fips`.
print (f"{(NPP.year.unique()) = }")
print (f"{len(NPP.county.unique()) = }")
print (f"{len(cattle_inventory.county_fips.unique()) = }")

In [None]:
# Print the number of distinct NPP counties for each year, in the order the
# years first appear in NPP, to check whether coverage is the same every year.
for a_year in NPP.year.unique():
    df = NPP[NPP.year == a_year]
    print (f"{len(df.county.unique()) = }")

In [None]:
# Peek at NPP before the FIPS column is corrected.
NPP.head(2)

In [None]:
# Repair the FIPS codes in Min's `county` column, then rename that column to
# `county_fips` so it matches the key used by the cattle-inventory table.
# NOTE(review): rc.correct_Mins_FIPS is defined in rangeland_core (not shown).
NPP = rc.correct_Mins_FIPS(df=NPP, col_="county").rename(
    columns={"county": "county_fips"}
)
NPP.head(2)

In [None]:
# Rangeland area and Total area:
# Load the per-county rangeland / total-area table and show its size and first rows.
county_RA_and_TA_fraction = pd.read_csv(reOrganized_dir + "county_rangeland_and_totalarea_fraction.csv")
print (county_RA_and_TA_fraction.shape)
county_RA_and_TA_fraction.head(5)

In [None]:
# Rename `fips_id` to `county_fips` and repair the FIPS codes so this table
# can be joined with the other county-level tables.
# NOTE(review): rc.correct_Mins_FIPS is defined in rangeland_core (not shown).
county_RA_and_TA_fraction = rc.correct_Mins_FIPS(
    df=county_RA_and_TA_fraction.rename(columns={"fips_id": "county_fips"}),
    col_="county_fips",
)
county_RA_and_TA_fraction.head(2)

In [None]:
# Attach rangeland / total-area columns to every NPP row. A left join keeps
# all NPP rows; counties absent from the area table get NaN in those columns.
county_annual_NPP_Ra = NPP.merge(
    county_RA_and_TA_fraction,
    how="left",
    on="county_fips",
)
county_annual_NPP_Ra.head(2)

In [None]:
# Show the sorted years available in each table side by side.
print (f"{sorted(cattle_inventory.year.unique())     = }")
print (f"{sorted(county_annual_NPP_Ra.year.unique()) = }")

In [None]:
# Drop cattle-inventory years that have no NPP data, then list what remains.
years_with_NPP = list(county_annual_NPP_Ra["year"].unique())
cattle_inventory = cattle_inventory.loc[cattle_inventory["year"].isin(years_with_NPP)]
sorted(cattle_inventory.year.unique())

In [None]:
# Distinct county counts: cattle inventory first, NPP/rangeland table second.
print (len(cattle_inventory.county_fips.unique()))
print (len(county_annual_NPP_Ra.county_fips.unique()))

In [None]:
# Counties that appear in the cattle inventory but have no row in the
# NPP/rangeland table.
# The lookup set is built once: calling `.unique()` inside the comprehension's
# condition recomputed it, and scanned it linearly, for every county.
NPP_county_fips_set = set(county_annual_NPP_Ra.county_fips.unique())
cattle_inventory_cnty_missing_from_NPP = [
    x for x in cattle_inventory.county_fips.unique() if x not in NPP_county_fips_set
]
len(cattle_inventory_cnty_missing_from_NPP)

In [None]:
# Counties that appear in the NPP/rangeland table but have no row in the
# cattle inventory.
# The lookup set is built once: calling `.unique()` inside the comprehension's
# condition recomputed it, and scanned it linearly, for every county.
cattle_county_fips_set = set(cattle_inventory.county_fips.unique())
NPP_cnty_missing_from_cattle = [
    x for x in county_annual_NPP_Ra.county_fips.unique() if x not in cattle_county_fips_set
]
len(NPP_cnty_missing_from_cattle)

In [None]:
# Spot check: is county FIPS "01001" present in each table?
# The comparison is against the string "01001", so it is only True when the
# county_fips values are stored as zero-padded strings.
print ("01001" in list(county_annual_NPP_Ra.county_fips.unique()))
print ("01001" in list(cattle_inventory.county_fips.unique()))

## NPP has a lot of missing counties

 - Min says he had a threshold about rangeland/pasture.
 - subset the NPP and Cattle to the intersection of counties present.
 - It seems there is a different number of counties in each year of the cattle inventory. Find the intersection of those as well.

In [None]:
# Start from every county seen in the cattle inventory and shrink the set,
# year by year, to the counties that are present in all years.
all_cattle_counties = set(cattle_inventory.county_fips.unique())
print(f"{len(all_cattle_counties) = }")

for a_year in sorted(cattle_inventory.year.unique()):
    curr_cow = cattle_inventory[cattle_inventory.year == a_year].copy()
    curr_cow_counties = set(curr_cow.county_fips.unique())
    # Keep only the counties that also occur in this year.
    all_cattle_counties = all_cattle_counties & curr_cow_counties
    print(a_year)
    print(f"{len(all_cattle_counties) = }")
    print("====================================================================")

In [None]:
# Same year-by-year shrinking for the NPP/rangeland table: end with the
# counties that have a row in every year.
# NOTE(review): in the cells visible here this set is only printed; it is not
# used to filter county_annual_NPP_Ra — confirm whether that is intended.
all_county_annual_NPP_Ra = set(county_annual_NPP_Ra.county_fips.unique())
print(f"{len(all_county_annual_NPP_Ra) = }")

for a_year in sorted(county_annual_NPP_Ra.year.unique()):
    curr = county_annual_NPP_Ra[county_annual_NPP_Ra.year == a_year].copy()
    curr_counties = set(curr.county_fips.unique())
    # Keep only the counties that also occur in this year.
    all_county_annual_NPP_Ra = all_county_annual_NPP_Ra & curr_counties
    print(a_year)
    print(f"{len(all_county_annual_NPP_Ra) = }")
    print("====================================================================")

In [None]:
# Restrict the cattle inventory to counties that are present in all years.
in_every_year = cattle_inventory["county_fips"].isin(list(all_cattle_counties))
cattle_inventory = cattle_inventory.loc[in_every_year]

In [None]:
# Counties present in both the NPP/rangeland table and the cattle inventory.
NPP_counties = set(county_annual_NPP_Ra["county_fips"].unique())
cow_counties = set(cattle_inventory["county_fips"].unique())
county_intersection = NPP_counties & cow_counties

In [None]:
# Reduce both tables to the shared counties, then report the resulting
# shapes, county counts and years side by side.
shared_counties = list(county_intersection)
county_annual_NPP_Ra = county_annual_NPP_Ra.loc[
    county_annual_NPP_Ra["county_fips"].isin(shared_counties)
]
cattle_inventory = cattle_inventory.loc[
    cattle_inventory["county_fips"].isin(shared_counties)
]

print(f"{county_annual_NPP_Ra.shape = }")
print(f"{cattle_inventory.shape     = }")
print()
print(f"{len(county_annual_NPP_Ra.county_fips.unique()) = }")
print(f"{len(cattle_inventory.county_fips.unique())     = }")
print()
print(f"{sorted(county_annual_NPP_Ra.year.unique()) = }")
print(f"{sorted(cattle_inventory.year.unique())     = }")

In [None]:
# Join cattle inventory onto the NPP/rangeland rows by county and year.
# A left join keeps every NPP/rangeland row; rows without a matching
# inventory record get NaN in the inventory column.
county_annual_NPP_Ra_cattleInv = county_annual_NPP_Ra.merge(
    cattle_inventory,
    how="left",
    on=["county_fips", "year"],
)

# Compare input and output shapes to spot dropped or duplicated rows.
print(f"{cattle_inventory.shape = }")
print(f"{county_annual_NPP_Ra.shape = }")
print(f"{county_annual_NPP_Ra_cattleInv.shape = }")
county_annual_NPP_Ra_cattleInv.head(2)

In [None]:
# Order the merged table by year, then county, and renumber the index.
county_annual_NPP_Ra_cattleInv = (
    county_annual_NPP_Ra_cattleInv
    .sort_values(by=["year", "county_fips"])
    .reset_index(drop=True)
)
county_annual_NPP_Ra_cattleInv.head(2)

In [None]:
# Independent copy of the 2017 rows, used to fit the model.
is_2017 = county_annual_NPP_Ra_cattleInv["year"] == 2017
NPP_Ra_cattleInv_2017 = county_annual_NPP_Ra_cattleInv.loc[is_2017].copy()

In [None]:
# Design matrix (NPP, rangeland acres) and 1-D response vector for 2017.
predictor_cols = ["modis_npp", "rangeland_acre"]
expl_var_2017 = NPP_Ra_cattleInv_2017[predictor_cols].to_numpy()
y_2017 = NPP_Ra_cattleInv_2017[["cattle_cow_beef_inventory"]].to_numpy().ravel()
print(f"{y_2017.shape = }")
y_2017

In [None]:
# Append a column of ones as the LAST column so the fit includes an intercept.
ones_2017 = np.ones(len(expl_var_2017))
expl_var_interc_2017 = np.column_stack([expl_var_2017, ones_2017])
print(expl_var_interc_2017.shape)
expl_var_interc_2017

In [None]:
# Ordinary least squares for the 2017 model: columns are (NPP, rangeland acres, 1).
# `rcond=None` is passed explicitly: on NumPy < 2.0 omitting it emits a
# FutureWarning and uses the legacy rank cutoff, so pinning it gives the same
# cutoff (machine precision * max(M, N)) on every NumPy version.
# Returns the coefficient vector, the residual sum of squares, the rank of
# the design matrix and its singular values.
solution_2017, RSS_2017, rank_2017, singular_vals_2017 = np.linalg.lstsq(
    expl_var_interc_2017, y_2017, rcond=None
)

In [None]:
# Show the first 2017 rows of the merged table for reference next to the fit.
county_annual_NPP_Ra_cattleInv[county_annual_NPP_Ra_cattleInv.year==2017].head(2)

In [None]:
# Unpack the fitted coefficients in design-matrix column order:
# modis_npp, rangeland_acre, then the intercept (the appended column of ones).
NPP_coef_2017, Ra_coef_2017, intercept_2017 = (
    solution_2017[0],
    solution_2017[1],
    solution_2017[2],
)

### Apply 2017 model to 2012 data 

In [None]:
# Independent copy of the 2012 rows, used to evaluate the 2017 model.
is_2012 = county_annual_NPP_Ra_cattleInv["year"] == 2012
NPP_Ra_cattleInv_2012 = county_annual_NPP_Ra_cattleInv.loc[is_2012].copy()

# 1-D response vector for 2012.
y_2012 = NPP_Ra_cattleInv_2012[["cattle_cow_beef_inventory"]].to_numpy().ravel()

# Design matrix with the same column order as the 2017 fit: NPP, rangeland
# acres, then a trailing column of ones for the intercept.
expl_var_2012 = NPP_Ra_cattleInv_2012[["modis_npp", "rangeland_acre"]].to_numpy()
expl_var_interc_2012 = np.column_stack([expl_var_2012, np.ones(len(expl_var_2012))])
expl_var_interc_2012

In [None]:
# Peek at the 2012 subset that the 2017 model is applied to.
NPP_Ra_cattleInv_2012.head(2)

In [None]:
# Predict 2012 inventory with the coefficients fitted on 2017 data.
# The terms are summed in the order NPP, rangeland, intercept.
n_obs_2012 = len(expl_var_2012)
NPP_term_2012 = NPP_coef_2017 * NPP_Ra_cattleInv_2012["modis_npp"].values
Ra_term_2012 = Ra_coef_2017 * NPP_Ra_cattleInv_2012["rangeland_acre"].values
intercept_term_2012 = intercept_2017 * np.ones(n_obs_2012)
y_hat_2012_Model2017 = NPP_term_2012 + Ra_term_2012 + intercept_term_2012

# Residuals, their sum of squares, and the per-observation mean of that sum
# (displayed as the cell output).
res_2012_Model2017 = y_2012 - y_hat_2012_Model2017
RSS_2012_Model2017 = np.dot(res_2012_Model2017, res_2012_Model2017)
RSS_2012_Model2017 / n_obs_2012

In [None]:
# Range of observed 2012 inventory values, for judging the size of the
# mean squared residual computed for the 2017 model.
print (f"{NPP_Ra_cattleInv_2012.cattle_cow_beef_inventory.min()=}")
print (f"{NPP_Ra_cattleInv_2012.cattle_cow_beef_inventory.max()=}")