# Importing the Data and Required Packages

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

pd.set_option('display.max_columns', None)

import warnings
warnings.filterwarnings('ignore')

# PROBLEM STATEMENT #2:
## Predicting Agricultural or Farm-based Emissions

Currently, there are many Small and large Agricultural Farms in US/across the globe where crop- generated/farming-generated  emissions  is  a  pressing  issue  but  there  are  no  data/standard methodologies /tools /resources available to calculate overall emissions. A large part of agriculture land represents privately-owned farms.

One way is to map the farm location (ex. using latitude and longitude) to the public farms locations where  the  emission  data/  information  is  available  and  then  calculate  proxy  based  emissions,
driven by multiple parameters (land usage, land area, existing emissions for the location, farming type, technology used etc.). Hence the GHG Emissions forecast, specific to the given scenario, is
largely based on farm location (determined by address mapping).

The ask, therefore, is to develop an effective solution to match farm addresses (geo mapping) and subsequently developing a predictive emissions calculator and model for the mapped agricultural
farmlands. 


# Project Lifecycle:

* Understanding Problem Statement
* Data Collection
* Data Checks
* EDA
* Data Pre-Processing
* Model Training
* Choose the Best Model

# The major crops selected in this model are:

`corn cotton and sorghum`

In [2]:
df_cotton = pd.read_csv("https://raw.githubusercontent.com/aps0611/experimental/main/dataset/cotton_area.csv")
df_cotton

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376
...,...,...,...,...,...,...,...,...
643,COUNTY,VIRGINIA,51,SOUTHEASTERN,90,ISLE OF WIGHT,93,13316
644,COUNTY,VIRGINIA,51,SOUTHEASTERN,90,SOUTHAMPTON,175,38067
645,COUNTY,VIRGINIA,51,SOUTHEASTERN,90,SUFFOLK CITY,800,14449
646,COUNTY,VIRGINIA,51,SOUTHEASTERN,90,SURRY,181,2023


In [36]:
df_cotton.shape

(648, 8)

# Corn

In [37]:
df_corn = pd.read_csv('https://raw.githubusercontent.com/aps0611/experimental/main/dataset/corn_area.csv')

In [38]:
df_corn

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,Corn_area
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,645
1,COUNTY,ALABAMA,1,BLACK BELT,40,BULLOCK,11,1292
2,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,13876
3,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,2487
4,COUNTY,ALABAMA,1,BLACK BELT,40,GREENE,63,34
...,...,...,...,...,...,...,...,...
2638,COUNTY,WYOMING,56,SOUTHEAST,50,GOSHEN,15,29885
2639,COUNTY,WYOMING,56,SOUTHEAST,50,LARAMIE,21,17181
2640,COUNTY,WYOMING,56,SOUTHEAST,50,NIOBRARA,27,
2641,COUNTY,WYOMING,56,SOUTHEAST,50,PLATTE,31,6606


In [39]:
df_corn.shape

(2643, 8)

In [40]:
# combine the two dataframes
df_combined = pd.merge(df_cotton, df_corn, on = ['Geo Level','State','State ANSI','Ag District',
                                                'Ag District Code','County','County ANSI'], how = 'outer')
df_combined

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261,645
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133,13876
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795,2487
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046,1630
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376,
...,...,...,...,...,...,...,...,...,...
2688,COUNTY,WYOMING,56,SOUTHEAST,50,GOSHEN,15,,29885
2689,COUNTY,WYOMING,56,SOUTHEAST,50,LARAMIE,21,,17181
2690,COUNTY,WYOMING,56,SOUTHEAST,50,NIOBRARA,27,,
2691,COUNTY,WYOMING,56,SOUTHEAST,50,PLATTE,31,,6606


# Sorghum

In [41]:
df_sorghum = pd.read_csv('https://raw.githubusercontent.com/aps0611/experimental/main/dataset/sorghum_area.csv')
df_sorghum.head(3)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,Value
0,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,
1,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,
2,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,


In [42]:
df_sorghum.rename(columns = {'Value': 'SORGHUM_ACRES'}, inplace = True)
df_sorghum.head(5)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,SORGHUM_ACRES
0,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,
1,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,
2,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,
3,COUNTY,ALABAMA,1,BLACK BELT,40,MARENGO,91,
4,COUNTY,ALABAMA,1,BLACK BELT,40,MONTGOMERY,101,


In [43]:
df_combined = pd.merge(df_combined, df_sorghum, on = ['Geo Level','State','State ANSI','Ag District',
                                                'Ag District Code','County','County ANSI'], how = 'outer')
df_combined

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261,645,
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133,13876,
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795,2487,
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046,1630,
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376,,
...,...,...,...,...,...,...,...,...,...,...
2703,COUNTY,TEXAS,48,CROSS TIMBERS,30,MONTAGUE,337,,,
2704,COUNTY,TEXAS,48,CROSS TIMBERS,30,STEPHENS,429,,,300
2705,COUNTY,TEXAS,48,SOUTH TEXAS,96,DUVAL,131,,,
2706,COUNTY,TEXAS,48,UPPER COAST,90,JEFFERSON,245,,,


In [44]:
len(df_combined['County'].unique())

1572

In [45]:
df_precipitation = pd.read_csv('https://raw.githubusercontent.com/aps0611/experimental/main/dataset/precipitation.csv')
df_precipitation

Unnamed: 0,State,precipitation_cm
0,ALABAMA,148.0
1,ALASKA,57.2
2,ARIZONA,34.5
3,ARKANSAS,128.4
4,CALIFORNIA,56.3
5,COLORADO,40.5
6,CONNECTICUT,127.9
7,DELAWARE,116.0
8,FLORIDA,138.5
9,GEORGIA,128.7


In [46]:
df_combined = pd.merge(df_combined, df_precipitation, on = ['State'], how = 'inner')
df_combined

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261,645,,148.0
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133,13876,,148.0
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795,2487,,148.0
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046,1630,,148.0
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376,,,148.0
...,...,...,...,...,...,...,...,...,...,...,...
2703,COUNTY,WYOMING,56,SOUTHEAST,50,GOSHEN,15,,29885,,32.8
2704,COUNTY,WYOMING,56,SOUTHEAST,50,LARAMIE,21,,17181,,32.8
2705,COUNTY,WYOMING,56,SOUTHEAST,50,NIOBRARA,27,,,,32.8
2706,COUNTY,WYOMING,56,SOUTHEAST,50,PLATTE,31,,6606,,32.8


In [47]:
# df_combined.drop(['precipitation_cm_y'],inplace = True, axis = 'columns')
# df_combined.head(3)

In [48]:
d = dict()
for key in list(df_combined['State'].unique()):
  d[key] = list(df_combined[df_combined['State'] == key]['County'])

In [49]:
# from geopy.geocoders import Nominatim

# import matplotlib.pyplot as plt
# %matplotlib inline

In [50]:
# data = []
# for key in d:
#   class FarmGeocoder:
#     def __init__(self):
#         self.geocoder = Nominatim(user_agent=key)

#     def geocode_address(self, address):
#         location = self.geocoder.geocode(address)
#         return (location.latitude, location.longitude) if location else None

 
#   # Get the farm address
#   farm_addresses = d[key]

#   geocoder = FarmGeocoder()

 
#   for address in farm_addresses:
#       coordinates = geocoder.geocode_address(address + ',' + key)
#       if coordinates:
#           latitude, longitude = coordinates
#           print(f"Address: {address}")
#           print(f"Latitude: {latitude}")
#           print(f"Longitude: {longitude}")
#           print("---------")
#           data.append({'County': address, 'Latitude': latitude, 'Longitude': longitude})
#       else:
#           print(f"Failed to geocode address: {address}")
#           print("---------")



In [51]:
# df_geolocation = pd.DataFrame(data)
# df_geolocation.shape

In [52]:
# df_geolocation.to_csv('geolocation.csv')

In [53]:
# import folium

# # Create a Folium map centered on the US
# map_us = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# # Iterate over each row in the DataFrame
# for index, row in df_geolocation.iterrows():
#     address = row['County']
#     latitude = row['Latitude']
#     longitude = row['Longitude']
    
#     # Add a marker for each location
#     folium.Marker(location=[latitude, longitude], popup=address).add_to(map_us)

# # Display the map
# map_us


In [54]:
df_geolocation = pd.read_csv('https://raw.githubusercontent.com/aps0611/experimental/main/dataset/geolocation.csv')

In [55]:
df_combined = pd.merge(df_combined, 
                       df_geolocation, 
                       on = ['State','County'],
                       how = 'inner')


In [56]:
df_combined

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261,645,,148.0,32.516526,-86.631940
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133,13876,,148.0,32.311797,-87.104664
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795,2487,,148.0,32.580123,-86.125195
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046,1630,,148.0,32.108807,-86.640254
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376,,,148.0,32.366606,-85.666031
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2700,COUNTY,WYOMING,56,SOUTHEAST,50,GOSHEN,15,,29885,,32.8,42.065710,-104.354142
2701,COUNTY,WYOMING,56,SOUTHEAST,50,LARAMIE,21,,17181,,32.8,41.311367,-105.591101
2702,COUNTY,WYOMING,56,SOUTHEAST,50,NIOBRARA,27,,,,32.8,43.008758,-104.456552
2703,COUNTY,WYOMING,56,SOUTHEAST,50,PLATTE,31,,6606,,32.8,42.092314,-104.987099


In [57]:
df_combined = df_combined.replace(['',' '], np.nan, regex=False)

In [58]:
df_combined

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261,645,,148.0,32.516526,-86.631940
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133,13876,,148.0,32.311797,-87.104664
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795,2487,,148.0,32.580123,-86.125195
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046,1630,,148.0,32.108807,-86.640254
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376,,,148.0,32.366606,-85.666031
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2700,COUNTY,WYOMING,56,SOUTHEAST,50,GOSHEN,15,,29885,,32.8,42.065710,-104.354142
2701,COUNTY,WYOMING,56,SOUTHEAST,50,LARAMIE,21,,17181,,32.8,41.311367,-105.591101
2702,COUNTY,WYOMING,56,SOUTHEAST,50,NIOBRARA,27,,,,32.8,43.008758,-104.456552
2703,COUNTY,WYOMING,56,SOUTHEAST,50,PLATTE,31,,6606,,32.8,42.092314,-104.987099


Gas	| CO2-e |
----| ------|
CO2	| 1 |
CH4	| 28 |
N2O	| 265 |
CF4	|6,630|
C2F6|	12,200|
SF6|	22,800|
NF3|	17,200|
	
Gas|	Conversion Factor|
---|-------------------|
CO2|	3.67|
CH4|	1.33|
N2O|	1.57|
Nox|	3.29|
CO|	2.33|
CO2 Lime|	3.67|
NMVOC|	1.17|

* Region 1: 0.55
* Region 2: 0.5
* Region 3: 0.45
* Region 4: 0.45
* Region 5: 0.4
* Region 6 :0.4

#### This is categorized based on precipitation pattern. Correlation between precipitation-soil-Vegetation-[temperature] to some extent

### To reduce the multicollinearity we can consider only one factor!!

In [59]:
def calculate_application_factor(rainfall):
    if rainfall > 200:
      return 0.55 # this factors are for hectares of land
    elif 150 <= rainfall <= 200:
      return 0.5
    elif 100 <= rainfall < 150:
      return 0.45
    elif 50 <= rainfall < 100:
      return 0.4
    elif 25 <= rainfall < 50:
      return 0.4
    else:
        return 0.35

In [60]:
df_combined.columns

Index(['Geo Level', 'State', 'State ANSI', 'Ag District', 'Ag District Code',
       'County', 'County ANSI', 'cotton_area', 'Corn_area', 'SORGHUM_ACRES',
       'precipitation_cm', 'Latitude', 'Longitude'],
      dtype='object')

In [61]:
df_combined['cotton_area']

0        9,261
1       14,133
2       13,795
3        4,046
4       12,376
         ...  
2700       NaN
2701       NaN
2702       NaN
2703       NaN
2704       NaN
Name: cotton_area, Length: 2705, dtype: object

In [62]:
# convert the df colum cotton area acres into float for further calculation purpose
df_combined['cotton_area'] = df_combined['cotton_area'].str.replace(',', '').astype(float)
df_combined['Corn_area'] = df_combined['Corn_area'].str.replace(',', '').astype(float)
df_combined['SORGHUM_ACRES'] = df_combined['SORGHUM_ACRES'].str.replace(',', '').astype(float)

In [63]:
df_combined.head(3)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,,148.0,32.516526,-86.63194
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,,148.0,32.311797,-87.104664
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,,148.0,32.580123,-86.125195


In [64]:
# Create a new column 'Application Factor' in the DataFrame
df_combined['Application Factor'] = df_combined['precipitation_cm'].apply(calculate_application_factor)

# Multiply the 'Area' column with the 'Application Factor'
df_combined['Lime-cotton-tonnes'] = df_combined['cotton_area'] * df_combined['Application Factor'] * 0.404686
df_combined['Lime-corn-tonnes'] = df_combined['Corn_area'] * df_combined['Application Factor'] * 0.404686
df_combined['Lime-sorghum-tonnes'] = df_combined['SORGHUM_ACRES'] * df_combined['Application Factor'] * 0.404686

In [65]:
df_combined.head(3)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,


E = ((M x FracLime x P X EF) + (M x (1-FracLime) x P x EF)) x Cg/1000

In [66]:
FracLime = 1
P = 0.9 # PURITY OF LIMESTONE
EF = 0.12 # EMISSION FACTOR FOR LIMESTONE
Cg = 3.67

# Calculate the 'E' column:
df_combined['E-cotton-CO2'] = ((df_combined['Lime-cotton-tonnes'] * FracLime * P * EF) + (df_combined['Lime-cotton-tonnes'] * (1 - FracLime) * P * EF)) * Cg
df_combined['E-corn-CO2'] = ((df_combined['Lime-corn-tonnes'] * FracLime * P * EF) + (df_combined['Lime-corn-tonnes'] * (1 - FracLime) * P * EF)) * Cg
df_combined['E-sorghum-CO2'] = ((df_combined['Lime-sorghum-tonnes'] * FracLime * P * EF) + (df_combined['Lime-sorghum-tonnes'] * (1 - FracLime) * P * EF)) * Cg

# Display the updated DataFrame
df_combined.head(3)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,,668.464577,46.55649,
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,,1020.128481,1001.578066,
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,,995.731437,179.513163,


# UREA

## COTTON

In [67]:
# Conversion factor: 1 Ha = 2.47105 acres
conversion_factor = 2.47105

# Convert urea application rate from kg/Ha to kg/acre
df_combined['Urea_tonne_cotton'] = (df_combined['cotton_area'] * 270 * 0.001) / conversion_factor 

# Display the DataFrame
df_combined.head(5)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,,668.464577,46.55649,,1011.90587
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,,1020.128481,1001.578066,,1544.246373
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,,995.731437,179.513163,,1507.314704
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046.0,1630.0,,148.0,32.108807,-86.640254,0.45,736.8118,296.837181,,292.042725,117.654385,,442.087372
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376.0,,,148.0,32.366606,-85.666031,0.45,2253.777271,,,893.307159,,,1352.267255


In [68]:
EF = 0.20
C = 3.67

df_combined['E-cotton-urea-CO2'] = df_combined['Urea_tonne_cotton'] * EF * C
df_combined.head(5)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton,E-cotton-urea-CO2
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,,668.464577,46.55649,,1011.90587,742.738909
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,,1020.128481,1001.578066,,1544.246373,1133.476838
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,,995.731437,179.513163,,1507.314704,1106.368993
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046.0,1630.0,,148.0,32.108807,-86.640254,0.45,736.8118,296.837181,,292.042725,117.654385,,442.087372,324.492131
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376.0,,,148.0,32.366606,-85.666031,0.45,2253.777271,,,893.307159,,,1352.267255,992.564165


Urea application for CORN:

150 pounds * 0.453592 = kg/acre

https://www.derthickscornmaze.com/interesting-about-corn/how-much-urea-per-acre-for-corn.html

In [69]:
urea_per_acre = 150 * 0.453592 * 0.001 #tonnes per acre

df_combined['Urea_tonne_corn'] = df_combined['Corn_area'] * urea_per_acre
df_combined.head(3)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton,E-cotton-urea-CO2,Urea_tonne_corn
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,,668.464577,46.55649,,1011.90587,742.738909,43.885026
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,,1020.128481,1001.578066,,1544.246373,1133.476838,944.106389
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,,995.731437,179.513163,,1507.314704,1106.368993,169.212496


In [70]:
EF = 0.20
C = 3.67

df_combined['E-corn-urea-CO2'] = df_combined['Urea_tonne_corn'] * EF * C
df_combined.head(5)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton,E-cotton-urea-CO2,Urea_tonne_corn,E-corn-urea-CO2
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,,668.464577,46.55649,,1011.90587,742.738909,43.885026,32.211609
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,,1020.128481,1001.578066,,1544.246373,1133.476838,944.106389,692.974089
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,,995.731437,179.513163,,1507.314704,1106.368993,169.212496,124.201972
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046.0,1630.0,,148.0,32.108807,-86.640254,0.45,736.8118,296.837181,,292.042725,117.654385,,442.087372,324.492131,110.903244,81.402981
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376.0,,,148.0,32.366606,-85.666031,0.45,2253.777271,,,893.307159,,,1352.267255,992.564165,,


According to the sources, the recommended urea rate for sorghum in the US is 4.535924 Kg per acre when conditions are moist and 9.071847 pounds per acre when conditions are dry

In [72]:
threshold = 100  # Set desired threshold value. 
factor_above_threshold = 4.5359 * 0.001 # if precipitation is above the threshold
factor_below_threshold = 9.071847 * 0.001 # if precipitation is below or equal to the threshold

df_combined['Urea_tonne_sorghum'] = np.where(df_combined['precipitation_cm'] > threshold, df_combined['SORGHUM_ACRES'] * factor_above_threshold, df_combined['SORGHUM_ACRES'] * factor_below_threshold)

In [73]:
df_combined.head(5)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton,E-cotton-urea-CO2,Urea_tonne_corn,E-corn-urea-CO2,Urea_tonne_sorghum
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,,668.464577,46.55649,,1011.90587,742.738909,43.885026,32.211609,
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,,1020.128481,1001.578066,,1544.246373,1133.476838,944.106389,692.974089,
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,,995.731437,179.513163,,1507.314704,1106.368993,169.212496,124.201972,
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046.0,1630.0,,148.0,32.108807,-86.640254,0.45,736.8118,296.837181,,292.042725,117.654385,,442.087372,324.492131,110.903244,81.402981,
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376.0,,,148.0,32.366606,-85.666031,0.45,2253.777271,,,893.307159,,,1352.267255,992.564165,,,


In [74]:
EF = 0.20
C = 3.67

df_combined['E-sorghum-urea-CO2'] = df_combined['Urea_tonne_sorghum'] * EF * C
df_combined.head(5)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton,E-cotton-urea-CO2,Urea_tonne_corn,E-corn-urea-CO2,Urea_tonne_sorghum,E-sorghum-urea-CO2
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,,668.464577,46.55649,,1011.90587,742.738909,43.885026,32.211609,,
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,,1020.128481,1001.578066,,1544.246373,1133.476838,944.106389,692.974089,,
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,,995.731437,179.513163,,1507.314704,1106.368993,169.212496,124.201972,,
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046.0,1630.0,,148.0,32.108807,-86.640254,0.45,736.8118,296.837181,,292.042725,117.654385,,442.087372,324.492131,110.903244,81.402981,,
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376.0,,,148.0,32.366606,-85.666031,0.45,2253.777271,,,893.307159,,,1352.267255,992.564165,,,,


In [75]:
# replace all the NaN values with zero

df_combined = df_combined.fillna(0)
df_combined.head(5)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton,E-cotton-urea-CO2,Urea_tonne_corn,E-corn-urea-CO2,Urea_tonne_sorghum,E-sorghum-urea-CO2
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,0.0,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,0.0,668.464577,46.55649,0.0,1011.90587,742.738909,43.885026,32.211609,0.0,0.0
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,0.0,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,0.0,1020.128481,1001.578066,0.0,1544.246373,1133.476838,944.106389,692.974089,0.0,0.0
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,0.0,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,0.0,995.731437,179.513163,0.0,1507.314704,1106.368993,169.212496,124.201972,0.0,0.0
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046.0,1630.0,0.0,148.0,32.108807,-86.640254,0.45,736.8118,296.837181,0.0,292.042725,117.654385,0.0,442.087372,324.492131,110.903244,81.402981,0.0,0.0
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376.0,0.0,0.0,148.0,32.366606,-85.666031,0.45,2253.777271,0.0,0.0,893.307159,0.0,0.0,1352.267255,992.564165,0.0,0.0,0.0,0.0


In [76]:
# df_combined['E-CH4-total-tonne'] = (df_combined['CH4-Kg/year-milk'] + df_combined['CH4-Kg/year-non-milk'] + df_combined['CH4-Kg/year-calves']) * 0.001
# df_combined.head(5)

# N20 emissions from Urea

EF
* Non-Irrigated Crop	0.0005
* Irrigated Crop	0.0085
* Sugar cane	0.0199
* Cotton	0.0055
* Horticulture	0.0085

In [77]:
## Urea application * 0.46 # gives the Actual N content in the fertilizer  * Area sown * Cg 1.57 *EF
df_combined['E-N2O-cotton'] = df_combined['Urea_tonne_cotton'] * 0.46 * df_combined['cotton_area'] * 1.57 * 0.0055 * 265 * 0.001
df_combined['E-N2O-corn'] = df_combined['Urea_tonne_corn'] * 0.46 * df_combined['Corn_area'] * 1.57 * 0.0085 * 265 * 0.001
df_combined['E-N2O-sorghum'] = df_combined['Urea_tonne_sorghum'] * 0.46 * df_combined['SORGHUM_ACRES'] * 1.57 * 0.0085 * 265 * 0.001
df_combined.head(3)
# gives in tCO2e

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton,E-cotton-urea-CO2,Urea_tonne_corn,E-corn-urea-CO2,Urea_tonne_sorghum,E-sorghum-urea-CO2,E-N2O-cotton,E-N2O-corn,E-N2O-sorghum
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,0.0,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,0.0,668.464577,46.55649,0.0,1011.90587,742.738909,43.885026,32.211609,0.0,0.0,9864.249465,46.046684,0.0
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,0.0,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,0.0,1020.128481,1001.578066,0.0,1544.246373,1133.476838,944.106389,692.974089,0.0,0.0,22972.962119,21311.180696,0.0
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,0.0,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,0.0,995.731437,179.513163,0.0,1507.314704,1106.368993,169.212496,124.201972,0.0,0.0,21887.274676,684.589919,0.0


In [78]:
## Managment Practices
'''1. Conventional Tillage
2. Reduced Tillage
3. NoTill
'''

'1. Conventional Tillage\n2. Reduced Tillage\n3. NoTill\n'

In [79]:
# Conversion factors
g_to_kg = 0.001  # Conversion factor for grams to kilograms
ha_to_acre = 2.47105  # Conversion factor for hectares to acres
factor = (g_to_kg/ha_to_acre)*365*0.001  #tonne/acre-year

# since the data is available in g/ha - day

In [80]:
df_combined['Tillage'] = 'CT'

In [81]:
df_combined.head(3)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton,E-cotton-urea-CO2,Urea_tonne_corn,E-corn-urea-CO2,Urea_tonne_sorghum,E-sorghum-urea-CO2,E-N2O-cotton,E-N2O-corn,E-N2O-sorghum,Tillage
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,0.0,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,0.0,668.464577,46.55649,0.0,1011.90587,742.738909,43.885026,32.211609,0.0,0.0,9864.249465,46.046684,0.0,CT
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,0.0,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,0.0,1020.128481,1001.578066,0.0,1544.246373,1133.476838,944.106389,692.974089,0.0,0.0,22972.962119,21311.180696,0.0,CT
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,0.0,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,0.0,995.731437,179.513163,0.0,1507.314704,1106.368993,169.212496,124.201972,0.0,0.0,21887.274676,684.589919,0.0,CT


# assumption: if you are following CT then you are following for all the areas. No discretion to user

In [82]:
df_combined['E-N20-T'] = 9.1 * factor * (df_combined['cotton_area'] + df_combined['Corn_area'] + df_combined['SORGHUM_ACRES'])
df_combined['E-CO2-T'] = 102 * factor * (df_combined['cotton_area'] + df_combined['Corn_area'] + df_combined['SORGHUM_ACRES'])

In [83]:
df_combined.head(5)

Unnamed: 0,Geo Level,State,State ANSI,Ag District,Ag District Code,County,County ANSI,cotton_area,Corn_area,SORGHUM_ACRES,precipitation_cm,Latitude,Longitude,Application Factor,Lime-cotton-tonnes,Lime-corn-tonnes,Lime-sorghum-tonnes,E-cotton-CO2,E-corn-CO2,E-sorghum-CO2,Urea_tonne_cotton,E-cotton-urea-CO2,Urea_tonne_corn,E-corn-urea-CO2,Urea_tonne_sorghum,E-sorghum-urea-CO2,E-N2O-cotton,E-N2O-corn,E-N2O-sorghum,Tillage,E-N20-T,E-CO2-T
0,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,1,9261.0,645.0,0.0,148.0,32.516526,-86.63194,0.45,1686.508671,117.460111,0.0,668.464577,46.55649,0.0,1011.90587,742.738909,43.885026,32.211609,0.0,0.0,9864.249465,46.046684,0.0,CT,13.315303,149.248449
1,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,47,14133.0,13876.0,0.0,148.0,32.311797,-87.104664,0.45,2573.742257,2526.940321,0.0,1020.128481,1001.578066,0.0,1544.246373,1133.476838,944.106389,692.974089,0.0,0.0,22972.962119,21311.180696,0.0,CT,37.64873,421.99675
2,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,51,13795.0,2487.0,0.0,148.0,32.580123,-86.125195,0.45,2512.189517,452.904337,0.0,995.731437,179.513163,0.0,1507.314704,1106.368993,169.212496,124.201972,0.0,0.0,21887.274676,684.589919,0.0,CT,21.885702,245.31226
3,COUNTY,ALABAMA,1,BLACK BELT,40,LOWNDES,85,4046.0,1630.0,0.0,148.0,32.108807,-86.640254,0.45,736.8118,296.837181,0.0,292.042725,117.654385,0.0,442.087372,324.492131,110.903244,81.402981,0.0,0.0,1882.78199,294.072313,0.0,CT,7.629483,85.517282
4,COUNTY,ALABAMA,1,BLACK BELT,40,MACON,87,12376.0,0.0,0.0,148.0,32.366606,-85.666031,0.45,2253.777271,0.0,0.0,893.307159,0.0,0.0,1352.267255,992.564165,0.0,0.0,0.0,0.0,17616.064019,0.0,0.0,CT,16.635391,186.462629


In [84]:
# !pip install pandas-profiling

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandas-profiling
  Downloading pandas_profiling-3.6.6-py2.py3-none-any.whl (324 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m324.4/324.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ydata-profiling (from pandas-profiling)
  Downloading ydata_profiling-4.2.0-py2.py3-none-any.whl (352 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m352.3/352.3 kB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
Collecting visions[type_image_path]==0.7.5 (from ydata-profiling->pandas-profiling)
  Downloading visions-0.7.5-py3-none-any.whl (102 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.7/102.7 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting htmlmin==0.1.12 (from ydata-profiling->pandas-profiling)
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting phik<0

In [85]:
# from pandas_profiling import ProfileReport
# profile = ProfileReport(df_combined)
# profile.to_file("profile_report.html")

In [86]:
df_combined.to_csv('data_v0-0-1.csv')