# Biogas Production Analysis


In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
from termcolor import colored



In [7]:
# Display configuration: show every column, let pandas pick the output width,
# and render floats with three decimal places.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.float_format', lambda value: '%.3f' % value)

# Load the dataset from the CSV file in the working directory.
df = pd.read_csv('veri_seti_son_2.csv')

In [8]:
# Preview the first three rows of the loaded dataset.
df.head(3)

Unnamed: 0,Project Name,Project Type,City,County,State,Digester Type,Status,Year Operational,Animal/Farm Type(s),Cattle,Dairy,Poultry,Swine,Co-Digestion,Biogas Generation Estimate (cu-ft/day),Electricity Generated (kWh/yr),Biogas End Use(s),LCFS Pathway?,Receiving Utility,Total Emission Reductions (MTCO2e/yr),Awarded USDA Funding?,Operational Years,Total_Animals,Biogas_per_Animal (cu-ft/day),Emission_Reduction_per_Year,Electricity_to_Biogas_Ratio,Total_Waste_kg/day,Waste_Efficiency,Electricity_Efficiency
0,Cargill - Sandy River Farm Digester,Farm Scale,Morrilton,Conway,Arkansas,Covered Lagoon,Operational,2008.0,Swine,0.0,0.0,0.0,4200.0,No-Info,1814400.0,0.0,Flared Full-time,No-Info,No-Info,4482.755,No-Info,15.0,4200.0,432.0,298.85,0.0,21000.0,86.4,0.0
1,Butterfield RNG Digester,Farm Scale,Buckeye,Maricopa,Arizona,Mixed Plug Flow,Operational,2022.0,Dairy,0.0,11760.0,0.0,0.0,No-Info,109486.0,389698.2,Pipeline Gas,No-Info,Yes,14030.129,Yes,1.0,11760.0,9.31,14030.129,3.559,399840.0,0.274,0.975
2,Caballero Dairy Farms Digester,Farm Scale,Eloy,Pinal,Arizona,Unknown or Unspecified,Construction,2022.0,Dairy,0.0,8800.0,0.0,0.0,No-Info,94500.0,2813080.4,Pipeline Gas,No-Info,No-Info,98999.89,No-Info,1.0,8800.0,10.739,98999.89,29.768,299200.0,0.316,9.402


In [12]:
def print_section_title(title):
    """Print ``title`` to stdout as a bold, underlined, blue heading.

    Parameters
    ----------
    title : str
        Text of the heading.
    """
    heading_attrs = ['bold', 'underline']
    styled_heading = colored(title, 'blue', attrs=heading_attrs)
    print(styled_heading)

def print_heads_and_tails(dataframe, head = 5):
    """Display the first and last ``head`` rows of ``dataframe`` as captioned tables.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to preview.
    head : int, default 5
        Number of rows shown from the top and from the bottom.
    """
    first_rows = dataframe.head(head)
    last_rows = dataframe.tail(head)
    # The top slice is rendered first, then the bottom slice.
    display(first_rows.style.set_caption('Heads'))
    display(last_rows.style.set_caption('Tails'))

def display_na(dataframe):
    """Display a table with the number of missing values in each column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are inspected.

    Notes
    -----
    Only values that pandas itself treats as missing (``isnull()``) are
    counted; placeholder strings stored in the data are not.
    """
    # Use the argument rather than the notebook-level ``df`` so the function
    # reports on whichever frame the caller passes in.
    na_df = dataframe.isnull().sum().reset_index()
    na_df.columns = ['Column', 'Number of NA Values']
    display(na_df.style.set_caption('Number of Na Values'))


def display_quantiles(dataframe):
    """Display descriptive statistics of ``dataframe`` at a fixed set of percentiles.

    The table comes from ``DataFrame.describe`` evaluated at the 0th, 5th,
    50th, 95th, 99th and 100th percentiles, with every cell formatted to two
    decimal places.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to summarise.
    """
    percentile_points = [0, 0.05, 0.50, 0.95, 0.99, 1]
    summary = dataframe.describe(percentiles=percentile_points)
    styled_summary = summary.style.format("{:.2f}").set_caption('Quantiles')
    display(styled_summary)

def check_df(dataframe, head = 5):
    """Print and display a quick overview of ``dataframe``.

    The sections shown, in order, are: shape, column dtypes, ``info()``
    summary, first/last rows, missing-value counts, and quantiles.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to inspect.
    head : int, default 5
        Number of rows shown from the top and bottom in the head/tail section.
    """
    print_section_title('Shape')
    print(dataframe.shape)

    print_section_title('Types')
    # A Styler has to be rendered with display(); print() would only emit its
    # object repr ("<pandas.io.formats.style.Styler object at ...>").
    display(dataframe.dtypes.to_frame('Data Type').style.set_caption("Data Types"))

    print_section_title('Info')
    # info() writes its report to stdout itself and returns None, so wrapping
    # it in print() would append a stray "None" line.
    dataframe.info()

    print_section_title('Head and Tail')
    print_heads_and_tails(dataframe, head)

    print_section_title('Na Values')
    display_na(dataframe)

    print_section_title('Quantiles')
    display_quantiles(dataframe)



In [13]:
# Print the full overview (shape, dtypes, info, head/tail, NA counts, quantiles) of the loaded data.
check_df(df)

[4m[1m[34mShape[0m
(491, 29)
[4m[1m[34mTypes[0m
<pandas.io.formats.style.Styler object at 0x0000021D37A106E0>
[4m[1m[34mInfo[0m
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 491 entries, 0 to 490
Data columns (total 29 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Project Name                            491 non-null    object 
 1   Project Type                            491 non-null    object 
 2   City                                    491 non-null    object 
 3   County                                  491 non-null    object 
 4   State                                   491 non-null    object 
 5   Digester Type                           491 non-null    object 
 6   Status                                  491 non-null    object 
 7   Year Operational                        491 non-null    float64
 8   Animal/Farm Type(s)                     491 non-null    obje

Unnamed: 0,Project Name,Project Type,City,County,State,Digester Type,Status,Year Operational,Animal/Farm Type(s),Cattle,Dairy,Poultry,Swine,Co-Digestion,Biogas Generation Estimate (cu-ft/day),Electricity Generated (kWh/yr),Biogas End Use(s),LCFS Pathway?,Receiving Utility,Total Emission Reductions (MTCO2e/yr),Awarded USDA Funding?,Operational Years,Total_Animals,Biogas_per_Animal (cu-ft/day),Emission_Reduction_per_Year,Electricity_to_Biogas_Ratio,Total_Waste_kg/day,Waste_Efficiency,Electricity_Efficiency
0,Cargill - Sandy River Farm Digester,Farm Scale,Morrilton,Conway,Arkansas,Covered Lagoon,Operational,2008.0,Swine,0.0,0.0,0.0,4200.0,No-Info,1814400.0,0.0,Flared Full-time,No-Info,No-Info,4482.755303,No-Info,15.0,4200.0,432.0,298.850354,0.0,21000.0,86.4,0.0
1,Butterfield RNG Digester,Farm Scale,Buckeye,Maricopa,Arizona,Mixed Plug Flow,Operational,2022.0,Dairy,0.0,11760.0,0.0,0.0,No-Info,109486.0,389698.2,Pipeline Gas,No-Info,Yes,14030.128975,Yes,1.0,11760.0,9.310034,14030.128975,3.559343,399840.0,0.273825,0.974635
2,Caballero Dairy Farms Digester,Farm Scale,Eloy,Pinal,Arizona,Unknown or Unspecified,Construction,2022.0,Dairy,0.0,8800.0,0.0,0.0,No-Info,94500.0,2813080.4,Pipeline Gas,No-Info,No-Info,98999.890159,No-Info,1.0,8800.0,10.738636,98999.890159,29.768047,299200.0,0.315842,9.402007
3,Green Gas Partners Stanfield Digester,Centralized/Regional,Maricopa,Pinal,Arizona,Complete Mix,Operational,2018.0,Dairy,0.0,30000.0,0.0,0.0,No-Info,3454500.0,2760336.0,Pipeline Gas,No-Info,Yes,337499.62554,No-Info,5.0,30000.0,115.15,67499.925108,0.799055,1020000.0,3.386765,2.706212
4,Paloma Dairy Digester,Farm Scale,Gila Bend,Maricopa,Arizona,Complete Mix,Operational,2021.0,Dairy,0.0,10000.0,0.0,0.0,No-Info,852545.2,0.0,CNG,No-Info,Yes,100569.493399,No-Info,2.0,10000.0,85.25452,50284.746699,0.0,340000.0,2.507486,0.0


Unnamed: 0,Project Name,Project Type,City,County,State,Digester Type,Status,Year Operational,Animal/Farm Type(s),Cattle,Dairy,Poultry,Swine,Co-Digestion,Biogas Generation Estimate (cu-ft/day),Electricity Generated (kWh/yr),Biogas End Use(s),LCFS Pathway?,Receiving Utility,Total Emission Reductions (MTCO2e/yr),Awarded USDA Funding?,Operational Years,Total_Animals,Biogas_per_Animal (cu-ft/day),Emission_Reduction_per_Year,Electricity_to_Biogas_Ratio,Total_Waste_kg/day,Waste_Efficiency,Electricity_Efficiency
486,Quantum Dairy Digester,Farm Scale,Weyauwega,Waupaca,Wisconsin,Mixed Plug Flow,Shut down,2005.0,Dairy,0.0,1200.0,0.0,0.0,No-Info,356020.0,3350700.0,Cogeneration,No-Info,Yes,45346.436601,Yes,18.0,1200.0,296.683333,2519.246478,9.41155,40800.0,8.72598,82.125
487,Stencil Farm Digester,Farm Scale,Denmark,Brown,Wisconsin,Horizontal Plug Flow,Shut down,2002.0,Dairy,0.0,1000.0,0.0,0.0,No-Info,310865.6,946080.0,Electricity,No-Info,Yes,50391.122119,No-Info,21.0,1000.0,310.8656,2399.577244,3.043373,34000.0,9.143106,27.825882
488,Tinedale Farms Digester,Farm Scale,Wrightstown,Jackson,Wisconsin,Fixed Film/Attached Media,Shut down,1999.0,Dairy,0.0,1800.0,0.0,0.0,No-Info,200000.0,5584500.0,Electricity; Boiler/Furnace fuel,No-Info,No-Info,46163.455081,No-Info,24.0,1800.0,111.111111,1923.477295,27.9225,61200.0,3.267974,91.25
489,USEMCO - Peters Farm Digester,Farm Scale,Chaseburg,Vernon,Wisconsin,Complete Mix,Shut down,2011.0,Dairy,0.0,200.0,0.0,0.0,Yes,23000.0,3206160.0,Electricity,No-Info,Yes,91464.824612,No-Info,12.0,200.0,115.0,7622.068718,139.398261,6800.0,3.382353,471.494118
490,Wyoming Premium Farms 2 Digester,Farm Scale,Wheatland,Platte,Wyoming,Complete Mix,Shut down,2004.0,Swine,0.0,0.0,0.0,18000.0,No-Info,445540.0,1340280.0,Electricity,No-Info,No-Info,30798.542361,No-Info,19.0,18000.0,24.752222,1620.975914,3.008215,90000.0,4.950444,14.892


[4m[1m[34mNa Values[0m


Unnamed: 0,Column,Number of NA Values
0,Project Name,0
1,Project Type,0
2,City,0
3,County,0
4,State,0
5,Digester Type,0
6,Status,0
7,Year Operational,0
8,Animal/Farm Type(s),0
9,Cattle,0


[4m[1m[34mQuantiles[0m


Unnamed: 0,Year Operational,Cattle,Dairy,Poultry,Swine,Biogas Generation Estimate (cu-ft/day),Electricity Generated (kWh/yr),Total Emission Reductions (MTCO2e/yr),Operational Years,Total_Animals,Biogas_per_Animal (cu-ft/day),Emission_Reduction_per_Year,Electricity_to_Biogas_Ratio,Total_Waste_kg/day,Waste_Efficiency,Electricity_Efficiency
count,491.0,491.0,491.0,491.0,491.0,491.0,491.0,491.0,491.0,491.0,491.0,491.0,491.0,491.0,491.0,491.0
mean,2012.57,183.52,3543.31,6087.58,2969.49,443918.46,2218724.16,38377.98,10.49,12783.9,527.76,9914.18,10.06,139538.21,51.69,40.77
std,8.77,2237.16,4984.93,61662.9,17713.66,414336.11,4487113.72,45532.72,8.69,63625.76,4241.48,18366.55,19.01,195557.63,819.43,93.33
min,1972.0,0.0,0.0,0.0,0.0,5000.0,0.0,4.22,0.0,10.0,0.39,0.0,0.0,50.0,0.04,0.0
0%,1972.0,0.0,0.0,0.0,0.0,5000.0,0.0,4.22,0.0,10.0,0.39,0.0,0.0,50.0,0.04,0.0
5%,1998.0,0.0,0.0,0.0,0.0,33692.5,0.0,2018.51,0.0,350.0,4.05,0.0,0.0,8500.0,0.53,0.0
50%,2012.0,0.0,2000.0,0.0,0.0,344690.0,756437.8,24094.43,11.0,3157.0,75.63,1695.22,4.0,76500.0,2.69,18.02
95%,2023.0,0.0,10375.0,0.0,9696.0,1017422.6,8216981.4,103921.22,25.0,34000.0,1445.03,57594.2,40.08,403920.0,57.57,141.57
99%,2025.0,2700.0,30250.0,132000.0,80050.0,1817830.0,16276400.0,255068.93,41.0,166000.0,5324.62,84177.47,97.79,1106700.0,199.09,382.19
100%,2025.0,40000.0,39000.0,1200000.0,239200.0,3454500.0,70364700.0,390000.01,51.0,1200000.0,90654.52,130000.0,174.27,2126000.0,18130.9,1150.82


In [14]:
# Drop the columns that are not needed: the analysis focuses only on dairy
# cattle, so the project identifier/location fields and the cattle, poultry,
# swine and total-animal counts are removed.
columns_to_drop = ["Project Name", "City", "County", "State", 'Cattle', 'Poultry', 'Swine', 'Total_Animals']

# Rebind instead of mutating in place, and ignore columns that are already
# gone, so re-running this cell does not raise a KeyError.
df = df.drop(columns=columns_to_drop, errors="ignore")


In [18]:
# Keep only the rows whose 'Dairy' value is not 0.
df = df.loc[df['Dairy'].ne(0)]

In [19]:
# Check how many rows and columns the frame currently has.
df.shape

(420, 21)

In [20]:
# Preview the first rows of the current frame.
df.head()

Unnamed: 0,Project Type,Digester Type,Status,Year Operational,Animal/Farm Type(s),Dairy,Co-Digestion,Biogas Generation Estimate (cu-ft/day),Electricity Generated (kWh/yr),Biogas End Use(s),LCFS Pathway?,Receiving Utility,Total Emission Reductions (MTCO2e/yr),Awarded USDA Funding?,Operational Years,Biogas_per_Animal (cu-ft/day),Emission_Reduction_per_Year,Electricity_to_Biogas_Ratio,Total_Waste_kg/day,Waste_Efficiency,Electricity_Efficiency
1,Farm Scale,Mixed Plug Flow,Operational,2022.0,Dairy,11760.0,No-Info,109486.0,389698.2,Pipeline Gas,No-Info,Yes,14030.129,Yes,1.0,9.31,14030.129,3.559,399840.0,0.274,0.975
2,Farm Scale,Unknown or Unspecified,Construction,2022.0,Dairy,8800.0,No-Info,94500.0,2813080.4,Pipeline Gas,No-Info,No-Info,98999.89,No-Info,1.0,10.739,98999.89,29.768,299200.0,0.316,9.402
3,Centralized/Regional,Complete Mix,Operational,2018.0,Dairy,30000.0,No-Info,3454500.0,2760336.0,Pipeline Gas,No-Info,Yes,337499.626,No-Info,5.0,115.15,67499.925,0.799,1020000.0,3.387,2.706
4,Farm Scale,Complete Mix,Operational,2021.0,Dairy,10000.0,No-Info,852545.2,0.0,CNG,No-Info,Yes,100569.493,No-Info,2.0,85.255,50284.747,0.0,340000.0,2.507,0.0
5,Farm Scale,Covered Lagoon,Operational,2011.0,Dairy,15000.0,No-Info,663685.2,5256000.0,Electricity,Yes,Yes,154991.55,No-Info,12.0,44.246,12915.962,7.919,510000.0,1.301,10.306


In [21]:

def grab_col_names(dataframe, cat_th = 10, car_th =20):
    """Group the columns of ``dataframe`` into categorical and numeric sets.

    A column is classified from its dtype and its number of unique values:

    * object dtype with more than ``car_th`` unique values -> ``cat_but_car``
      (object-typed but high-cardinality);
    * any other object-dtype column -> ``cat_cols``;
    * non-object dtype with fewer than ``cat_th`` unique values ->
      ``num_but_cat`` (these are also appended to the end of ``cat_cols``);
    * any other non-object column -> ``num_cols``.

    A short summary (row/column counts and the size of each group) is printed.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are classified.
    cat_th : int, default 10
        A non-object column with fewer unique values than this is treated as
        categorical.
    car_th : int, default 20
        An object column with more unique values than this is treated as
        high-cardinality.

    Returns
    -------
    tuple of list
        ``(cat_cols, num_cols, cat_but_car, num_but_cat)``, each holding column
        names in their original column order.
    """
    object_cols = []
    num_cols = []
    cat_but_car = []
    num_but_cat = []

    for name in dataframe.columns:
        column = dataframe[name]
        distinct_count = column.nunique()
        if column.dtypes == "O":
            # Object column: split off the high-cardinality ones.
            if distinct_count > car_th:
                cat_but_car.append(name)
            else:
                object_cols.append(name)
        elif distinct_count < cat_th:
            # Non-object column with few distinct values.
            num_but_cat.append(name)
        else:
            num_cols.append(name)

    # Object-typed categoricals come first, then the low-cardinality non-object columns.
    cat_cols = object_cols + num_but_cat

    n_rows, n_columns = dataframe.shape[0], dataframe.shape[1]
    print(f"Observations: {n_rows}")
    print(f"Variables: {n_columns}")
    print(f'cat_cols: {len(cat_cols)}')
    print(f'num_cols: {len(num_cols)}')
    print(f'cat_but_car: {len(cat_but_car)}')
    print(f'num_but_cat: {len(num_but_cat)}')

    return cat_cols, num_cols, cat_but_car, num_but_cat


In [22]:
# Classify the columns with the default thresholds (cat_th=10, car_th=20).
cat_cols, num_cols, cat_but_car, num_but_cat = grab_col_names(df)

Observations: 420
Variables: 21
cat_cols: 9
num_cols: 12
cat_but_car: 0
num_but_cat: 0


In [23]:
# List the categorical columns returned by grab_col_names.
cat_cols

['Project Type',
 'Digester Type',
 'Status',
 'Animal/Farm Type(s)',
 'Co-Digestion',
 'Biogas End Use(s)',
 'LCFS Pathway?',
 'Receiving Utility',
 'Awarded USDA Funding?']

In [27]:
# List the numeric columns returned by grab_col_names.
num_cols

['Year Operational',
 'Dairy',
 'Biogas Generation Estimate (cu-ft/day)',
 'Electricity Generated (kWh/yr)',
 'Total Emission Reductions (MTCO2e/yr)',
 'Operational Years',
 'Biogas_per_Animal (cu-ft/day)',
 'Emission_Reduction_per_Year',
 'Electricity_to_Biogas_Ratio',
 'Total_Waste_kg/day',
 'Waste_Efficiency',
 'Electricity_Efficiency']

271