In [1]:
import pandas as pd
import numpy as np

In [2]:
# Default location of the cleaned Hainan plant data, relative to the working directory.
DEFAULT_HAINAN_CSV = "Data/HainanClean_FINAL.csv"

# Columns present in the CSV that are discarded by the loader.
_HAINAN_DROPPED_COLUMNS = [
    "Month #",
    "Day #",
    "BioCNG Sold (m3)",
    "Vehicle use (m3)",
]

# CSV header -> short column name. Units are taken from the CSV headers.
_HAINAN_COLUMN_RENAMES = {
    # biogas production is in units of m3
    "Raw Biogas Produced (m3)": "RawBiogas",
    "BioCNG Produced (m3)": "BioCNG",
    # fertilizer production is in units of t
    "Liquid Fertilizer Produced (t)": "LiquidFertilizer",
    "Solid fertilizer produced (t)": "SolidFertilizer",
    # solid residues are in units of kg
    "Solid residues (kg)": "SolidResidues",
    # all feedstock inputs are in units of t
    "Pig Manure (t)": "PigManure",
    "Cassava (t)": "Cassava",
    "Fish waste water (t)": "FishWastewater",
    "Kitchen food waste (t)": "KitchenFoodWaste",
    "Municipal fecal residue (t)": "MunicipalFecalWaste",
    "Tea waste (t)": "TeaWaste",
    "Chicken litter (t)": "ChickenWaste",
    "Bagasse feed (t)": "Bagasse",
    "Alcohol waste (t)": "AlcoholWaste",
    "Chinese medicine waste (t)": "ChineseMedicineWaste",
    "Energy grass (t)": "EnergyGrass",
    "Banana fruit shafts (t)": "BananaFruitShafts",
    "Lemon waste (t)": "LemonWaste",
    "Percolate (t)": "Percolate",
    "Other waste (t)": "Other",
    # all auxiliary chemical inputs are in units of kg
    "50% NaOH/kg": "NaOH",
    "FeCl2/kg": "FeCl2",
    "PAM/kg": "PAM",
    "Defoamer/kg": "Defoamer",
    # electricity use is in units of kWh
    "Project electricity use/kWh": "ProjectElectricity",
    "Office space electricity use/kWh": "OfficeElectricity",
    # water is in units of m3 and diesel is in units of L
    "Water/m3": "Water",
    "Diesel/L": "Diesel",
    # NOTE: the unit of the wastewater column is not given in the CSV header.
    "Wastewater flow to WWTP (unit?)": "WasteWater",
}

# Renamed columns that are explicitly coerced to numeric values.
_HAINAN_NUMERIC_COLUMNS = (
    "TeaWaste",
    "FeCl2",
    "Defoamer",
    "ProjectElectricity",
    "Water",
)


def load_hainan_data(path=DEFAULT_HAINAN_CSV):
    """Load the Hainan CSV and return a cleaned DataFrame.

    Steps, in order:

    1. Read the CSV at ``path``.
    2. Drop the columns listed in ``_HAINAN_DROPPED_COLUMNS``.
    3. Rename the verbose CSV headers to short names (see
       ``_HAINAN_COLUMN_RENAMES``); headers that are absent from the file are
       ignored by ``DataFrame.rename``. Row labels are converted to strings.
    4. Coerce the columns in ``_HAINAN_NUMERIC_COLUMNS`` to numbers; entries
       that cannot be parsed become NaN.
    5. Replace every remaining NaN in the frame (in any column) with 0.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to ``"Data/HainanClean_FINAL.csv"``,
        resolved relative to the current working directory.

    Returns
    -------
    pandas.DataFrame
        The cleaned data with string row labels and no NaN values.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    KeyError
        If a column that must be dropped or coerced is missing from the file.
    """
    df = pd.read_csv(path)

    # One drop and one rename instead of a chain of single-purpose calls.
    df = df.drop(columns=_HAINAN_DROPPED_COLUMNS)

    # index=str turns the row labels into strings; kept so that callers that
    # index rows by string label keep working.
    df = df.rename(index=str, columns=_HAINAN_COLUMN_RENAMES)

    # Only these columns are coerced; unparseable entries turn into NaN here
    # and are then zero-filled together with genuinely missing values below.
    for col in _HAINAN_NUMERIC_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Missing values are treated as zero throughout the frame.
    return df.fillna(0)