In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [11]:
# Load the GLEAM CSV into a DataFrame. The path is relative, so it is resolved
# against the directory the notebook kernel is running in.
GLEAM = pd.read_csv("Data/GLEAMorig1.csv")

### Checking out our dataset before wrangling the data

In [12]:
# Print each column's name, non-null count and dtype for the raw frame.
GLEAM.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 583 entries, 0 to 582
Data columns (total 22 columns):
 #   Column                                           Non-Null Count  Dtype  
---  ------                                           --------------  -----  
 0   Region                                           583 non-null    object 
 1   Animal_species                                   583 non-null    object 
 2   Production_system                                583 non-null    object 
 3   Commodity                                        583 non-null    object 
 4   EmissionIntensity                                583 non-null    float64
 5   Production_kgProtein                             543 non-null    float64
 6   TotalGHG_CO2                                     543 non-null    float64
 7   TotalCO2                                         543 non-null    float64
 8   Total CH4 emissions (kg CO2e)                    543 non-null    float64
 9   Total N2O emissions (kg CO2e)   

### Some columns are not numeric (dtype `object`) and will need to be transformed into integers. First, we remove the columns that are not needed in this analysis by passing a list of column names to `drop()`.

In [13]:
# Build the working frame without the columns this analysis does not use.
# drop() returns a new DataFrame, so the raw GLEAM frame is left as loaded.
GLEAM1 = GLEAM.drop(
    columns=[
        'Commodity',
        'Region',
        'LUC: soy & palm, CO2 (kg CO2e)',
        'LUC: pasture expansion, CO2 (kg CO2e)',
    ]
)

In [48]:
# Print each column's name, non-null count and dtype for the working frame.
# NOTE(review): the saved output of this cell lists 20 columns because it was
# last executed (In [48]) after the 'ProdSystemR' and 'Species' columns were
# added further down; run top-to-bottom at this point, 22 - 4 dropped columns
# gives 18. Re-run the notebook in order to refresh the output.
GLEAM1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 583 entries, 0 to 582
Data columns (total 20 columns):
 #   Column                                           Non-Null Count  Dtype  
---  ------                                           --------------  -----  
 0   Animal_species                                   583 non-null    object 
 1   Production_system                                583 non-null    object 
 2   EmissionIntensity                                583 non-null    float64
 3   Production_kgProtein                             543 non-null    float64
 4   TotalGHG_CO2                                     543 non-null    float64
 5   TotalCO2                                         543 non-null    float64
 6   Total CH4 emissions (kg CO2e)                    543 non-null    float64
 7   Total N2O emissions (kg CO2e)                    543 non-null    float64
 8   FeedCO2                                          543 non-null    float64
 9   Feed, CH4 (kg CO2e)             

### Getting the value counts of the two `object` columns to see which labels must be mapped to integer codes in the next transformation.

In [41]:
# List every distinct species label and how many rows carry it.
GLEAM1['Animal_species'].value_counts()

Cattle       121
Chicken      121
Buffaloes     99
Sheep         99
Goats         99
Pigs          44
Name: Animal_species, dtype: int64

In [42]:
# List every distinct production-system label and how many rows carry it.
GLEAM1['Production_system'].value_counts()

Aggregated              176
Grassland systems       132
Mixed systems           132
Backyard systems         44
Layers                   33
Feedlots                 22
Broilers                 22
Intermediate systems     11
Industrial systems       11
Name: Production_system, dtype: int64

In [43]:
def Production_system(series):
    """Return the integer code assigned to one production-system label.

    Parameters
    ----------
    series
        A single label value (despite the parameter name, it is compared
        against plain strings with ``==``, one value at a time).

    Returns
    -------
    int or None
        The code paired with the first matching label, or ``None`` when the
        value matches none of the known labels.
    """
    # Ordered (label, code) pairs, checked top to bottom.
    # NOTE(review): no label is assigned code 2 (the codes jump from 1 to 3).
    # The existing codes are kept as-is; confirm whether the gap is intended.
    label_codes = (
        ("Aggregated", 0),
        ("Grassland systems", 1),
        ("Mixed systems", 3),
        ("Backyard systems", 4),
        ("Layers", 5),
        ("Feedlots", 6),
        ("Broilers", 7),
        ("Intermediate systems", 8),
        ("Industrial systems", 9),
    )
    for label, code in label_codes:
        if series == label:
            return code
    return None
# Add a 'ProdSystemR' column to GLEAM1 holding the value Production_system()
# returns for each entry of the 'Production_system' column. This modifies
# GLEAM1 in place; the original text column is kept alongside it.
GLEAM1['ProdSystemR'] = GLEAM1['Production_system'].apply(Production_system)

In [46]:
def Animal_species(series):
    """Return the integer code assigned to one animal-species label.

    Parameters
    ----------
    series
        A single label value (despite the parameter name, it is compared
        against plain strings with ``==``, one value at a time).

    Returns
    -------
    int or None
        The code paired with the first matching label, or ``None`` when the
        value matches none of the known labels.
    """
    # Ordered (label, code) pairs, checked top to bottom.
    label_codes = (
        ("Cattle", 0),
        ("Chicken", 1),
        ("Buffaloes", 2),
        ("Sheep", 3),
        ("Goats", 4),
        ("Pigs", 5),
    )
    for label, code in label_codes:
        if series == label:
            return code
    return None
# Add a 'Species' column to GLEAM1 holding the value Animal_species() returns
# for each entry of the 'Animal_species' column. This modifies GLEAM1 in
# place; the original text column is kept alongside it.
GLEAM1['Species'] = GLEAM1['Animal_species'].apply(Animal_species)

### Checking out the transformation of our columns.

In [47]:
# Show the first rows; the new 'ProdSystemR' and 'Species' columns appear last.
GLEAM1.head()

Unnamed: 0,Animal_species,Production_system,EmissionIntensity,Production_kgProtein,TotalGHG_CO2,TotalCO2,Total CH4 emissions (kg CO2e),Total N2O emissions (kg CO2e),FeedCO2,"Feed, CH4 (kg CO2e)","Feed: fertilizer & crop residues, N2O (kg CO2e)","Feed: applied & deposited manure, N2O (kg CO2e)","Enteric fermentation, CH4 (kg CO2e)","Manure management, CH4 (kg CO2e)","Manure management, N2O (kg CO2e)",Direct energyCO2,"Indirect energy, CO2 (kg CO2e)",PostfarmCO2,ProdSystemR,Species
0,Cattle,Aggregated,160.3,29163100000.0,4670000000000.0,945000000000.0,2650000000000.0,1080000000000.0,353000000000.0,,174000000000.0,742000000000.0,2510000000000.0,139000000000.0,165000000000.0,42155420000.0,15856530000.0,99559320000.0,0,0
1,Cattle,Aggregated,86.7,18880890000.0,1640000000000.0,276000000000.0,968000000000.0,394000000000.0,138000000000.0,,71662080000.0,254000000000.0,886000000000.0,82009060000.0,68132780000.0,29934850000.0,3252889000.0,89295440000.0,0,0
2,Cattle,Aggregated,295.4,10282220000.0,3040000000000.0,669000000000.0,1680000000000.0,687000000000.0,215000000000.0,,102000000000.0,488000000000.0,1620000000000.0,57443100000.0,96780190000.0,12220570000.0,12603640000.0,10263880000.0,0,0
3,Cattle,Grassland systems,206.3,10338170000.0,2130000000000.0,563000000000.0,1050000000000.0,517000000000.0,111000000000.0,,52536600000.0,427000000000.0,997000000000.0,56055520000.0,37985080000.0,14173260000.0,4303272000.0,37730920000.0,1,0
4,Cattle,Grassland systems,95.0,6940654000.0,659000000000.0,92861400000.0,371000000000.0,195000000000.0,44040030000.0,,21561260000.0,159000000000.0,337000000000.0,34444280000.0,14731880000.0,10786050000.0,999843500.0,34623380000.0,1,0
