In [28]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [11]:
# Load the raw GLEAM table from a CSV file; the path is relative to the
# directory the notebook is run from.
GLEAM = pd.read_csv("Data/GLEAMorig1.csv")

### Checking out our dataset before wrangling the data

In [12]:
# Summarise the raw frame: column names, non-null counts and dtypes.
GLEAM.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 583 entries, 0 to 582
Data columns (total 22 columns):
 #   Column                                           Non-Null Count  Dtype  
---  ------                                           --------------  -----  
 0   Region                                           583 non-null    object 
 1   Animal_species                                   583 non-null    object 
 2   Production_system                                583 non-null    object 
 3   Commodity                                        583 non-null    object 
 4   EmissionIntensity                                583 non-null    float64
 5   Production_kgProtein                             543 non-null    float64
 6   TotalGHG_CO2                                     543 non-null    float64
 7   TotalCO2                                         543 non-null    float64
 8   Total CH4 emissions (kg CO2e)                    543 non-null    float64
 9   Total N2O emissions (kg CO2e)   

### Some columns are not numeric (they have the `object` dtype) and will need to be transformed into integers. First, we remove the columns that are not needed in this analysis by calling `drop()` on multiple columns.

In [56]:
# Build the working frame without the columns this analysis does not use.
# `columns=` is the keyword form of `axis=1`; drop() returns a new frame.
GLEAM1 = GLEAM.drop(
    columns=[
        'Commodity',
        'LUC: soy & palm, CO2 (kg CO2e)',
        'LUC: pasture expansion, CO2 (kg CO2e)',
    ]
)

In [48]:
# Inspect the working frame after the unused columns were dropped.
# NOTE(review): the saved output below (20 columns, no Region) does not match the
# current drop cell, which removes 3 of the 22 columns and keeps Region. It looks
# like output from an earlier run; restart the kernel and run all cells to refresh.
GLEAM1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 583 entries, 0 to 582
Data columns (total 20 columns):
 #   Column                                           Non-Null Count  Dtype  
---  ------                                           --------------  -----  
 0   Animal_species                                   583 non-null    object 
 1   Production_system                                583 non-null    object 
 2   EmissionIntensity                                583 non-null    float64
 3   Production_kgProtein                             543 non-null    float64
 4   TotalGHG_CO2                                     543 non-null    float64
 5   TotalCO2                                         543 non-null    float64
 6   Total CH4 emissions (kg CO2e)                    543 non-null    float64
 7   Total N2O emissions (kg CO2e)                    543 non-null    float64
 8   FeedCO2                                          543 non-null    float64
 9   Feed, CH4 (kg CO2e)             

### Getting value counts for the next transformation from object to integer. 

In [57]:
# Count the rows per animal species; these labels are the ones to encode as integers.
GLEAM1["Animal_species"].value_counts()

Cattle       121
Chicken      121
Buffaloes     99
Sheep         99
Goats         99
Pigs          44
Name: Animal_species, dtype: int64

In [42]:
# Count the rows per production system; these labels are the ones to encode as integers.
GLEAM1["Production_system"].value_counts()

Aggregated              176
Grassland systems       132
Mixed systems           132
Backyard systems         44
Layers                   33
Feedlots                 22
Broilers                 22
Intermediate systems     11
Industrial systems       11
Name: Production_system, dtype: int64

In [62]:
# Count the rows per region; these labels are the ones to encode as integers.
GLEAM1["Region"].value_counts()

Global                             53
East Asia and Southeast Asia       53
Eastern Europe                     53
Latin America and the Caribbean    53
Near East and North Africa         53
North America                      53
Oceania                            53
Russian Federation                 53
South Asia                         53
Sub-Saharan Africa                 53
Western Europe                     53
Name: Region, dtype: int64

### Transforming the data from our object columns after finding the values. 

In [58]:
def Production_system(series):
    """Translate one production-system label into its integer code.

    Despite its name, ``series`` is a single cell value (the function is
    applied element-wise to the ``Production_system`` column), not a whole
    pandas Series.

    Returns the integer code of a known label, or ``None`` for any other value.
    """
    # NOTE(review): code 2 is not assigned to any label (the codes jump from 1
    # to 3). It is kept as-is so existing encoded values do not change; confirm
    # whether the gap is intentional.
    codes = {
        "Aggregated": 0,
        "Grassland systems": 1,
        "Mixed systems": 3,
        "Backyard systems": 4,
        "Layers": 5,
        "Feedlots": 6,
        "Broilers": 7,
        "Intermediate systems": 8,
        "Industrial systems": 9,
    }
    return codes.get(series)
# Store the integer code of each production-system label in a new column.
GLEAM1['ProdSystemR'] = GLEAM1['Production_system'].map(Production_system)

In [59]:
def Animal_species(series):
    """Translate one animal-species label into its integer code.

    Despite its name, ``series`` is a single cell value (the function is
    applied element-wise to the ``Animal_species`` column), not a whole
    pandas Series.

    Returns the integer code of a known label, or ``None`` for any other value.
    """
    codes = {
        "Cattle": 0,
        "Chicken": 1,
        "Buffaloes": 2,
        "Sheep": 3,
        "Goats": 4,
        "Pigs": 5,
    }
    return codes.get(series)
# Store the integer code of each animal-species label in a new column.
GLEAM1['Species'] = GLEAM1['Animal_species'].map(Animal_species)

In [63]:
def Region(series):
    """Translate one region label into its integer code.

    Despite its name, ``series`` is a single cell value (the function is
    applied element-wise to the ``Region`` column), not a whole pandas Series.

    Returns the integer code of a known label, or ``None`` for any other value.
    """
    codes = {
        "Global": 0,
        "East Asia and Southeast Asia": 1,
        "Eastern Europe": 2,
        "Latin America and the Caribbean": 3,
        "Near East and North Africa": 4,
        "North America": 5,
        "Oceania": 6,
        "Russian Federation": 7,
        "South Asia": 8,
        "Sub-Saharan Africa": 9,
        "Western Europe": 10,
    }
    return codes.get(series)
# Store the integer code of each region label in a new column.
GLEAM1['RegionR'] = GLEAM1['Region'].map(Region)

### Checking out the transformation of our columns. 

In [64]:
# Show the first rows to confirm that the integer-coded columns
# (ProdSystemR, Species, RegionR) were added next to the original text columns.
GLEAM1.head()

Unnamed: 0,Region,Animal_species,Production_system,EmissionIntensity,Production_kgProtein,TotalGHG_CO2,TotalCO2,Total CH4 emissions (kg CO2e),Total N2O emissions (kg CO2e),FeedCO2,...,"Feed: applied & deposited manure, N2O (kg CO2e)","Enteric fermentation, CH4 (kg CO2e)","Manure management, CH4 (kg CO2e)","Manure management, N2O (kg CO2e)",Direct energyCO2,"Indirect energy, CO2 (kg CO2e)",PostfarmCO2,ProdSystemR,Species,RegionR
0,Global,Cattle,Aggregated,160.3,29163100000.0,4670000000000.0,945000000000.0,2650000000000.0,1080000000000.0,353000000000.0,...,742000000000.0,2510000000000.0,139000000000.0,165000000000.0,42155420000.0,15856530000.0,99559320000.0,0,0,0
1,Global,Cattle,Aggregated,86.7,18880890000.0,1640000000000.0,276000000000.0,968000000000.0,394000000000.0,138000000000.0,...,254000000000.0,886000000000.0,82009060000.0,68132780000.0,29934850000.0,3252889000.0,89295440000.0,0,0,0
2,Global,Cattle,Aggregated,295.4,10282220000.0,3040000000000.0,669000000000.0,1680000000000.0,687000000000.0,215000000000.0,...,488000000000.0,1620000000000.0,57443100000.0,96780190000.0,12220570000.0,12603640000.0,10263880000.0,0,0,0
3,Global,Cattle,Grassland systems,206.3,10338170000.0,2130000000000.0,563000000000.0,1050000000000.0,517000000000.0,111000000000.0,...,427000000000.0,997000000000.0,56055520000.0,37985080000.0,14173260000.0,4303272000.0,37730920000.0,1,0,0
4,Global,Cattle,Grassland systems,95.0,6940654000.0,659000000000.0,92861400000.0,371000000000.0,195000000000.0,44040030000.0,...,159000000000.0,337000000000.0,34444280000.0,14731880000.0,10786050000.0,999843500.0,34623380000.0,1,0,0


In [65]:
# Pearson correlation matrix of the numeric columns.
# GLEAM1 still contains the text columns Region, Animal_species and
# Production_system. Older pandas silently skipped them in DataFrame.corr, but
# pandas >= 2.0 raises on non-numeric data, so the numeric columns are selected
# explicitly here. This form behaves the same on old and new pandas versions.
GLEAM1.select_dtypes(include="number").corr(method='pearson')

Unnamed: 0,EmissionIntensity,Production_kgProtein,TotalGHG_CO2,TotalCO2,Total CH4 emissions (kg CO2e),Total N2O emissions (kg CO2e),FeedCO2,"Feed, CH4 (kg CO2e)","Feed: fertilizer & crop residues, N2O (kg CO2e)","Feed: applied & deposited manure, N2O (kg CO2e)","Enteric fermentation, CH4 (kg CO2e)","Manure management, CH4 (kg CO2e)","Manure management, N2O (kg CO2e)",Direct energyCO2,"Indirect energy, CO2 (kg CO2e)",PostfarmCO2,ProdSystemR,Species,RegionR
EmissionIntensity,1.0,-0.084033,0.173321,0.094549,0.20093,0.162009,0.004201,0.289618,0.019845,0.206389,0.167511,-0.032064,0.098768,-0.16205,0.000746,-0.123772,-0.317356,0.01554897,-0.1467117
Production_kgProtein,-0.084033,1.0,0.778108,0.849576,0.677091,0.797776,0.945389,0.491606,0.933474,0.680043,0.839568,0.724107,0.823926,0.931646,0.808579,0.916217,-0.02033321,-0.2190787,-0.2845201
TotalGHG_CO2,0.173321,0.778108,1.0,0.889467,0.977823,0.990693,0.814539,0.67462,0.833524,0.966014,0.984958,0.669039,0.929895,0.589799,0.848962,0.720039,-0.1066867,-0.2494815,-0.2419795
TotalCO2,0.094549,0.849576,0.889467,1.0,0.777306,0.870825,0.876347,0.499462,0.856453,0.819352,0.873359,0.637681,0.782907,0.751739,0.809136,0.713655,-0.04667099,-0.2165882,-0.2797953
Total CH4 emissions (kg CO2e),0.20093,0.677091,0.977823,0.777306,1.0,0.966025,0.717873,0.93393,0.753247,0.955775,0.997186,0.629329,0.925213,0.459156,0.798497,0.660929,-0.1232411,-0.2425114,-0.2015294
Total N2O emissions (kg CO2e),0.162009,0.797776,0.990693,0.870825,0.966025,1.0,0.827544,0.638671,0.84694,0.975001,0.977797,0.675677,0.93197,0.614635,0.85674,0.735401,-0.1138351,-0.2565044,-0.2522996
FeedCO2,0.004201,0.945389,0.814539,0.876347,0.717873,0.827544,1.0,0.625032,0.990703,0.695689,0.853993,0.732654,0.866729,0.851892,0.809461,0.8248,-0.01892684,-0.1820104,-0.3174703
"Feed, CH4 (kg CO2e)",0.289618,0.491606,0.67462,0.499462,0.93393,0.638671,0.625032,1.0,0.656701,0.403717,0.90003,0.907921,0.771021,0.210091,0.306427,0.467863,0.01373338,0.3004104,-0.3994697
"Feed: fertilizer & crop residues, N2O (kg CO2e)",0.019845,0.933474,0.833524,0.856453,0.753247,0.84694,0.990703,0.656701,1.0,0.713768,0.860619,0.741359,0.8977,0.818564,0.800817,0.847474,-0.0382848,-0.1899112,-0.3123379
"Feed: applied & deposited manure, N2O (kg CO2e)",0.206389,0.680043,0.966014,0.819352,0.955775,0.975001,0.695689,0.403717,0.713768,1.0,0.95917,0.579266,0.844403,0.495467,0.804424,0.625501,-0.1354734,-0.260867,-0.2133455


In [29]:
# Remove, in place, every row that has at least one missing value
# (several emission columns report 543 non-null entries out of 583 rows).
GLEAM1.dropna(axis="index", how="any", inplace=True)