# Creating Correlations Data to map the over time relationship of energy consumption and GDP

In [1]:
#package import of DA standard packages
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#reading in the data from the updated consumptions table
consume_df = pd.read_csv('/Users/sebastianlorenzen-schmidt/neuefische/capstone_project/data/energy-consumption-by-source-and-region-solar-zero.csv')

In [3]:
consume_df

Unnamed: 0.1,Unnamed: 0,iso_code,country,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,solar_consumption,other_renewable_consumption,biofuel_consumption,low_carbon_consumption,renewables_consumption
0,0,AGO,Angola,1990,0.0,0.0,2.051429,15.325936,5.834961,0.0,0.0,0.0,0.0,2.051429,2.051429
1,1,AGO,Angola,1991,0.0,0.0,2.042857,16.144753,6.142351,0.0,0.0,0.0,0.0,2.042857,2.042857
2,2,AGO,Angola,1992,0.0,0.0,2.377143,16.129702,6.182138,0.0,0.0,0.0,0.0,2.377143,2.377143
3,3,AGO,Angola,1993,0.0,0.0,2.517143,15.644819,6.073679,0.0,0.0,0.0,0.0,2.517143,2.517143
4,4,AGO,Angola,1994,0.0,0.0,2.531429,15.469970,5.638678,0.0,0.0,0.0,0.0,2.531429,2.531429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7170,7170,,Yugoslavia,2012,,,,,,,0.0,,,,
7171,7171,,Yugoslavia,2013,,,,,,,0.0,,,,
7172,7172,,Yugoslavia,2014,,,,,,,0.0,,,,
7173,7173,,Yugoslavia,2015,,,,,,,0.0,,,,


In [4]:
consume_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7175 entries, 0 to 7174
Data columns (total 15 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Unnamed: 0                   7175 non-null   int64  
 1   iso_code                     6433 non-null   object 
 2   country                      7175 non-null   object 
 3   year                         7175 non-null   int64  
 4   nuclear_consumption          3787 non-null   float64
 5   coal_consumption             4267 non-null   float64
 6   hydro_consumption            3787 non-null   float64
 7   oil_consumption              4267 non-null   float64
 8   gas_consumption              4267 non-null   float64
 9   wind_consumption             3787 non-null   float64
 10  solar_consumption            7175 non-null   float64
 11  other_renewable_consumption  3787 non-null   float64
 12  biofuel_consumption          4308 non-null   float64
 13  low_carbon_consump

### Creating the missing columns for correlation calculation

In [5]:
consume_df['total_consumption'] =(consume_df['nuclear_consumption']
                                +consume_df['coal_consumption']
                                +consume_df['gas_consumption']
                                +consume_df['oil_consumption']
                                +consume_df['biofuel_consumption']
                                +consume_df['wind_consumption']
                                +consume_df['solar_consumption']
                                +consume_df['hydro_consumption']
                                +consume_df['other_renewable_consumption'])

consume_df['fossil_consumption'] = (consume_df['coal_consumption']
                                    +consume_df['oil_consumption']
                                    +consume_df['gas_consumption'])
                

In [6]:
#checking for missing values as a measure of precaution and to have an overview of the amount of missing data
#Missing data might come from countries, that don't exist in their form any more, that renamed themselves or simply have no data for specific years or categories. 
consume_df['total_consumption'].isna().value_counts()

False    3787
True     3388
Name: total_consumption, dtype: int64

In [7]:
consume_df['country'][consume_df['total_consumption'] == 0.0].nunique()

0

## Merging the GDP table to the Consumption table

In [8]:
gdp_frame = pd.read_csv('/Users/sebastianlorenzen-schmidt/neuefische/capstone_project/data/gdp-ppp-current-international-dollar-clean.csv')
gdp_frame.head()

Unnamed: 0,Country Name,Country Code,Year,GDP_PPP
0,Aruba,ABW,1990,1447709000.0
1,Africa Eastern and Southern,AFE,1990,565349500000.0
2,Afghanistan,AFG,1990,
3,Africa Western and Central,AFW,1990,354456400000.0
4,Angola,AGO,1990,38853490000.0


### standardizing the columns of the GDP table

In [9]:
cols = gdp_frame.columns.to_list()

cols = [col.replace(' ','_')for col in cols]
cols = [col.replace('Country_Code','iso_code')for col in cols]
cols = [col.lower() for col in cols]

gdp_frame.columns = cols

gdp_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8246 entries, 0 to 8245
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   country_name  8246 non-null   object 
 1   iso_code      8246 non-null   object 
 2   year          8246 non-null   int64  
 3   gdp_ppp       7241 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 257.8+ KB


### Merging the frames

In [10]:
consume_gdp_df = consume_df.merge(gdp_frame,how = 'inner', on =['year','iso_code'])
consume_gdp_df

Unnamed: 0.1,Unnamed: 0,iso_code,country,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,solar_consumption,other_renewable_consumption,biofuel_consumption,low_carbon_consumption,renewables_consumption,total_consumption,fossil_consumption,country_name,gdp_ppp
0,0,AGO,Angola,1990,0.0,0.0,2.051429,15.325936,5.834961,0.0,0.0,0.0,0.0,2.051429,2.051429,23.212326,21.160898,Angola,3.885349e+10
1,1,AGO,Angola,1991,0.0,0.0,2.042857,16.144753,6.142351,0.0,0.0,0.0,0.0,2.042857,2.042857,24.329961,22.287104,Angola,4.056562e+10
2,2,AGO,Angola,1992,0.0,0.0,2.377143,16.129702,6.182138,0.0,0.0,0.0,0.0,2.377143,2.377143,24.688982,22.311839,Angola,3.906777e+10
3,3,AGO,Angola,1993,0.0,0.0,2.517143,15.644819,6.073679,0.0,0.0,0.0,0.0,2.517143,2.517143,24.235641,21.718499,Angola,3.040193e+10
4,4,AGO,Angola,1994,0.0,0.0,2.531429,15.469970,5.638678,0.0,0.0,0.0,0.0,2.531429,2.531429,23.640077,21.108648,Angola,3.146702e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6012,7143,YEM,Yemen,2015,,,,,,,0.0,,0.0,,,,,"Yemen, Rep.",
6013,7144,YEM,Yemen,2016,,,,,,,0.0,,0.0,,,,,"Yemen, Rep.",
6014,7145,YEM,Yemen,2017,,,,,,,0.0,,0.0,,,,,"Yemen, Rep.",
6015,7146,YEM,Yemen,2018,,,,,,,0.0,,0.0,,,,,"Yemen, Rep.",


## Adding ht eCorrelation Columns for the different correlations of GDP and Energy Sources

In [11]:
#using iso_code since it is on of the parameters we merged on, country is also an option for list iteration
codes_list = consume_df['iso_code'].unique().tolist()
print(codes_list)

#empty list for storing the individual data frames for seperation by country
li = []

#iterationg through the list of iso_codes to separate the countries and create individual correlation and not have the corr() method do an overall correlation 
for code in codes_list:
    df = consume_gdp_df[consume_gdp_df['iso_code']== code]
    df['corr_gdp_totalcons']        = df['gdp_ppp'].corr(consume_gdp_df['total_consumption'], method= 'pearson')
    df['corr_gdp_fossilcons']       = df['gdp_ppp'].corr(consume_gdp_df['fossil_consumption'], method= 'pearson')
    df['corr_gdp_renewcons']        = df['gdp_ppp'].corr(consume_gdp_df['renewables_consumption'], method= 'pearson')
    df['corr_gdp_gascons']          = df['gdp_ppp'].corr(consume_gdp_df['gas_consumption'], method= 'pearson')
    df['corr_gdp_coalcons']         = df['gdp_ppp'].corr(consume_gdp_df['coal_consumption'], method= 'pearson')
    df['corr_gdp_oilcons']          = df['gdp_ppp'].corr(consume_gdp_df['oil_consumption'], method= 'pearson')
    df['corr_gdp_hydrocons']        = df['gdp_ppp'].corr(consume_gdp_df['hydro_consumption'], method= 'pearson')
    df['corr_gdp_windcons']         = df['gdp_ppp'].corr(consume_gdp_df['wind_consumption'], method= 'pearson')
    df['corr_gdp_biofuelcons']      = df['gdp_ppp'].corr(consume_gdp_df['biofuel_consumption'], method= 'pearson')
    df['corr_gdp_othercons']        = df['gdp_ppp'].corr(consume_gdp_df['other_renewable_consumption'], method= 'pearson')
    df['corr_gdp_nuclearcons']      = df['gdp_ppp'].corr(consume_gdp_df['nuclear_consumption'], method= 'pearson')


    li.append(df)

#concatenation the dataframes in the list to a single dataframe
consume_gdp_df2 = pd.concat(li, axis=0, ignore_index=True)

consume_gdp_df2
#Yes the slicing and copying of the dataframe is intended! 
#But still thanks pandas for pointing it out and remind me, that this is not always a best practice or a good idea!

['AGO', 'BEN', 'BWA', 'BFA', 'BDI', 'CPV', 'CMR', 'CAF', 'TCD', 'COM', 'COG', 'COD', 'CIV', 'DJI', 'ERI', 'SWZ', 'ETH', 'GAB', 'GMB', 'GHA', 'GIN', 'GNB', 'KEN', 'LSO', 'LBR', 'LBY', 'MDG', 'MWI', 'MLI', 'MRT', 'MUS', 'MOZ', 'NAM', 'NER', 'NGA', 'RWA', 'STP', 'SEN', 'SYC', 'SLE', 'SOM', 'SSD', 'SDN', 'TZA', 'TGO', 'TUN', 'UGA', 'ZMB', 'ZWE', 'AFG', nan, 'ALB', 'DZA', 'ASM', 'ATG', 'ARG', 'ARM', 'ABW', 'AUS', 'AUT', 'AZE', 'BHS', 'BHR', 'BGD', 'BRB', 'BLR', 'BEL', 'BLZ', 'BMU', 'BTN', 'BOL', 'BIH', 'BRA', 'VGB', 'BRN', 'BGR', 'KHM', 'CAN', 'CYM', 'CHL', 'CHN', 'COL', 'COK', 'CRI', 'HRV', 'CUB', 'CYP', 'CZE', 'DNK', 'DMA', 'DOM', 'ECU', 'EGY', 'SLV', 'GNQ', 'EST', 'FRO', 'FJI', 'FIN', 'FRA', 'GUF', 'PYF', 'GEO', 'DEU', 'GIB', 'GRC', 'GRL', 'GRD', 'GLP', 'GUM', 'GTM', 'GUY', 'HTI', 'HND', 'HKG', 'HUN', 'ISL', 'IND', 'IDN', 'IRN', 'IRQ', 'IRL', 'ISR', 'ITA', 'JAM', 'JPN', 'JOR', 'KAZ', 'KIR', 'OWID_KOS', 'KWT', 'KGZ', 'LAO', 'LVA', 'LBN', 'LTU', 'LUX', 'MAC', 'MYS', 'MDV', 'MLT', 'MTQ', 'M

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['corr_gdp_totalcons']        = df['gdp_ppp'].corr(consume_gdp_df['total_consumption'], method= 'pearson')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['corr_gdp_fossilcons']       = df['gdp_ppp'].corr(consume_gdp_df['fossil_consumption'], method= 'pearson')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

Unnamed: 0.1,Unnamed: 0,iso_code,country,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,...,corr_gdp_fossilcons,corr_gdp_renewcons,corr_gdp_gascons,corr_gdp_coalcons,corr_gdp_oilcons,corr_gdp_hydrocons,corr_gdp_windcons,corr_gdp_biofuelcons,corr_gdp_othercons,corr_gdp_nuclearcons
0,0,AGO,Angola,1990,0.0,0.0,2.051429,15.325936,5.834961,0.0,...,0.986419,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,
1,1,AGO,Angola,1991,0.0,0.0,2.042857,16.144753,6.142351,0.0,...,0.986419,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,
2,2,AGO,Angola,1992,0.0,0.0,2.377143,16.129702,6.182138,0.0,...,0.986419,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,
3,3,AGO,Angola,1993,0.0,0.0,2.517143,15.644819,6.073679,0.0,...,0.986419,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,
4,4,AGO,Angola,1994,0.0,0.0,2.531429,15.469970,5.638678,0.0,...,0.986419,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6012,7143,YEM,Yemen,2015,,,,,,,...,,,,,,,,,,
6013,7144,YEM,Yemen,2016,,,,,,,...,,,,,,,,,,
6014,7145,YEM,Yemen,2017,,,,,,,...,,,,,,,,,,
6015,7146,YEM,Yemen,2018,,,,,,,...,,,,,,,,,,


In [12]:
#checking on the top positive relationships 
consume_gdp_df2[['corr_gdp_totalcons','country','year']][consume_gdp_df2['corr_gdp_totalcons']> 0.994]

#it is very surprising to find countries that have such a strong relation between energy consumption and GDP generation

Unnamed: 0,corr_gdp_totalcons,country,year
3321,0.997815,India,1990
3322,0.997815,India,1991
3323,0.997815,India,1992
3324,0.997815,India,1993
3325,0.997815,India,1994
...,...,...,...
5982,0.998179,Vietnam,2016
5983,0.998179,Vietnam,2017
5984,0.998179,Vietnam,2018
5985,0.998179,Vietnam,2019


In [13]:
#checking on who has decoupled/a negative relationship between GDP and energy consumption
consume_gdp_df2[['corr_gdp_totalcons','country','year']][consume_gdp_df2['corr_gdp_totalcons']< -0.8]

Unnamed: 0,corr_gdp_totalcons,country,year
2928,-0.832578,Germany,1990
2929,-0.832578,Germany,1991
2930,-0.832578,Germany,1992
2931,-0.832578,Germany,1993
2932,-0.832578,Germany,1994
...,...,...,...
5146,-0.806036,Slovakia,2016
5147,-0.806036,Slovakia,2017
5148,-0.806036,Slovakia,2018
5149,-0.806036,Slovakia,2019


## Adding the population table to calculate per capita GDP and fill in the correlations for per capita GDP 

### Reading in and merging the population data to the correlations table

In [15]:
pop_df = pd.read_csv('/Users/sebastianlorenzen-schmidt/neuefische/capstone_project/data/population.csv')

pop_df

Unnamed: 0,iso_code,country,year,population
0,AFG,Afghanistan,1990,12412311.0
1,AFG,Afghanistan,1991,13299016.0
2,AFG,Afghanistan,1992,14485543.0
3,AFG,Afghanistan,1993,15816601.0
4,AFG,Afghanistan,1994,17075728.0
...,...,...,...,...
7239,ZWE,Zimbabwe,2015,13814642.0
7240,ZWE,Zimbabwe,2016,14030338.0
7241,ZWE,Zimbabwe,2017,14236599.0
7242,ZWE,Zimbabwe,2018,14438812.0


In [16]:
corr_pop_df = consume_gdp_df2.merge(pop_df, how = 'inner', on = ['iso_code','year'])

corr_pop_df

Unnamed: 0.1,Unnamed: 0,iso_code,country_x,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,...,corr_gdp_gascons,corr_gdp_coalcons,corr_gdp_oilcons,corr_gdp_hydrocons,corr_gdp_windcons,corr_gdp_biofuelcons,corr_gdp_othercons,corr_gdp_nuclearcons,country_y,population
0,0,AGO,Angola,1990,0.0,0.0,2.051429,15.325936,5.834961,0.0,...,0.209357,,0.984368,0.98247,,0.484164,0.869216,,Angola,11848385.0
1,1,AGO,Angola,1991,0.0,0.0,2.042857,16.144753,6.142351,0.0,...,0.209357,,0.984368,0.98247,,0.484164,0.869216,,Angola,12248901.0
2,2,AGO,Angola,1992,0.0,0.0,2.377143,16.129702,6.182138,0.0,...,0.209357,,0.984368,0.98247,,0.484164,0.869216,,Angola,12657361.0
3,3,AGO,Angola,1993,0.0,0.0,2.517143,15.644819,6.073679,0.0,...,0.209357,,0.984368,0.98247,,0.484164,0.869216,,Angola,13075044.0
4,4,AGO,Angola,1994,0.0,0.0,2.531429,15.469970,5.638678,0.0,...,0.209357,,0.984368,0.98247,,0.484164,0.869216,,Angola,13503753.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6012,7143,YEM,Yemen,2015,,,,,,,...,,,,,,,,,Yemen,26497881.0
6013,7144,YEM,Yemen,2016,,,,,,,...,,,,,,,,,Yemen,27168210.0
6014,7145,YEM,Yemen,2017,,,,,,,...,,,,,,,,,Yemen,27834811.0
6015,7146,YEM,Yemen,2018,,,,,,,...,,,,,,,,,Yemen,28498683.0


In [17]:
#dropping the unnecessary additional country and the unnamed column

corr_pop_df.drop(['country_y','country_name', 'Unnamed: 0'], axis = 1, inplace = True)

corr_pop_df

Unnamed: 0,iso_code,country_x,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,solar_consumption,...,corr_gdp_renewcons,corr_gdp_gascons,corr_gdp_coalcons,corr_gdp_oilcons,corr_gdp_hydrocons,corr_gdp_windcons,corr_gdp_biofuelcons,corr_gdp_othercons,corr_gdp_nuclearcons,population
0,AGO,Angola,1990,0.0,0.0,2.051429,15.325936,5.834961,0.0,0.0,...,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,,11848385.0
1,AGO,Angola,1991,0.0,0.0,2.042857,16.144753,6.142351,0.0,0.0,...,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,,12248901.0
2,AGO,Angola,1992,0.0,0.0,2.377143,16.129702,6.182138,0.0,0.0,...,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,,12657361.0
3,AGO,Angola,1993,0.0,0.0,2.517143,15.644819,6.073679,0.0,0.0,...,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,,13075044.0
4,AGO,Angola,1994,0.0,0.0,2.531429,15.469970,5.638678,0.0,0.0,...,0.980582,0.209357,,0.984368,0.98247,,0.484164,0.869216,,13503753.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6012,YEM,Yemen,2015,,,,,,,0.0,...,,,,,,,,,,26497881.0
6013,YEM,Yemen,2016,,,,,,,0.0,...,,,,,,,,,,27168210.0
6014,YEM,Yemen,2017,,,,,,,0.0,...,,,,,,,,,,27834811.0
6015,YEM,Yemen,2018,,,,,,,0.0,...,,,,,,,,,,28498683.0


In [18]:
corr_pop_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6017 entries, 0 to 6016
Data columns (total 29 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   iso_code                     6017 non-null   object 
 1   country_x                    6017 non-null   object 
 2   year                         6017 non-null   int64  
 3   nuclear_consumption          3637 non-null   float64
 4   coal_consumption             3637 non-null   float64
 5   hydro_consumption            3637 non-null   float64
 6   oil_consumption              3637 non-null   float64
 7   gas_consumption              3637 non-null   float64
 8   wind_consumption             3637 non-null   float64
 9   solar_consumption            6017 non-null   float64
 10  other_renewable_consumption  3637 non-null   float64
 11  biofuel_consumption          4158 non-null   float64
 12  low_carbon_consumption       3637 non-null   float64
 13  renewables_consump

In [19]:
#dropping further unneeded columns


### Calculating per capita GDP

In [20]:
corr_pop_df['gdp_capita'] = corr_pop_df['gdp_ppp'] / corr_pop_df['population']
corr_pop_df['gdp_capita'].isna().value_counts() #null values are to be expected due to possible unreported Data in the population table and Null values for gdp in the gdp table

False    5365
True      652
Name: gdp_capita, dtype: int64

### Calculating the Correlations between the per capita gdp and the energy consumption values

In [21]:

#empty list for storing the individual data frames for seperation by country
li = []

#iterationg through the list of iso_codes to separate the countries and create individual correlation and not have the corr() method do an overall correlation 
for code in codes_list:
    df = corr_pop_df[corr_pop_df['iso_code']== code]
    df['corr_gdp_capita_totalcons']     = df['gdp_capita'].corr(consume_gdp_df['total_consumption'], method= 'pearson')
    df['corr_gdp_capita_fossilcons']    = df['gdp_capita'].corr(consume_gdp_df['fossil_consumption'], method= 'pearson')
    df['corr_gdp_capita_renewcons']     = df['gdp_capita'].corr(consume_gdp_df['renewables_consumption'], method= 'pearson')
    df['corr_gdp_capita_gascons']       = df['gdp_capita'].corr(consume_gdp_df['gas_consumption'], method= 'pearson')
    df['corr_gdp_capita_coalcons']      = df['gdp_capita'].corr(consume_gdp_df['coal_consumption'], method= 'pearson')
    df['corr_gdp_capita_oilcons']       = df['gdp_capita'].corr(consume_gdp_df['oil_consumption'], method= 'pearson')
    df['corr_gdp_capita_hydrocons']     = df['gdp_capita'].corr(consume_gdp_df['hydro_consumption'], method= 'pearson')
    df['corr_gdp_capita_windcons']      = df['gdp_capita'].corr(consume_gdp_df['wind_consumption'], method= 'pearson')
    df['corr_gdp_capita_biofuelcons']   = df['gdp_capita'].corr(consume_gdp_df['biofuel_consumption'], method= 'pearson')
    df['corr_gdp_capita_othercons']     = df['gdp_capita'].corr(consume_gdp_df['other_renewable_consumption'], method= 'pearson')
    df['corr_gdp_capita_nuclearcons']   = df['gdp_capita'].corr(consume_gdp_df['nuclear_consumption'], method= 'pearson')


    li.append(df)

#concatenation the dataframes in the list to a single dataframe
corr_pop_df2 = pd.concat(li, axis=0, ignore_index=True)

corr_pop_df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['corr_gdp_capita_totalcons']     = df['gdp_capita'].corr(consume_gdp_df['total_consumption'], method= 'pearson')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['corr_gdp_capita_fossilcons']    = df['gdp_capita'].corr(consume_gdp_df['fossil_consumption'], method= 'pearson')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

Unnamed: 0,iso_code,country_x,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,solar_consumption,...,corr_gdp_capita_fossilcons,corr_gdp_capita_renewcons,corr_gdp_capita_gascons,corr_gdp_capita_coalcons,corr_gdp_capita_oilcons,corr_gdp_capita_hydrocons,corr_gdp_capita_windcons,corr_gdp_capita_biofuelcons,corr_gdp_capita_othercons,corr_gdp_capita_nuclearcons
0,AGO,Angola,1990,0.0,0.0,2.051429,15.325936,5.834961,0.0,0.0,...,0.960854,0.947596,0.2336,,0.957304,0.951124,,0.38587,0.797974,
1,AGO,Angola,1991,0.0,0.0,2.042857,16.144753,6.142351,0.0,0.0,...,0.960854,0.947596,0.2336,,0.957304,0.951124,,0.38587,0.797974,
2,AGO,Angola,1992,0.0,0.0,2.377143,16.129702,6.182138,0.0,0.0,...,0.960854,0.947596,0.2336,,0.957304,0.951124,,0.38587,0.797974,
3,AGO,Angola,1993,0.0,0.0,2.517143,15.644819,6.073679,0.0,0.0,...,0.960854,0.947596,0.2336,,0.957304,0.951124,,0.38587,0.797974,
4,AGO,Angola,1994,0.0,0.0,2.531429,15.469970,5.638678,0.0,0.0,...,0.960854,0.947596,0.2336,,0.957304,0.951124,,0.38587,0.797974,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6012,YEM,Yemen,2015,,,,,,,0.0,...,,,,,,,,,,
6013,YEM,Yemen,2016,,,,,,,0.0,...,,,,,,,,,,
6014,YEM,Yemen,2017,,,,,,,0.0,...,,,,,,,,,,
6015,YEM,Yemen,2018,,,,,,,0.0,...,,,,,,,,,,


In [32]:
corr_pop_df2[corr_pop_df2['country_x']== 'Russia']

Unnamed: 0,iso_code,country_x,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,solar_consumption,...,corr_gdp_capita_fossilcons,corr_gdp_capita_renewcons,corr_gdp_capita_gascons,corr_gdp_capita_coalcons,corr_gdp_capita_oilcons,corr_gdp_capita_hydrocons,corr_gdp_capita_windcons,corr_gdp_capita_biofuelcons,corr_gdp_capita_othercons,corr_gdp_capita_nuclearcons
4877,RUS,Russia,1990,328.681,2120.215,463.46,2984.635,4141.954,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485
4878,RUS,Russia,1991,333.289,1942.913,466.928,2908.481,4249.552,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485
4879,RUS,Russia,1992,332.295,1825.708,479.428,2777.476,4113.452,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485
4880,RUS,Russia,1993,331.072,1647.871,484.136,2316.031,4101.491,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485
4881,RUS,Russia,1994,271.722,1441.226,488.684,2046.039,3853.204,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485
4882,RUS,Russia,1995,276.478,1388.347,489.598,1785.451,3724.455,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485
4883,RUS,Russia,1996,302.85,1362.638,428.636,1539.511,3745.234,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485
4884,RUS,Russia,1997,301.384,1256.449,437.317,1526.909,3454.085,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485
4885,RUS,Russia,1998,288.109,1176.452,440.27,1462.642,3595.705,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485
4886,RUS,Russia,1999,338.539,1185.994,445.811,1495.317,3584.664,0.0,0.0,...,-0.005462,-0.119844,0.78995,-0.601915,-0.133925,-0.167177,0.693942,,0.854774,0.927485


In [29]:
#checking for the amount of null values in total consumption correlation
corr_pop_df2[['country_x','corr_gdp_capita_totalcons']][corr_pop_df2['corr_gdp_capita_totalcons'].isna() == True]

Unnamed: 0,country_x,corr_gdp_capita_totalcons
1384,Afghanistan,
1385,Afghanistan,
1386,Afghanistan,
1387,Afghanistan,
1388,Afghanistan,
...,...,...
6012,Yemen,
6013,Yemen,
6014,Yemen,
6015,Yemen,


In [31]:
#saving it to csv to share and export. Bag it and ship it!
corr_pop_df2.to_csv(path_or_buf= 'data/correlations.csv')