# Fossil Fuel Consumption Progression over the time frame of our data

In [20]:
import pandas as pd
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt

### Reading in data

In [46]:
fossil_df = pd.read_csv('/Users/sebastianlorenzen-schmidt/neuefische/capstone_project/data/energy-consumption-by-source-and-region.csv')

fossil_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7175 entries, 0 to 7174
Data columns (total 14 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   iso_code                     6433 non-null   object 
 1   country                      7175 non-null   object 
 2   year                         7175 non-null   int64  
 3   nuclear_consumption          3787 non-null   float64
 4   coal_consumption             4267 non-null   float64
 5   hydro_consumption            3787 non-null   float64
 6   oil_consumption              4267 non-null   float64
 7   gas_consumption              4267 non-null   float64
 8   wind_consumption             3787 non-null   float64
 9   solar_consumption            2490 non-null   float64
 10  other_renewable_consumption  3787 non-null   float64
 11  biofuel_consumption          4308 non-null   float64
 12  low_carbon_consumption       3787 non-null   float64
 13  renewables_consump

### Creating total fossil fuel consumption and total consumption columns

In [47]:
fossil_df['fossil_consumption']= fossil_df[['gas_consumption','coal_consumption','oil_consumption']].sum(axis = 1,skipna=True)
fossil_df['total_consumption']= fossil_df[['gas_consumption','coal_consumption','oil_consumption','nuclear_consumption',
                                           'hydro_consumption','wind_consumption','solar_consumption','biofuel_consumption','other_renewable_consumption' ]].sum(axis = 1,skipna=True)

### Calculating the differences seperated by counties

In [48]:

li = []
codes_list = fossil_df['iso_code'].unique()

codes_list

for code in codes_list:
    df = fossil_df[fossil_df['iso_code']== code]
    df['fossil_diff'] = df['fossil_consumption'].diff(periods=1) #positive periods go down the rows, negative periods go up the rows
    df['total_cons_diff'] = df['total_consumption'].diff(periods=1) #positive periods go down the rows, negative periods go up the rows
 
    li.append(df)


#concatenation the dataframes in the list to a single dataframe
fossil_df2 = pd.concat(li, axis=0, ignore_index=True)

#corr_pop_df#2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['fossil_diff'] = df['fossil_consumption'].diff(periods=1) #positive periods go down the rows, negative periods go up the rows
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['total_cons_diff'] = df['total_consumption'].diff(periods=1) #positive periods go down the rows, negative periods go up the rows


In [57]:
fossil_df2[['fossil_diff','fossil_consumption','total_cons_diff','year','country']].head(27)

#as expected the 1990 year entries are NaNs as they should be

Unnamed: 0,fossil_diff,fossil_consumption,total_cons_diff,year,country
0,0.0,21.160898,0.0,1990,Angola
1,1.126206,22.287104,1.117635,1991,Angola
2,0.024735,22.311839,0.359021,1992,Angola
3,-0.593341,21.718499,-0.453341,1993,Angola
4,-0.609851,21.108648,-0.595565,1994,Angola
5,1.714417,22.823065,1.728702,1995,Angola
6,-1.075424,21.747641,-1.003995,1996,Angola
7,3.206612,24.954253,3.063755,1997,Angola
8,-3.297326,21.656928,-2.774468,1998,Angola
9,3.363844,25.020772,2.895273,1999,Angola


### Filling in the null values with 0

In [54]:
fossil_df2['fossil_diff'].fillna(0,inplace=True)
fossil_df2['total_cons_diff'].fillna(0,inplace=True)

### short check on data

In [55]:
fossil_df2.info()

#Everything is as it should be

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6433 entries, 0 to 6432
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   iso_code                     6433 non-null   object 
 1   country                      6433 non-null   object 
 2   year                         6433 non-null   int64  
 3   nuclear_consumption          3697 non-null   float64
 4   coal_consumption             3697 non-null   float64
 5   hydro_consumption            3697 non-null   float64
 6   oil_consumption              3697 non-null   float64
 7   gas_consumption              3697 non-null   float64
 8   wind_consumption             3697 non-null   float64
 9   solar_consumption            2400 non-null   float64
 10  other_renewable_consumption  3697 non-null   float64
 11  biofuel_consumption          4218 non-null   float64
 12  low_carbon_consumption       3697 non-null   float64
 13  renewables_consump

### Exporting data to .csv

In [56]:
fossil_df2.to_csv(path_or_buf= 'data/fossil_consumption_diff.csv')

# Looking at production differences for primary energy

In [37]:
foss_prod_diff_df= pd.read_csv('/Users/sebastianlorenzen-schmidt/neuefische/capstone_project/data/world_primary_energy_production.csv')

foss_prod_diff_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5024 entries, 0 to 5023
Data columns (total 27 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Unnamed: 0                    5024 non-null   int64  
 1   country                       5024 non-null   object 
 2   location_code                 5024 non-null   object 
 3   world_region                  5024 non-null   object 
 4   year                          5024 non-null   int64  
 5   total_prod                    5024 non-null   float64
 6   coal                          5024 non-null   float64
 7   oil                           5024 non-null   float64
 8   gas                           5024 non-null   float64
 9   nuclear                       5024 non-null   float64
 10  hydroelectricity              5024 non-null   float64
 11  wind                          5024 non-null   float64
 12  biofuel                       5024 non-null   float64
 13  sol

In [38]:
li = []
codes_list = foss_prod_diff_df['location_code'].unique()

codes_list

for code in codes_list:
    df = foss_prod_diff_df[foss_prod_diff_df['location_code']== code]
    df['fossil_prod_diff'] = df['fossil_fuels'].diff(periods=1) #positive periods go down the rows, negative periods go up the rows
    li.append(df)


#concatenation the dataframes in the list to a single dataframe
foss_prod_diff_df2 = pd.concat(li, axis=0, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['fossil_prod_diff'] = df['fossil_fuels'].diff(periods=1) #positive periods go down the rows, negative periods go up the rows


In [45]:
foss_prod_diff_df2[['country','year','fossil_fuels','fossil_prod_diff']][foss_prod_diff_df2['country']=='Russian Federation']

Unnamed: 0,country,year,fossil_fuels,fossil_prod_diff
2056,Russian Federation,1990,19771.557542,
2057,Russian Federation,1991,18098.155338,-1673.402205
2058,Russian Federation,1992,13851.216608,-4246.93873
2059,Russian Federation,1993,12815.272729,-1035.943878
2060,Russian Federation,1994,12099.478793,-715.793936
2061,Russian Federation,1995,11807.804674,-291.67412
2062,Russian Federation,1996,11359.153001,-448.651673
2063,Russian Federation,1997,11034.370946,-324.782055
2064,Russian Federation,1998,11121.224949,86.854003
2065,Russian Federation,1999,11396.462622,275.237673


In [40]:
foss_prod_diff_df2.to_csv(path_or_buf='data/prod_difference.csv')