In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [3]:
dataset = pd.read_csv("co2emission.csv")
dataset.head(100)

Unnamed: 0,country,year,iso_code,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,...,share_global_other_co2,share_of_temperature_change_from_ghg,temperature_change_from_ch4,temperature_change_from_co2,temperature_change_from_ghg,temperature_change_from_n2o,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
0,Afghanistan,1850,AFG,3752993.0,,,,,,,...,,,,,,,,,,
1,Afghanistan,1851,AFG,3767956.0,,,,,,,...,,0.165,0.0,0.0,0.0,0.0,,,,
2,Afghanistan,1852,AFG,3783940.0,,,,,,,...,,0.164,0.0,0.0,0.0,0.0,,,,
3,Afghanistan,1853,AFG,3800954.0,,,,,,,...,,0.164,0.0,0.0,0.0,0.0,,,,
4,Afghanistan,1854,AFG,3818038.0,,,,,,,...,,0.163,0.0,0.0,0.0,0.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Afghanistan,1945,AFG,6917471.0,,,,,,,...,,0.129,0.0,0.0,0.0,0.0,,,,
96,Afghanistan,1946,AFG,7023527.0,,,,,,,...,,0.129,0.0,0.0,0.0,0.0,,,,
97,Afghanistan,1947,AFG,7131209.0,,,,,,,...,,0.129,0.0,0.0,0.0,0.0,,,,
98,Afghanistan,1948,AFG,7240542.0,,,,,,,...,,0.129,0.0,0.0,0.0,0.0,,,,


In [43]:
new_data = dataset[["country", "year", "co2_per_capita"]]
new_data.head()

Unnamed: 0,country,year,co2_per_capita
0,Afghanistan,1850,
1,Afghanistan,1851,
2,Afghanistan,1852,
3,Afghanistan,1853,
4,Afghanistan,1854,


In [87]:
europe = new_data[new_data["country"].isin(["Austria","Andorra","Armenia","Austria","Azerbaijan", 
                                            "Belarus","Belgium","Bosnia and Hersegovina","Bulgaria",
                                            "Croatia","Cyprus","Czechia",
                                            "Denmark",
                                            "Estonia",
                                            "Finland","France",
                                            "Georgia","Germany","Greece",
                                            "Hungary",
                                            "Iceland","Ireland","Italy",
                                           "Kazakhstan","Kosovo",
                                           "Latvia","Liechtenstein","Lithuania","Luxembourg",
                                           "Malta","Moldova","Monaco","Montenegro",
                                           "Netherlands","North Macedonia","Norway",
                                           "Poland","Portugal",
                                          "Romania","Russia",
                                           "San Marino","Serbia","Slovakia","Spain","Sweden","Switzerland",
                                           "Turkey",
                                           "Ukraine", "United Kingdom"])]
europe.head(9000)

Unnamed: 0,country,year,co2_per_capita
1304,Andorra,1750,0.000
1305,Andorra,1751,
1306,Andorra,1752,
1307,Andorra,1753,
1308,Andorra,1754,
...,...,...,...
47932,United Kingdom,2017,5.863
47933,United Kingdom,2018,5.716
47934,United Kingdom,2019,5.462
47935,United Kingdom,2020,4.865


In [88]:
europe.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8364 entries, 1304 to 47936
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   country         8364 non-null   object 
 1   year            8364 non-null   int64  
 2   co2_per_capita  7165 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 261.4+ KB


In [89]:
# Interpolate missing values using linear interpolation along the columns
europe_interpolated = europe.interpolate(method='linear', axis=0)
europe_interpolated.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8364 entries, 1304 to 47936
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   country         8364 non-null   object 
 1   year            8364 non-null   int64  
 2   co2_per_capita  8364 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 261.4+ KB


In [90]:
europe.head(9000)

Unnamed: 0,country,year,co2_per_capita
1304,Andorra,1750,0.000
1305,Andorra,1751,
1306,Andorra,1752,
1307,Andorra,1753,
1308,Andorra,1754,
...,...,...,...
47932,United Kingdom,2017,5.863
47933,United Kingdom,2018,5.716
47934,United Kingdom,2019,5.462
47935,United Kingdom,2020,4.865


In [91]:
europe_interpolated.head(1000)
#eu = new_data[new_data["country"].isin(["Ireland"])]
#eu.head(1000)
#eu = europe_interpolated[europe_interpolated["country"].isin(["Ireland"])]
#eu.head(1000)           

Unnamed: 0,country,year,co2_per_capita
1304,Andorra,1750,0.000
1305,Andorra,1751,0.000
1306,Andorra,1752,0.000
1307,Andorra,1753,0.000
1308,Andorra,1754,0.000
...,...,...,...
5005,Belarus,2013,6.617
5006,Belarus,2014,6.566
5007,Belarus,2015,6.061
5008,Belarus,2016,5.988


Unnamed: 0,country,year,co2_per_capita
22153,Ireland,1850,0.057
22154,Ireland,1851,
22155,Ireland,1852,
22156,Ireland,1853,
22157,Ireland,1854,
...,...,...,...
22320,Ireland,2017,8.189
22321,Ireland,2018,8.070
22322,Ireland,2019,7.624
22323,Ireland,2020,7.107


Unnamed: 0,country,year,co2_per_capita
22153,Ireland,1850,0.057000
22154,Ireland,1851,0.057367
22155,Ireland,1852,0.057733
22156,Ireland,1853,0.058100
22157,Ireland,1854,0.058467
...,...,...,...
22320,Ireland,2017,8.189000
22321,Ireland,2018,8.070000
22322,Ireland,2019,7.624000
22323,Ireland,2020,7.107000


In [None]:
\