# Analyse des CO2 Verbrauchs zwischen 1900-2023 :
---

## Daten Bearbeitung
---

### Daten Import

In [1]:
# imports
import seaborn as sbs
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns



### Vorbereitung der CSV Daten

In [2]:

coal = pd.read_csv('CSV_daten/annual-co-emissions-from-coal.csv')
gas = pd.read_csv('CSV_daten/annual-co-emissions-from-gas.csv')
land_use= pd.read_csv('CSV_daten/annual-co-emissions-from-land-use-change.csv')
oil= pd.read_csv('CSV_daten/annual-co-emissions-from-oil.csv')
land_use_incl = pd.read_csv('CSV_daten/annual-co-emissions-including-land-use-change.csv')
co2_emissions = pd.read_csv('CSV_daten/annual-co-emissions.csv')
fossil_ressources_mean = pd.read_csv('CSV_daten/contribution-to-global-mean-surface-temperature-rise-from-fossil-sources.csv')
capita_co2_emmissions = pd.read_csv('CSV_daten/per-capita-greenhouse-gas-emissions.csv')
capita_greenhouse = pd.read_csv('CSV_daten/per-capita-greenhouse-gas-emissions.csv')
capita_methane = pd.read_csv('CSV_daten/per-capita-methane-emissions.csv')
capita_nitrous_oxide = pd.read_csv('CSV_daten/per-capita-nitrous-oxide-emissions.csv')



### Reinigung der Daten

In [3]:

def load_and_clean_emission_data(folder_path='CSV_daten/'):
    file_map = {
        'coal': ('annual-co-emissions-from-coal.csv', 'Co2_coal'),
        'gas': ('annual-co-emissions-from-gas.csv', 'Co2_gas'),
        'oil': ('annual-co-emissions-from-oil.csv', 'Co2_oil'),
        'land_use': ('annual-co-emissions-from-land-use-change.csv', 'Co2_land_use'),
        'land_use_incl': ('annual-co-emissions-including-land-use-change.csv', 'Co2_land_use_incl'),
        'co2_emissions': ('annual-co-emissions.csv', 'Co2_emissions'),
        'fossil_ressources_mean': ('contribution-to-global-mean-surface-temperature-rise-from-fossil-sources.csv', 'Co2_fossil_ressources_mean'),
        'capita_co2_emmissions': ('per-capita-greenhouse-gas-emissions.csv', 'Co2_capita_emissions'),
        'capita_greenhouse': ('per-capita-greenhouse-gas-emissions.csv', 'Co2_capita_greenhouse'),
        'capita_methane': ('per-capita-methane-emissions.csv', 'Co2_capita_methane'),
        'capita_nitrous_oxide': ('per-capita-nitrous-oxide-emissions.csv', 'Co2_capita_nitrous_oxide')
    }

    cleaned_data = {}

    for varname, (filename, new_colname) in file_map.items():
        path = folder_path + filename
        df = pd.read_csv(path)

        # Umbenennung der Spalten
        df.columns = ['Entity', 'Year', new_colname]

        # Speicher
        cleaned_data[varname] = df

    return cleaned_data




In [4]:

data = load_and_clean_emission_data()

coal = data['coal']
gas = data['gas']
land_use = data['land_use']
oil = data['oil']
land_use_incl = data['land_use_incl']
co2_emissions = data['co2_emissions']
fossil_ressources_mean = data['fossil_ressources_mean']
capita_co2_emmissions = data['capita_co2_emmissions']
capita_greenhouse = data['capita_greenhouse']
capita_methane = data['capita_methane']
capita_nitrous_oxide = data['capita_nitrous_oxide']

In [5]:
excluded_entities_dict = {
    "global": [
        "World", "International shipping", "International aviation"
    ],
    "regions": [
        "Asia", "EurGCP)", "Africa (GCP)", "North America (GCP)", "South America (GCP)", "Oceania (GCP)",
        "Asia (excl. Chinaope", "Africa", "North America", "South America", "Oceania",
        "Asia (GCP)", "Europe ( and India)", "North America (excl. USA)",
        "Europe (excl. EU-27)", "Europe (excl. EU-28)"
    ],
    "economic_groups": [
        "High-income countries", "Upper-middle-income countries", 
        "Lower-middle-income countries", "Low-income countries",
        "OECD (GCP)", "Non-OECD (GCP)"
    ],
    "political_unions": [
        "European Union (27)", "European Union (28)"
    ],
    "all" :[
        "World", "International shipping", "International aviation","Asia", "Europe", "Africa", "North America", "South America", "Oceania",
        "Asia (GCP)", "Europe (GCP)", "Africa (GCP)", "North America (GCP)", "South America (GCP)", "Oceania (GCP)",
        "Asia (excl. China and India)", "North America (excl. USA)",
        "Europe (excl. EU-27)", "Europe (excl. EU-28)", "High-income countries", "Upper-middle-income countries", 
        "Lower-middle-income countries", "Low-income countries",
        "OECD (GCP)", "Non-OECD (GCP)", "European Union (27)", "European Union (28)"],

    'exclusions' : 
    ['European Union (27)', 'European Union (28)', 
    "Europe (excl. EU-27)", "Europe (excl. EU-28)",
    "Asia (excl. China and India)", "North America (excl. USA)"

    ]
}

def exclude_entities(df, categories_to_exclude):
    to_exclude = []
    for category in categories_to_exclude:
        to_exclude.extend(excluded_entities_dict.get(category, []))
    
    return df[~df['Entity'].isin(to_exclude)]





## Grafik Visualisierungen
---

### 1. Welweite Co2 Emissionen 

In [31]:
Co2_welweit = co2_emissions[co2_emissions['Entity'] == 'World']
Co2_welweit.columns

Index(['Entity', 'Year', 'Co2_emissions'], dtype='object')

In [39]:
co2_welweit_fig= px.line(Co2_welweit, x = 'Year', y = 'Co2_emissions', title= 'Gesamte menschliche Co2 Emissionen Weltweit 1900-2025' )
co2_welweit_fig.update_layout( xaxis_range= [1900, 2025],
                              yaxis_title = 'Co2 Verbrauch in Gt',
                              xaxis_title = 'Jahr')
co2_welweit_fig.show()


### 2. Woher kommen diese Emissionen? 

In [48]:




df_gas = gas[gas['Entity'] == 'World'][['Year', 'Co2_gas']]
df_gas['Source'] = 'gas'
df_gas.rename(columns={'Co2_gas': 'Emissions'}, inplace=True)


df_coal = coal[coal['Entity'] == 'World'][['Year', 'Co2_coal']]
df_coal['Source'] = 'coal'
df_coal.rename(columns={'Co2_coal': 'Emissions'}, inplace=True)


df_oil = oil[oil['Entity'] == 'World'][['Year', 'Co2_oil']]
df_oil['Source'] = 'oil'
df_oil.rename(columns={'Co2_oil': 'Emissions'}, inplace=True)

# Concat um die df zusammenzufuegen
df_welt= pd.concat([df_gas, df_coal, df_oil], ignore_index=True)


In [49]:
# 

df_welt['Total'] = df_welt.groupby('Year')['Emissions'].transform('sum')

# Layout Vorbereitung
df_welt['Part (%)'] = (df_welt['Emissions'] / df_welt['Total'] * 100).round(1)
df_welt['Emissions (Mt)'] = (df_welt['Emissions'] / 1_000_000).round(1)
df_welt['Source'] = df_welt['Source'].replace({
    'coal': 'Kohle',
    'oil': 'Öl',
    'gas': 'Gas'
})


In [50]:
fig = px.bar(
    df_welt,
    x='Year',
    y='Emissions',
    color='Source',
    title='Weltweite CO₂-Emissionen nach Energiequelle',
    hover_data= {
        'Emissions (Mt)': ':.2f',
        'Part (%)': True,
        'Year': False,
        'Source': False,
        'Total': False,
        'Emissions': False
    },
    labels= {  
         'Year': 'Jahr',
        'Emissions': 'Emissionen',
        'Emissions (Mt)': 'Emissionen (MtCO₂)',
        'Emissions (Mt)': 'Emissions (MtCO₂)',
        'Anteil (%)': '% vom total'}
)
fig.update_layout(xaxis_range =[1900, 2025],
                  legend_title = 'Ressourcen',
                  xaxis_title= 'Jahr',
                  yaxis_title= 'Emissionen in Milliarden Tonnen')
fig.show()


### 3. 10 groessten kumulative Verbraucher

In [11]:

lander_ohne_regionen = exclude_entities(co2_emissions, ['all'])

In [12]:
top_laender = (
    lander_ohne_regionen
    .groupby('Entity')['Co2_emissions']
    .sum()
    .sort_values(ascending=False)
    .head(11)
    .index
)
top_laender

df_top10 = lander_ohne_regionen[lander_ohne_regionen['Entity'].isin(top_laender)]

df_top10['Total'] = df_top10.groupby('Year')['Co2_emissions'].transform('sum')
df_top10['Part (%)'] = (df_top10['Co2_emissions'] / df_top10['Total'] * 100).round(1)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [52]:
fig_top_10 = px.bar(
    df_top10,
    x='Year',
    y='Co2_emissions',
    color='Entity',
    title='Top 10 Länder mit den höchsten kumulierten CO₂-Emissionen weltweit (1900–2025)',
    hover_data={
        'Part (%)': True,
        'Co2_emissions': True,
        'Total': False
    }
)
fig_top_10.update_layout(xaxis_range=[1950, 2025],
                         yaxis_title= 'Co2 emissions',
                         legend_title = 'Entität')
fig_top_10.show()



### 4. Weltweite Landnutzung

In [14]:
# Welt filtern

land_use_incl_weltweit = land_use_incl[land_use_incl['Entity'] == 'World']
land_use_weltweit= land_use[land_use['Entity'] == 'World']

In [15]:
# beide Tabellen zusammenfuegen auf Jahr spalte
land_use_vergleich= pd.merge(
    land_use_incl_weltweit[['Year', 'Co2_land_use_incl']],
    land_use_weltweit[['Year', 'Co2_land_use']],
    on = 'Year',
    how = 'inner'
)

fig = px.line(
    land_use_vergleich,
    x='Year',
    y=['Co2_land_use_incl', 'Co2_land_use'],
    labels={
        'value': ' CO₂ Emissionen (Mt)',
        'Year': 'Jahr',
        'variable': 'Emmissionstyp'
    },
    title="Weltweite CO₂ Emissionen : mit ond ohne Landnutzung"
)

fig.show()

Landnutzung : z.B Wenn Wälder abgeholzt werden, Wiesen in Felder umgewandelt werden ...


### 5. 10 top Verbraucher

In [16]:
excluded = ["economic_groups", 'political_unions', 'global', 'exclusions']
coal_clean = exclude_entities(coal, excluded)
gas_clean = exclude_entities(gas, excluded)
oil_clean = exclude_entities(oil, excluded)


# Filter auf Jahr
coal_2020 = coal_clean[coal_clean['Year'] == 2020]
gas_2020 = gas_clean[gas_clean['Year'] == 2020]
oil_2020 = oil_clean[oil_clean['Year'] == 2020]

#Filter auf top_10
coal_top10_2020 = coal_2020.nlargest(10, 'Co2_coal')
gas_top10_2020 = gas_clean[gas_clean['Year'] == 2020].nlargest(10, 'Co2_gas')
oil_top10_2020 = oil_clean[oil_clean['Year'] == 2020].nlargest(10, 'Co2_oil')

coal_2020.head()



Unnamed: 0,Entity,Year,Co2_coal
71,Afghanistan,2020,4115975.0
211,Africa,2020,433637900.0
302,Albania,2020,567920.0
410,Algeria,2020,542272.0
444,Andorra,2020,0.0


In [17]:
# Umnennung der Spalten
coal_top10_2020 = coal_top10_2020[['Entity', 'Co2_coal']].rename(columns={'Co2_coal': 'Kohle'})
gas_top10_2020 = gas_top10_2020[['Entity', 'Co2_gas']].rename(columns={'Co2_gas': 'Gas'})
oil_top10_2020 = oil_top10_2020[['Entity', 'Co2_oil']].rename(columns={'Co2_oil': 'Oel'})

# # ressourcen mergen auf eine df
merged = coal_top10_2020.merge(gas_top10_2020, on='Entity', how='outer')
merged = merged.merge(oil_top10_2020, on='Entity', how='outer')


# Umwandlung in merged_long
merged_long = pd.melt(
    merged,
    id_vars='Entity',
    value_vars=['Kohle', 'Gas', 'Oel'],
    var_name='Energie',
    value_name='CO2 (tonnen)'
)

# Grafik erstellen
fig = px.bar(
    merged_long,
    x='Entity',
    y='CO2 (tonnen)',
    color='Energie',
    barmode='group',
    title='Vergleich der 10 groeßten CO₂-Emissionen Verbraucher (2020)'
)

fig.show()

### 6. Vergleich Bevölkerung mit Co2/ Einwohner

In [18]:
def plot_population_vs_emissions(capita_df, total_df, year=2020, excluded_categories=['economic_groups', 'political_unions', 'global', 'exclusions']):

    # Reinigung
    capita_clean = exclude_entities(capita_df, excluded_categories)
    total_clean = exclude_entities(total_df, excluded_categories)

    # Jahr Filtern
    capita_year = capita_clean[capita_clean['Year'] == year]
    total_year = total_clean[total_clean['Year'] == year]

    # Die beide DataFrames zusammenfügen
    merged = capita_year.merge(
        total_year[['Entity', 'Year', 'Co2_emissions']],
        on=['Entity', 'Year'],
        how='inner'
    )
    # In eine neue variabel speicher udn die 20 groeßten rausholen 
    merged = merged[merged['Co2_capita_emissions'] > 0].copy()
    top20_entites = merged.nlargest(20, 'Co2_emissions')["Entity"]
    merged = merged[merged["Entity"].isin(top20_entites)].copy()

    # Berechnung der theoretischen Bevölkerung
    merged['population_est'] = merged['Co2_emissions'] / merged['Co2_capita_emissions']
    merged['Bevölkerung (geschätzt)'] = (merged['population_est'] / 1_000_000).round(1).astype(str) + " Mio"

    # Gesamtemissionen in Milliarden umrechnen 
    merged['Co2_emissions_Gt'] = merged['Co2_emissions'] / 1e9

    # Grafik erstellen
    fig = px.scatter(
        merged,
        x='Co2_capita_emissions',
        y='Co2_emissions_Gt',
        color='Entity',
        size='population_est',
        hover_name='Entity',
        hover_data={
            'Bevölkerung (geschätzt)': True,
            'Co2_capita_emissions': ':.2f',
            'Co2_emissions_Gt': ':.2f',
            'population_est': False
        },
        size_max=60,
        log_x=True,
        labels={
            'Co2_capita_emissions': 'CO₂-Emissionen pro Kopf (Tonnen)',
            'Co2_emissions_Gt': 'Gesamte CO₂-Emissionen (Gt)',
            'Entity': 'Land'
        },
        title=f'Vergleich Bevölkerung vs CO₂-Emissionen pro Kopf ({year})'
    )

    # Layout
    fig.update_layout(
        template='plotly_white',
        xaxis_title='CO₂-Emissionen pro Kopf (Tonne)',
        yaxis_title='Gesamte CO₂-Emissionen (Gt)',
        legend_title='Entitaet',
        margin=dict(t=80, l=60, r=30, b=60)
    )

    fig.show()

# Appel de la fonction
plot_population_vs_emissions(capita_co2_emmissions, co2_emissions, year=2020)


### 7.Durchschnittlisches Co2-Verbrauch pro Kopf nach Region

In [19]:
# 1. Dict um eine neune Spalte zuzufuegen. Entity → Region
# Die wird 
entity_to_region = {
    # Europe
    'Albania': 'Europe', 'Andorra': 'Europe', 'Austria': 'Europe', 'Belarus': 'Europe',
    'Belgium': 'Europe', 'Bosnia and Herzegovina': 'Europe', 'Bulgaria': 'Europe',
    'Croatia': 'Europe', 'Cyprus': 'Europe', 'Czechia': 'Europe', 'Denmark': 'Europe',
    'Estonia': 'Europe', 'Faroe Islands': 'Europe', 'Finland': 'Europe', 'France': 'Europe',
    'Germany': 'Europe', 'Greece': 'Europe', 'Greenland': 'Europe', 'Hungary': 'Europe',
    'Iceland': 'Europe', 'Ireland': 'Europe', 'Italy': 'Europe', 'Latvia': 'Europe',
    'Lithuania': 'Europe', 'Luxembourg': 'Europe', 'Malta': 'Europe', 'Moldova': 'Europe',
    'Monaco': 'Europe', 'Montenegro': 'Europe', 'Netherlands': 'Europe', 'North Macedonia': 'Europe',
    'Norway': 'Europe', 'Poland': 'Europe', 'Portugal': 'Europe', 'Romania': 'Europe',
    'San Marino': 'Europe', 'Serbia': 'Europe', 'Slovakia': 'Europe',
    'Slovenia': 'Europe', 'Spain': 'Europe', 'Sweden': 'Europe', 'Switzerland': 'Europe',
    'Ukraine': 'Europe', 'United Kingdom': 'Europe', 'Vatican': 'Europe',
    # Russia séparée
    'Russia': 'Russia',
    # Asie
    'Afghanistan': 'Asia', 'Armenia': 'Asia', 'Azerbaijan': 'Asia', 'Bahrain': 'Asia',
    'Bangladesh': 'Asia', 'Bhutan': 'Asia', 'Brunei': 'Asia', 'Cambodia': 'Asia',
    'China': 'Asia', 'East Timor': 'Asia', 'Georgia': 'Asia', 'Hong Kong': 'Asia',
    'India': 'Asia', 'Indonesia': 'Asia', 'Iran': 'Asia', 'Iraq': 'Asia',
    'Israel': 'Asia', 'Japan': 'Asia', 'Jordan': 'Asia', 'Kazakhstan': 'Asia',
    'Kuwait': 'Asia', 'Kyrgyzstan': 'Asia', 'Laos': 'Asia', 'Lebanon': 'Asia',
    'Macau': 'Asia', 'Malaysia': 'Asia', 'Maldives': 'Asia', 'Mongolia': 'Asia',
    'Myanmar': 'Asia', 'Nepal': 'Asia', 'North Korea': 'Asia', 'Oman': 'Asia',
    'Pakistan': 'Asia', 'Palestine': 'Asia', 'Philippines': 'Asia', 'Qatar': 'Asia',
    'Saudi Arabia': 'Asia', 'Singapore': 'Asia', 'South Korea': 'Asia', 'Sri Lanka': 'Asia',
    'Syria': 'Asia', 'Taiwan': 'Asia', 'Tajikistan': 'Asia', 'Thailand': 'Asia',
    'Turkey': 'Asia', 'Turkmenistan': 'Asia', 'United Arab Emirates': 'Asia',
    'Uzbekistan': 'Asia', 'Vietnam': 'Asia', 'Yemen': 'Asia',
    # Afrique
    'Algeria': 'Africa', 'Angola': 'Africa', 'Benin': 'Africa', 'Botswana': 'Africa',
    'Burkina Faso': 'Africa', 'Burundi': 'Africa', 'Cabo Verde': 'Africa',
    'Cameroon': 'Africa', 'Central African Republic': 'Africa', 'Chad': 'Africa',
    'Comoros': 'Africa', 'Congo': 'Africa', 'Democratic Republic of Congo': 'Africa',
    "Cote d'Ivoire": 'Africa', 'Djibouti': 'Africa', 'Egypt': 'Africa',
    'Equatorial Guinea': 'Africa', 'Eritrea': 'Africa', 'Eswatini': 'Africa',
    'Ethiopia': 'Africa', 'Gabon': 'Africa', 'Gambia': 'Africa', 'Ghana': 'Africa',
    'Guinea': 'Africa', 'Guinea-Bissau': 'Africa', 'Kenya': 'Africa', 'Lesotho': 'Africa',
    'Liberia': 'Africa', 'Libya': 'Africa', 'Madagascar': 'Africa', 'Malawi': 'Africa',
    'Mali': 'Africa', 'Mauritania': 'Africa', 'Mauritius': 'Africa', 'Morocco': 'Africa',
    'Mozambique': 'Africa', 'Namibia': 'Africa', 'Niger': 'Africa', 'Nigeria': 'Africa',
    'Rwanda': 'Africa', 'Sao Tome and Principe': 'Africa', 'Senegal': 'Africa',
    'Seychelles': 'Africa', 'Sierra Leone': 'Africa', 'Somalia': 'Africa',
    'South Africa': 'Africa', 'South Sudan': 'Africa', 'Sudan': 'Africa',
    'Tanzania': 'Africa', 'Togo': 'Africa', 'Tunisia': 'Africa', 'Uganda': 'Africa',
    'Zambia': 'Africa', 'Zimbabwe': 'Africa',
    # Amérique du Nord
    'Antigua and Barbuda': 'North America', 'Bahamas': 'North America', 'Barbados': 'North America',
    'Belize': 'North America', 'Bermuda': 'North America', 'Canada': 'North America',
    'Costa Rica': 'North America', 'Cuba': 'North America', 'Dominica': 'North America',
    'Dominican Republic': 'North America', 'El Salvador': 'North America', 'Grenada': 'North America',
    'Guatemala': 'North America', 'Haiti': 'North America', 'Honduras': 'North America',
    'Jamaica': 'North America', 'Mexico': 'North America', 'Nicaragua': 'North America',
    'Panama': 'North America', 'Saint Kitts and Nevis': 'North America', 'Saint Lucia': 'North America',
    'Saint Vincent and the Grenadines': 'North America', 'Trinidad and Tobago': 'North America',
    'United States': 'North America',
    # Amérique du Sud
    'Argentina': 'South America', 'Bolivia': 'South America', 'Brazil': 'South America',
    'Chile': 'South America', 'Colombia': 'South America', 'Ecuador': 'South America',
    'Guyana': 'South America', 'Paraguay': 'South America', 'Peru': 'South America',
    'Suriname': 'South America', 'Uruguay': 'South America', 'Venezuela': 'South America',
    # Océanie
    'Australia': 'Oceania', 'Fiji': 'Oceania', 'Kiribati': 'Oceania', 'Marshall Islands': 'Oceania',
    'Micronesia': 'Oceania', 'Nauru': 'Oceania', 'New Zealand': 'Oceania',
    'Palau': 'Oceania', 'Papua New Guinea': 'Oceania', 'Samoa': 'Oceania',
    'Solomon Islands': 'Oceania', 'Tonga': 'Oceania', 'Tuvalu': 'Oceania',
    'Vanuatu': 'Oceania',
    # Continent-level pseudo-pays
    'Europe': 'Europe',
    'Asia': 'Asia',
    'Africa': 'Africa',
    'North America': 'North America',
    'South America': 'South America',
    'Oceania': 'Oceania',
}

In [20]:
def plot_emissions_per_capita_by_region(capita_df, excluded_categories=['all']):

    # Reinigung
    capita_clean = exclude_entities(capita_df, excluded_categories)

    # Mapping
    capita_clean['Region'] = capita_clean['Entity'].map(entity_to_region)
    capita_clean = capita_clean.dropna(subset=['Region'])

    # Mean des Verbrauchs / Einwohner & Region
    df_grouped = capita_clean.groupby(['Year', 'Region'])['Co2_capita_emissions'].mean().reset_index()

    # Graphique
    fig4 = px.area(
        df_grouped,
        x='Year',
        y='Co2_capita_emissions',
        color='Region',
        labels={
            'Year': 'Jahr',
            'Co2_capita_emissions': 'CO₂-Emissionen pro Kopf (Tonnen)',
            'Region': 'Region'
        },
        title='Durchschnittliche CO₂-Emissionen pro Kopf nach Region'
    )

    fig4.update_layout(template='plotly_white')
    fig4.show()
plot_emissions_per_capita_by_region(capita_co2_emmissions)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



 Wo sollte Russland hin?

### 8. Top 20 Laender nach Co2 Verbrauch pro Kopf

In [21]:

def barplot_top_capita_emitters(capita_df, total_df, year=2020, excluded_categories=['all']):

    # Reinigung
    capita_clean = exclude_entities(capita_df, excluded_categories)
    total_clean = exclude_entities(total_df, excluded_categories)

    # Jahr Filtern
    capita_year = capita_clean[capita_clean['Year'] == year]
    total_year = total_clean[total_clean['Year'] == year]

    # Zusammenfügen
    merged = capita_year.merge(
        total_year[['Entity', 'Year', 'Co2_emissions']],
        on=['Entity', 'Year'],
        how='inner'
    )

    merged = merged[merged['Co2_capita_emissions'] > 0].copy()
    top20_capita = merged.nlargest(20, 'Co2_capita_emissions').copy()

  
    # Bevoeklerung in Mio fuer bessere Lesbarkeit
    top20_capita['Bevölkerung (Mio)'] = (top20_capita['Co2_emissions'] / top20_capita['Co2_capita_emissions'] / 1e6).round(2)
    #Co2 Verbrauch in Gt:

    fig = px.bar(
        top20_capita,
        x='Co2_capita_emissions',
        y='Entity',
        orientation='h',
        color= 'Entity',
        text='Co2_capita_emissions',
        hover_data={
            'Bevölkerung (Mio)': True,
            'Co2_capita_emissions': ':.2f',
            'Co2_emissions': ':.0f',
        },
        labels={
            'Entity': 'Land',
            'Co2_capita_emissions': 'CO₂-Emissionen pro Kopf (Tonnen)',
        },
        title=f'Top 20 Länder nach CO₂-Emissionen pro Kopf ({year})'
    )

    fig.update_traces(texttemplate='%{x:.2f}', textposition='outside')
    fig.update_layout(
        template='plotly_white',
        xaxis_title='CO₂-Emissionen pro Kopf (Tonnen)',
        yaxis_title='Land',
        margin=dict(t=80, l=80, r=30, b=60)
    )
    fig.show()

# Los geht's!
barplot_top_capita_emitters(capita_co2_emmissions, co2_emissions, year=2020)


### 9. Vergleich Methan- und Lachgasemissionen pro Kopf nach Region

In [22]:
def prepare_ghg_df(capita_df, total_df, colname, gas_label, region_map, excluded):
    df = exclude_entities(capita_df, excluded)
    total = exclude_entities(total_df, excluded)

    df = df.merge(total[['Entity', 'Year', 'Co2_emissions']], on=['Entity', 'Year'], how='inner')
    df = df[df[colname] > 0].copy()
    df['population_est'] = df['Co2_emissions'] / df[colname]
    df['Region'] = df['Entity'].map(region_map)
    df = df.dropna(subset=['Region'])

    grouped = df.groupby(['Year', 'Region']).apply(
        lambda g: (g[colname] * g['population_est']).sum() / g['population_est'].sum()
    ).reset_index(name='Emissionen pro Kopf')
    grouped['Gas'] = gas_label

    return grouped


In [23]:
excluded = ['all']

df_methane = prepare_ghg_df(capita_methane, co2_emissions, 'Co2_capita_methane', 'Methan (CH₄)', entity_to_region, excluded)
df_n2o = prepare_ghg_df(capita_nitrous_oxide, co2_emissions, 'Co2_capita_nitrous_oxide', 'Lachgas (N₂O)', entity_to_region, excluded)

df_combined = pd.concat([df_methane, df_n2o])

fig = px.line(
    df_combined,
    x='Year',
    y='Emissionen pro Kopf',
    color='Region',
    line_dash='Gas',
    labels={
        'Emissionen pro Kopf': 'Emissionen pro Kopf (Tonnen CO₂e)',
        'Year': 'Jahr',
        'Region': 'Region',
        'Gas': 'Gas'
    },
    title='Vergleich Methan- und Lachgasemissionen pro Kopf nach Region'
)
fig.update_layout(template='plotly_white')
fig.show()



invalid value encountered in scalar divide




invalid value encountered in scalar divide





### 10. Zusammenhang zwischen Co2-Emmisionen und globaler Temparatur

In [24]:
import plotly.graph_objects as go

def plot_temp_vs_emissions_dual_axis(co2_df, temp_df):
    # Filter auf Welt
    co2_world = co2_df[co2_df['Entity'] == 'World']
    temp_world = temp_df[temp_df['Entity'] == 'World']

    # Fusion
    merged = co2_world.merge(temp_world, on='Year', how='inner')
    merged = merged.rename(columns={
        'Co2_emissions': 'CO₂-Emissionen (Gt)',
        'Co2_fossil_ressources_mean': 'Δ Temperatur (°C)'
    })

    # Grafik
    fig = go.Figure()

    # Trace 1 : CO₂-Emissionen
    fig.add_trace(go.Scatter(
        x=merged['Year'],
        y=merged['CO₂-Emissionen (Gt)'],
        name='CO₂-Emissionen (Gt)',
        line=dict(color='firebrick'),
        yaxis='y1'
    ))

    # Trace 2 : Temperatur
    fig.add_trace(go.Scatter(
        x=merged['Year'],
        y=merged['Δ Temperatur (°C)'],
        name='Δ Temperatur (°C)',
        line=dict(color='royalblue', dash='dash'),
        yaxis='y2'
    ))

    # Layout
    fig.update_layout(
        title='Zusammenhang zwischen CO₂-Emissionen und globaler Temperatur',
        template='plotly_white',
        xaxis=dict(title='Jahr'),
        yaxis=dict(
            title=dict(text='CO₂-Emissionen (Gt)', font=dict(color='firebrick')),
            tickfont=dict(color='firebrick')
        ),
        yaxis2=dict(
            title=dict(text='Δ Temperatur (°C)', font=dict(color='royalblue')),
            tickfont=dict(color='royalblue'),
            anchor='x',
            overlaying='y',
            side='right'
        ),
        legend=dict(x=0.02, y=0.95)
    )

    fig.show()


In [25]:
plot_temp_vs_emissions_dual_axis(co2_emissions, fossil_ressources_mean)


### Bonus : Entwicklung der Top-10 Co2 Verbraucher 1750-2023

In [55]:
def plot_top_emitters_rank_over_time(
    co2_df, excluded_categories=None, top_n=10, show_labels=True, min_year=None
):
    if excluded_categories is None:
        excluded_categories = ['all']

    # Unerwünschte Kategorien entfernen
    df = exclude_entities(co2_df, excluded_categories)
    df = df[df['Co2_emissions'] > 0].copy()
    if min_year:
        df = df[df['Year'] >= min_year]
        
    # Jährliche Rangliste erstellen
    df_ranked = (
        df.groupby('Year')
        .apply(lambda g: g.sort_values('Co2_emissions', ascending=False).assign(Rang=range(1, len(g)+1)))
        .reset_index(drop=True)
    )

    # Top-N Länder auswählen, die am häufigsten im Top-N vertreten sind
    top_entities = (
        df_ranked[df_ranked['Rang'] <= top_n]['Entity']
        .value_counts()
        .head(top_n)
        .index
    )
    df_top = df_ranked[df_ranked['Entity'].isin(top_entities)]
    df_top = df_top[df_top['Rang'] <= 25]

    # Namen der Länder nur im letzten Jahr anzeigen
    if show_labels:
        last_year = df_top['Year'].max()
        df_top['Text'] = df_top.apply(lambda row: row['Entity'] if row['Year'] == last_year else "", axis=1)
    else:
        df_top['Text'] = ""

    # Linien-Plot erstellen
    fig = px.line(
        df_top,
        x='Year',
        y='Rang',
        color='Entity',
        markers=True,
        text='Text',  # Länderkürzel am Ende anzeigen
        labels={
            'Year': 'Jahr',
            'Rang': 'Platz im CO₂-Ranking',
            'Entity': 'Land'
        },
        title=f'Entwicklung der Top-{top_n} CO₂-Verbraucher im Zeitverlauf'
    )

    # Layout anpassen
    fig.update_layout(
        template='plotly_white',
        yaxis=dict(
            autorange='reversed',
            dtick=1,
            range=[1, 25],
            title='Rang',
            tickmode='linear',
            tickfont=dict(size=11)
        ),
        xaxis=dict(title='Jahr', tickfont=dict(size=11)),
        legend=dict(font=dict(size=11)),
        margin=dict(l=60, r=30, t=60, b=40),
        hovermode='x unified'
    )

    fig.update_traces(
        mode='lines+markers+text',
        textposition='middle right',
        line=dict(width=2.5)
    )
    fig.update_layout(xaxis=dict(rangeslider=dict(visible=True)),
                      yaxis_title = 'PLatz')

    fig.show()


In [56]:
plot_top_emitters_rank_over_time(co2_emissions)






## Fazit
---

Mehr Zeit = bessere Daten (kein json benutzt, kaum ueber Temparatur geredet, gas nur oberflaechlich...)

jetzt fangt eigentlich der Job an : 

- was sagen eigentlich diese Nummern?
- Welches Zusammenhang gibt es zu Ereignisse
- Dash Darstellung
- ...