In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px


# Absolute location of the raw Scope 3 environmental-impact workbook.
# NOTE(review): this is a machine-specific path; the notebook only runs where
# this exact file exists.
file_path = r"C:\Users\chris\Corporate_Environmental_Impact\data\raw\Scope-3-Environmental-Impact-Data-2022.xlsx"

# Read the "0%" sheet and discard every row containing at least one missing value.
# NOTE(review): dropna() runs before the flag columns are removed below, so rows
# whose only gaps are in those columns are discarded too - confirm this is intended.
df = pd.read_excel(file_path, sheet_name="0%").dropna()

# Drop columns that are not relevant for the analysis: any column whose name
# contains "Imputed" or "Set" (case-insensitive). Per the original note these
# indicate whether a value was estimated or used for ML.
is_flag_column = df.columns.str.contains('Imputed|Set', case=False)
df = df.loc[:, ~is_flag_column]
print(df.columns)



In [None]:
# How much revenue do companies generate relative to their emissions?

# Work on an independent copy of the cleaned frame. `df_eff` was previously used
# without ever being assigned, which raises NameError on a fresh kernel; copying
# also keeps this cell from modifying `df` for the cells that follow.
df_eff = df.copy()

# Convert both measures to positive values.
df_eff['GHG Intensity (Sales)'] = df_eff['GHG Intensity (Sales)'].abs()
df_eff['Total GHG Environmental Impact (Scope 1, 2, 3)'] = (
    df_eff['Total GHG Environmental Impact (Scope 1, 2, 3)'].abs()
)

# Estimated revenue = total GHG impact divided by GHG intensity (sales).
df_eff['Estimated Revenue'] = (
    df_eff['Total GHG Environmental Impact (Scope 1, 2, 3)']
    / df_eff['GHG Intensity (Sales)']
)

# Keep only meaningful values: strictly positive AND finite. A zero intensity
# yields +inf, which would pass a plain `> 0` check but cannot be placed on a
# log axis; 0/0 yields NaN, which the `> 0` comparison already rejects.
df_eff = df_eff[
    (df_eff['Estimated Revenue'] > 0)
    & (df_eff['Estimated Revenue'] != float('inf'))
]

# Scatter plot on log-log axes.
fig_eff, ax_eff = plt.subplots(figsize=(10, 7))
ax_eff.scatter(
    df_eff['Estimated Revenue'],
    df_eff['Total GHG Environmental Impact (Scope 1, 2, 3)'],
    alpha=0.5
)

ax_eff.set_xscale('log')
ax_eff.set_yscale('log')
ax_eff.set_xlabel('Geschätzter Umsatz (log)')
ax_eff.set_ylabel('GHG Impact (log, USD)')
ax_eff.set_title('Klimaeffizienz: Umsatz vs. GHG Impact')
ax_eff.grid(True, which="both", ls="--", alpha=0.5)
fig_eff.tight_layout()
# Written to the current working directory.
fig_eff.savefig("turnover_efficiency_scatter.png")
plt.show()


In [None]:


# Column names used throughout this cell.
intensity_col = 'GHG Intensity (Sales)'
impact_col = 'Total GHG Environmental Impact (Scope 1, 2, 3)'

# Replace both measures with their absolute values so the ratio is non-negative.
for measure_col in (intensity_col, impact_col):
    df[measure_col] = df[measure_col].abs()

# Estimated revenue = total GHG impact divided by GHG intensity (sales);
# keep only rows where that estimate is strictly positive.
# NOTE(review): a zero intensity produces an infinite estimate, which still
# passes the `> 0` filter - confirm whether such rows should be kept.
df['Estimated Revenue'] = df[impact_col].div(df[intensity_col])
df = df.loc[df['Estimated Revenue'] > 0]

# Build the interactive Plotly scatter (log-log axes, company details on hover).
scatter_options = dict(
    x='Estimated Revenue',
    y='Total GHG Environmental Impact (Scope 1, 2, 3)',
    hover_name='Company Name',
    hover_data=['Country', 'Industry (Exiobase)', 'Year'],
    log_x=True,
    log_y=True,
    title='Klimaeffizienz: Umsatz vs. GHG Impact (interaktiv)',
    labels={
        'Estimated Revenue': 'Geschätzter Umsatz',
        'Total GHG Environmental Impact (Scope 1, 2, 3)': 'GHG Impact (USD)'
    },
)
fig = px.scatter(df, **scatter_options)

# Small, semi-transparent markers; fixed figure height.
fig.update_traces(marker={'size': 6, 'opacity': 0.5})
fig.update_layout(height=700)

# Export a standalone HTML file one directory above the working directory, then display.
fig.write_html("../revenue_vs_emissions.html")
fig.show()


In [None]:
# Flag German companies: compare the country name, stripped of surrounding
# whitespace and lower-cased, against 'germany'.
country_normalized = df['Country'].str.strip().str.lower()
df['IsGerman'] = country_normalized.eq('germany')

# Same log-log revenue/impact scatter as before, coloured by the German flag
# (green for German companies, light gray for all others).
compare_options = dict(
    x='Estimated Revenue',
    y='Total GHG Environmental Impact (Scope 1, 2, 3)',
    color='IsGerman',
    hover_name='Company Name',
    hover_data=['Country', 'Industry (Exiobase)', 'Year'],
    log_x=True,
    log_y=True,
    title='Klimaeffizienz: Deutschland im internationalen Vergleich',
    labels={
        'Estimated Revenue': 'Geschätzter Umsatz',
        'Total GHG Environmental Impact (Scope 1, 2, 3)': 'GHG Impact (USD)',
        'IsGerman': 'Deutsches Unternehmen'
    },
    color_discrete_map={True: 'green', False: 'lightgray'},
)
fig_compare = px.scatter(df, **compare_options)

# Small, semi-transparent markers; fixed figure height.
fig_compare.update_traces(marker={'size': 6, 'opacity': 0.5})
fig_compare.update_layout(height=700)

# Export a standalone HTML file one directory above the working directory, then display.
fig_compare.write_html("../revenue_vs_emissions_compare_germany.html")
fig_compare.show()
