In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from functions_hypo_2 import *

### Read and Clean Main Data File

In [94]:
df_main = pd.read_csv('data/df_main.csv').drop(columns=["Unnamed: 0"])

In [95]:
df_main.head(3)

Unnamed: 0,country_name,date,parameter,product,value,unit,year
0,Austria,12/1/2023,Net Electricity Production,Electricity,6783,GWh,2023
1,Austria,12/1/2023,Net Electricity Production,Total Combustible Fuels,1747,GWh,2023
2,Austria,12/1/2023,Net Electricity Production,"Coal, Peat and Manufactured Gases",167,GWh,2023


### Read, Clean and Merge Demographic Data

Load population and area data for EU countries:
- Population Data for each Country: [Eurostat](https://ec.europa.eu/eurostat/databrowser/view/DEMO_GIND__custom_7127262/default/table)
- Area Data for each Country: [Eurostat](https://ec.europa.eu/eurostat/databrowser/view/reg_area3__custom_11352231/bookmark/table?lang=en&bookmarkId=fabcfca6-4abb-4a84-ac1c-7bb335af436a)

In [96]:
# Load demographic data (Population and Area)
df_pop = pd.read_csv("data/eu_country_population.csv")
df_area = pd.read_csv("data/eu_country_area.csv")

# Rename columns
df_pop = df_pop.rename(columns={'OBS_VALUE': "population"})
df_area = df_area.rename(columns={'OBS_VALUE': "area_[km2]"})

# Drop the row for EU27
df_pop = df_pop[df_pop['geo'] != "EU27_2020"]
df_area = df_area[df_area['geo'] != "EU27_2020"]

In [97]:
# Create dataframe with one row per country and aggregate demographic information
df_countries = df_pop[['geo', 'population']].merge(df_area[['geo', 'area_[km2]']], how='inner', on='geo')
country_codes_dict = get_country_codes_dict() # Get dictionary translating country codes to country names
df_countries = df_countries.rename(columns={'geo': 'country_code'}) # Rename country code column
df_countries['country_name'] = df_countries['country_code'].map(country_codes_dict) # Create column with country names
df_countries.head(3)

Unnamed: 0,country_code,population,area_[km2],country_name
0,AT,9104772,83878,Austria
1,BE,11742796,30667,Belgium
2,BG,6447710,110996,Bulgaria


### Add further Columns

Building up from the countries demographic data frame created above, the radiation categories and electricity production (solar and total) data are included in further columns. In addition, intensity figures are calculated and stored in separate columns

In [98]:
# Add data about solar radiation classification
eu_countries_solar_radiation_classification_dict = get_solar_class_dict() # Get dictionary with classification info
df_countries['solar_class'] = df_countries['country_name'].map(eu_countries_solar_radiation_classification_dict)

In [99]:
# Add data from df_main (solar electricity production)
df_pivot = df_main.loc[(df_main["product"] == "Solar") & (df_main["year"] > 2013)].pivot_table(index="country_name", values="value", aggfunc="mean")*365
df_countries = df_countries.merge(df_pivot, how='inner', on='country_name')
df_countries = df_countries.rename(columns={'value': 'solar_power_[GWh]'})

In [100]:
# Add data from df_main (total electricity production))
df_pivot = df_main.loc[(df_main["product"] == "Electricity") & (df_main["year"] > 2013)].pivot_table(index="country_name", values="value", aggfunc="mean")*365
df_countries = df_countries.merge(df_pivot, how='inner', on='country_name')
df_countries = df_countries.rename(columns={'value': 'total_annual_electricity_[GWh]'})

In [101]:
# Calculate solar power intensity (production per area)
df_countries['solar_power_intensity_[GWh_per_km2]'] = df_countries['solar_power_[GWh]'] / df_countries["area_[km2]"]
df_countries['solar_power_intensity_[GWh_per_1000_capita]'] = df_countries['solar_power_[GWh]'] / df_countries["population"] * 1000
df_countries['solar_share_in_total_electricity_[%]'] = df_countries['solar_power_[GWh]'] / df_countries["total_annual_electricity_[GWh]"] * 100
df_countries.head(3)

Unnamed: 0,country_code,population,area_[km2],country_name,solar_class,solar_power_[GWh],total_annual_electricity_[GWh],solar_power_intensity_[GWh_per_km2],solar_power_intensity_[GWh_per_1000_capita],solar_share_in_total_electricity_[%]
0,AT,9104772,83878,Austria,2: low,51382.875,944443.6,0.612591,5.64351,5.440545
1,BE,11742796,30667,Belgium,2: low,138383.666667,1004391.0,4.512462,11.784559,13.777871
2,BG,6447710,110996,Bulgaria,3: medium,51356.851852,1262052.0,0.462691,7.965131,4.069314


### Data Visualizations

#### Country Solar Radiation Categorization Map
In order to visualize the Country Solar Radiation Categorization, a map is created, which depicts all considered countries in a color depending on their respective category. The colors are defined in a way, so that green represents countries with low solar radiation, and red represents countries with high solar radiation. The categorization itself is defined further above. In subsequent charts, the countries' categories are indicated by applying the colormap to the color of the bars.

In [11]:
# Define Color to Visualize Country Solar Radiation Categories
color_dict = {
    '1: very low': '#00FF00',
    '2: low': '#ADFF2F',
    '3: medium': '#FFFF00',
    '4: high': '#FFA500',
    '5: very high': '#FF0000'
}

In [76]:
# Create an plot the Map
fig = px.choropleth(df_countries.sort_values('solar_class'), 
                    locations='country_name', 
                    locationmode='country names',
                    color='solar_class', # categorize colors by solar class
                    color_discrete_map=color_dict, # apply specified colors to color categories
                    title='Country Categorization by Solar Radiation',
                    projection='natural earth',
                    scope='europe',
                    labels={'solar_class': 'Radiation Category'}) # Legend label

fig.update_layout(height=600, width=800, showlegend=True, # set size of the figure
                  title={'y': 0.83, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'}) # place title in center of the map

fig.show()


#### Barcharts showing the Average Annual Solar Net Electricity Production

In order to verify the hypothesis, the countries' average annual solar net electricity productions in relation to their area are compared. The figure displays the data aggregated per radiation category. Countries categorized for higher radiation tend to have higher solar electricity production rates. Countries in the category "low", however, on average have a higher solar electricity production rate than countries in the categories "medium" or "high".

In [77]:
df_pivot_class = df_countries.pivot_table(index="solar_class", values=["solar_power_intensity_[GWh_per_km2]"], aggfunc="mean")

fig = px.bar(df_pivot_class, x=df_pivot_class.index, y="solar_power_intensity_[GWh_per_km2]",
             color=df_pivot_class.index,
             color_discrete_map=color_dict,
             title='Average Annual Solar Net Electricity Production per Area')

fig.update_layout(height=500, width=800, showlegend=False,
                  title={'y': 0.85, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'})

fig.update_xaxes(title_text="Solar Radiation Category")
fig.update_yaxes(title_text="GWh/km²")

fig.show()

In order to investigate the above findings further, the following figure shows the same data without category aggregation, but separately for each country. It ca be seen, that the "low"-radiation countries' solar electricity production rates are dominated by three countries: Netherlands, Belgium and Germany. Each one of these show a higher rate than the "very high"-radiation countries Spain and Cyprus. In the "very low"-radiation category, Denmark shows a solar rate comparable to the ones of "medium"-radiation. The country with the highest solar rate per area by far is the "very high"-radiation country Malta. 

In [78]:
fig = px.bar(df_countries.sort_values(['solar_class', 'solar_power_intensity_[GWh_per_km2]']), x='country_name', y='solar_power_intensity_[GWh_per_km2]',
             color='solar_class',
             color_discrete_map=color_dict,
             labels={'solar_class': 'Radiation Category', 'country_code': "Country Code", 'solar_power_intensity_[GWh_per_km2]': 'Solar Electricity Production per km²'},
             title='Average Annual Solar Net Electricity Production per Area')

fig.update_yaxes(range=[0, 7], title="GWh/km²")
fig.update_xaxes(title_text="")
fig.update_layout(height=450, width=800, showlegend=True,
                  title={'y': 0.85, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
                  legend={'y': 0.9, 'x': 0.15, 'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(tickangle=-45)

fig.show()


In [84]:
fig = px.bar(df_countries.sort_values(['solar_class', 'solar_power_intensity_[GWh_per_1000_capita]']), x='country_name', y='solar_power_intensity_[GWh_per_1000_capita]',
             color='solar_class',
             color_discrete_map=color_dict,
             labels={'solar_class': 'Radiation Category', 'country_code': "Country Code", 'solar_power_intensity_[GWh_per_1000_capita]': 'Solar Electricity Production per 1000 People'},
             title='Average Annual Solar Net Electricity Production per Capita')

fig.update_yaxes(range=[0, 18], title="GWh/1000 People")
fig.update_xaxes(title_text="")
fig.update_layout(height=450, width=800, showlegend=True,
                  title={'y': 0.85, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
                  legend={'y': 0.9, 'x': 0.15, 'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(tickangle=-45)

fig.show()


In [83]:
fig = px.bar(df_countries.sort_values(['solar_class', 'solar_share_in_total_electricity_[%]']), x='country_name', y='solar_share_in_total_electricity_[%]',
             color='solar_class',
             color_discrete_map=color_dict,
             labels={'solar_class': 'Radiation Category', 'country_code': "Country Code", 'solar_share_in_total_electricity_[%]': 'Share in Total Electricity'},
             title='Average Annual Solar Net Electricity Production per Total Electricity')

fig.update_yaxes(range=[0, 28], title="%")
fig.update_xaxes(title_text="")
fig.update_layout(height=450, width=800, showlegend=True,
                  title={'y': 0.85, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
                  legend={'y': 0.9, 'x': 0.15, 'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(tickangle=-45)

fig.show()