# Validity Checks
This section checks the results in several ways in order to find errors.
## Visual validity check
### Sum of capacity by fuel type

In [None]:
# Sum the capacity per fuel type and plot it as a bar chart, largest first.
# Because ``aggfunc`` is given as a list, pandas labels the result with two
# column levels (aggregation name, value column), so the single resulting
# column is ('sum', 'capacity').
pivot_fuel_capacity = pd.pivot_table(
    plantlist,
    values='capacity',
    index='fuel',
    aggfunc=[np.sum],
)
# Sort by the complete column key: the partial label 'sum' is rejected as
# "not unique" by current pandas versions when the columns are a MultiIndex.
pivot_fuel_capacity = pivot_fuel_capacity.sort_values(
    by=('sum', 'capacity'),
    ascending=False,
)
pivot_fuel_capacity_plot = pivot_fuel_capacity.plot(
    kind='bar',
    legend=False,
    figsize=(12, 6),
)
pivot_fuel_capacity_plot.set_ylabel("MW")
pivot_fuel_capacity_plot

### Capacities by plant status

In [None]:
# Sum the capacity per fuel (rows) and plant status (columns), order the
# fuels by their 'operating' capacity in descending order, and draw the
# result as a stacked horizontal bar chart.
pivot_status_capacity = pd.pivot_table(
    plantlist,
    index='fuel',
    columns='status',
    values='capacity',
    aggfunc=np.sum,
).sort_values(by='operating', ascending=False)

pivot_status_capacity_plot = pivot_status_capacity.plot(
    kind='barh',
    stacked=True,
    legend=True,
    figsize=(12, 6),
)
pivot_status_capacity_plot.set_xlabel("MW")
pivot_status_capacity_plot

### Power plant age

In [None]:
# Sum the capacity per commissioning value (rows) and fuel (columns) and draw
# it as a stacked bar chart.
# plantlist_filtered is currently the complete plant list; narrow it here
# (e.g. to a single fuel) to restrict the chart.
plantlist_filtered = plantlist
pivot_age_capacity = pd.pivot_table(
    plantlist_filtered,
    index='commissioned',
    columns='fuel',
    values='capacity',
    aggfunc=np.sum,
    dropna=True,
)
pivot_age_capacity_plot = pivot_age_capacity.plot(
    kind='bar',
    stacked=True,
    legend=True,
    figsize=(17, 10),
)
pivot_age_capacity_plot.set_ylabel("MW")
pivot_age_capacity_plot

### Block size vs year of commissioning
This chart is suitable for spotting outliers in commissioning years and block sizes. 
In theory, there should be no unexpected values, e.g. all commissioning years should be greater than 1900. 
Block sizes above 2000 MW are also unlikely.

In [None]:
# Scatter plot of block capacity against commissioning year, coloured by fuel.
# Work on a copy so the numeric helper columns are not added to plantlist.
plantlist_for_plot = plantlist.copy()
for source_column in ('capacity', 'commissioned'):
    # Values that cannot be parsed as numbers become NaN.
    plantlist_for_plot[source_column + '_float'] = pd.to_numeric(
        plantlist_for_plot[source_column], errors='coerce'
    )

plot_blocksize_year = Scatter(
    plantlist_for_plot,
    notebook=True,
    x='commissioned_float',
    y='capacity_float',
    color='fuel',
    title='Block-Size vs Year of Commissioning',
    xlabel='Year',
    ylabel='MW',
    legend="top_left",
    height=500,
    width=700,
)
show(plot_blocksize_year)

## Logical checks
### Every power plant needs a capacity
List all entries with zero capacity.

In [None]:
# Select the rows whose capacity equals zero.
plantlist.loc[plantlist['capacity'] == 0]

### Commissioning Dates

In [1]:
# Show all plants with a commissioning year of 1900 or earlier.
# The year is parsed from 'commissioned' directly, so this check does not
# depend on a 'commissioned_float' helper column existing on plantlist.
# Values that cannot be parsed become NaN and never satisfy the comparison.
plantlist[pd.to_numeric(plantlist['commissioned'], errors='coerce') <= 1900]

NameError: name 'plantlist' is not defined

In [None]:
# Show all plants whose commissioning date is missing or not numeric.
# The value is parsed from 'commissioned' directly, so this check does not
# depend on a 'commissioned_float' helper column existing on plantlist.
plantlist[pd.to_numeric(plantlist['commissioned'], errors='coerce').isnull()]

### Compare UBA and BNetzA data
#### Postcodes of BNetzA and UBA lists should match

In [None]:
# List all entries whose postcode differs from the UBA postcode
# (only rows where a UBA postcode is given).
# Both columns are compared numerically; unparseable values become NaN.
postcode_bnetza = pd.to_numeric(plantlist['postcode'], errors='coerce')
postcode_uba = pd.to_numeric(plantlist['uba_postcode'], errors='coerce')
plantlist[plantlist['uba_postcode'].notnull() & (postcode_bnetza != postcode_uba)]

#### Compare Installed capacities

In [None]:
# Ratio of the plant list capacity to the UBA capacity, next to the plant
# name, block and both capacity values. The ratio is stored in column 0.
capacitycomparison = pd.DataFrame(plantlist['capacity'] / plantlist['uba_capacity'])
for target_column, source_column in (
    ('Name', 'name'),
    ('Block', 'block'),
    ('BnetzaCapacity', 'capacity'),
    ('UBACapacity', 'uba_capacity'),
):
    capacitycomparison[target_column] = plantlist[source_column]

# Keep only rows without any missing value, then display them ordered by ratio.
capacitycomparison = capacitycomparison.dropna()
capacitycomparison.sort_values(by=0)

#### Compare Commissioning Years

In [None]:
# Put the plant list commissioning value next to the UBA value and compute
# their numeric difference ('Faktor').
commissioningcomparison = pd.DataFrame(plantlist['commissioned'])
commissioningcomparison['UBACommissioned'] = plantlist['uba_commissioned']

# Numeric versions of both columns; unparseable values become NaN.
commissioningcomparison['commissioned_float'] = pd.to_numeric(
    commissioningcomparison['commissioned'], errors='coerce'
)
commissioningcomparison['UBACommissioned_float'] = pd.to_numeric(
    commissioningcomparison['UBACommissioned'], errors='coerce'
)

commissioningcomparison['Faktor'] = (
    commissioningcomparison['commissioned_float']
    - commissioningcomparison['UBACommissioned_float']
)
commissioningcomparison['Name'] = plantlist['name']
commissioningcomparison['Block'] = plantlist['block']

# Keep only rows where both sources provide a value, ordered by difference.
commissioningcomparison = commissioningcomparison.dropna(
    subset=['commissioned', 'UBACommissioned']
).sort_values(by='Faktor')

commissioningcomparison