In [3]:
import pandas as pd
import glob
import yfinance as yf

In [32]:
# Take an independent copy of the 'Volume' data from each index's price frame.
sp500_volume = sp500_data['Volume'].copy()
dax_volume = dax_data['Volume'].copy()

# Start from an empty frame carrying the S&P 500 index, attach both volume
# columns (pandas aligns each assignment on that index), and finally discard
# every row in which either column ended up as NaN.
volume_comparison = (
    pd.DataFrame(index=sp500_volume.index)
    .assign(SP500_Volume=sp500_volume, DAX_Volume=dax_volume)
    .dropna()
)

# Quick textual overview: first ten rows, row count, descriptive statistics.
print("Volume Comparison DataFrame:", volume_comparison.head(10), sep="\n")
print(f"\nData points available: {volume_comparison.shape[0]}")
print("\nSummary statistics:", volume_comparison.describe(), sep="\n")

Volume Comparison DataFrame:
            SP500_Volume   DAX_Volume
Date                                 
2015-10-23     144442300  120121400.0
2015-10-26      69033000   75352100.0
2015-10-27      77905800   83819800.0
2015-10-28     135906700   79923400.0
2015-10-29      90525500  126302200.0
2015-10-30     131076900   92030900.0
2015-11-02      86270800   95950600.0
2015-11-03      95246100   84574500.0
2015-11-04      96224500   99251200.0
2015-11-05      78408700   94791800.0

Data points available: 2467

Summary statistics:
       SP500_Volume    DAX_Volume
count  2.467000e+03  2.467000e+03
mean   8.315855e+07  8.235947e+07
std    4.134971e+07  3.421517e+07
min    2.604870e+07  0.000000e+00
25%    5.766675e+07  6.185085e+07
50%    7.372250e+07  7.638410e+07
75%    9.589530e+07  9.526120e+07
max    3.922207e+08  4.001654e+08


In [49]:
# Export the outlier analysis: keep only the rows whose combined outlier count
# (S&P 500 + DAX) is positive, working on a copy so the source frame is untouched.
flagged_any = (
    outlier_summary['SP500_Outlier_Count']
    .add(outlier_summary['DAX_Outlier_Count'])
    .gt(0)
)
outlier_summary_export = outlier_summary[flagged_any].copy()
# Render the 'Date' column as YYYY-MM-DD text for the CSV.
outlier_summary_export['Date_Formatted'] = outlier_summary_export['Date'].dt.strftime('%Y-%m-%d')

# Column layout of the export: date, raw volumes, per-index counts, then the
# individual per-method flags for each index.
cols_order = [
    'Date_Formatted',
    'SP500_Volume', 'DAX_Volume',
    'SP500_Outlier_Count', 'DAX_Outlier_Count',
    'SP500_ZScore_Outlier', 'SP500_IQR_Outlier', 'SP500_ModZ_Outlier', 'SP500_Pct_Outlier',
    'DAX_ZScore_Outlier', 'DAX_IQR_Outlier', 'DAX_ModZ_Outlier', 'DAX_Pct_Outlier',
]

outlier_summary_export = outlier_summary_export[cols_order]
outlier_summary_export.to_csv('volume_outliers_analysis.csv', index=False)

# Rows where at least one of the two indices has a count of 3 or more.
# NOTE(review): the recorded run printed 0 for this figure, which is surprising
# next to the per-method totals below — confirm how the *_Outlier_Count columns
# are computed upstream.
high_confidence = (
    outlier_summary_export['SP500_Outlier_Count'].ge(3)
    | outlier_summary_export['DAX_Outlier_Count'].ge(3)
)

print("Outlier analysis saved to 'volume_outliers_analysis.csv'")
print(f"\nTotal dates with outliers: {len(outlier_summary_export)}")
print(f"High-confidence outliers (3+ methods agree): {int(high_confidence.sum())}")

# Per-method totals, printed as a fixed-width table (one row per method).
per_method_flags = (
    ('Z-Score', sp500_outliers_zscore, dax_outliers_zscore),
    ('IQR', sp500_outliers_iqr, dax_outliers_iqr),
    ('Modified Z-Score', sp500_outliers_mod_z, dax_outliers_mod_z),
    ('Percentile', sp500_outliers_pct, dax_outliers_pct),
)
print("\nOutlier Detection Summary:")
print(f"{'Method':<20} {'SP500 Outliers':<15} {'DAX Outliers'}")
print('=' * 20, '=' * 15, '=' * 12)
print("\n".join(
    f"{label:<20} {sp500_flags.sum():<15} {dax_flags.sum()}"
    for label, sp500_flags, dax_flags in per_method_flags
))

Outlier analysis saved to 'volume_outliers_analysis.csv'

Total dates with outliers: 238
High-confidence outliers (3+ methods agree): 0

Outlier Detection Summary:
Method               SP500 Outliers  DAX Outliers
Z-Score              69              71
IQR                  129             129
Modified Z-Score     98              75
Percentile           50              50


In [7]:
# Keep only the rows of data_not_na whose "dauer_genehmigung" is strictly
# positive, then display the result.
genehmigungsdauer = data_not_na.loc[data_not_na["dauer_genehmigung"].gt(0)]
genehmigungsdauer

Unnamed: 0,goal100_id,einheit_mastr_nummer,behoerden_einheit_id,name_windpark,name_stromerzeugungseinheit,einheit_betriebsstatus,ags_gemeinde,bundesland,landkreis,gemeinde,...,dauer_genehmigung,dauer_realisierung,id_version_download_datenset_behoerde,id_version_download_datenset_mastr,x_25832,y_25832,x_4326,y_4326,ags_bundesland,ags_landkreis
4,79220,SEE970653437198,SH-03000924205-0002,Windpark Lübeck Nord,WEA2 Pöppendorf,In Betrieb,1003000.0,Schleswig-Holstein,Lübeck,Lübeck,...,203.0,539.0,59.0,2.0,619814.179633,5.977998e+06,10.825159,53.936484,1.0,01003
6,78441,SEE948056013141,SH-51001900011-0004,Bürgerwindpark Albersdorf,WEA 4 / V 203772,In Betrieb,1051001.0,Schleswig-Holstein,Dithmarschen,Albersdorf,...,258.0,192.0,59.0,7.0,516927.977265,5.998977e+06,9.259109,54.138631,1.0,01051
7,79178,SEE969313089747,SH-51001900011-0001,Bürgerwindpark Albersdorf,WEA 1 / V 203769,In Betrieb,1051001.0,Schleswig-Holstein,Dithmarschen,Albersdorf,...,258.0,193.0,59.0,7.0,516647.993158,5.999295e+06,9.254841,54.141498,1.0,01051
8,79324,SEE973804784493,SH-51001900011-0005,Bürgerwindpark Albersdorf,WEA 5 / V 203773,In Betrieb,1051001.0,Schleswig-Holstein,Dithmarschen,Albersdorf,...,258.0,200.0,59.0,7.0,517034.002839,5.998625e+06,9.260712,54.135464,1.0,01051
9,78680,SEE954883336339,SH-51001900011-0003,Bürgerwindpark Albersdorf,WEA 3 / V 203771,In Betrieb,1051001.0,Schleswig-Holstein,Dithmarschen,Albersdorf,...,258.0,192.0,59.0,7.0,516705.970862,5.998672e+06,9.255694,54.135897,1.0,01051
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42651,24577,SEE947239808451,TH-FAWIND-GOAL100-SEE947239808451,Kraasa,Kraasa 9,In Betrieb,16077044.0,Thüringen,Altenburger Land,Starkenberg,...,311.0,412.0,23.0,48.0,729612.900483,5.653407e+06,12.271667,50.986389,16.0,16077
42653,24645,SEE965077486013,TH-FAWIND-GOAL100-SEE965077486013,Kraasa,Kraasa 8,In Betrieb,16077044.0,Thüringen,Altenburger Land,Starkenberg,...,311.0,378.0,23.0,48.0,729668.302083,5.653039e+06,12.272222,50.983056,16.0,16077
42654,24507,SEE926469864444,TH-FAWIND-GOAL100-SEE926469864444,Windpark Kraasa,VE-01-206089,In Betrieb,16077044.0,Thüringen,Altenburger Land,Starkenberg,...,279.0,57.0,23.0,48.0,730314.272403,5.653665e+06,12.281806,50.988417,16.0,16077
42661,24597,SEE953472606747,TH-FAWIND-GOAL100-SEE953472606747,WEA 05,WEA 5,In Betrieb,16077044.0,Thüringen,Altenburger Land,Starkenberg,...,608.0,300.0,23.0,48.0,729947.012664,5.653305e+06,12.276355,50.985338,16.0,16077


In [30]:
# Join min_dauer_bl with max_dauer_bl on "bundesland"; columns present in both
# inputs are disambiguated with the "_min" / "_max" suffixes.
min_max_bl = pd.merge(
    left=min_dauer_bl,
    right=max_dauer_bl,
    on="bundesland",
    suffixes=("_min", "_max"),
)
# Join median_dauer_bl onto that result, then turn the current index into a
# regular column so it is still present in the CSV, which is written without
# the index.
min_max_med_bl = pd.merge(
    left=median_dauer_bl,
    right=min_max_bl,
    on="bundesland",
).reset_index()
min_max_med_bl.to_csv(path_or_buf="min_max_median_bl.csv", index=False)

In [11]:
# Rebind the frame to a copy ordered ascending by "dauer_genehmigung" and show
# its first 50 rows.
median_time_to_genehmigung = median_time_to_genehmigung.sort_values(
    "dauer_genehmigung", ascending=True
)
median_time_to_genehmigung.iloc[:50]

Unnamed: 0,landkreis,ags_landkreis,dauer_genehmigung
18,Eichsfeld,16061,102.0
72,Ostalbkreis,8136,184.0
25,Freudenstadt,8237,186.0
111,Südliche Weinstraße,7337,186.0
128,Worms,7319,210.0
94,Saale-Holzland-Kreis,16074,211.0
61,Neckar-Odenwald-Kreis,8225,216.0
23,Euskirchen,5366,219.0
89,Rhein-Lahn-Kreis,7141,231.0
3,Alzey-Worms,7331,244.5


In [21]:
# Keep only the rows of data whose "dauer_realisierung" is strictly positive,
# then display the result.
dauer_realisierung = data.loc[data["dauer_realisierung"].gt(0)]
dauer_realisierung

Unnamed: 0,goal100_id,einheit_mastr_nummer,behoerden_einheit_id,name_windpark,name_stromerzeugungseinheit,einheit_betriebsstatus,ags_gemeinde,bundesland,landkreis,gemeinde,...,dauer_genehmigung,dauer_realisierung,id_version_download_datenset_behoerde,id_version_download_datenset_mastr,x_25832,y_25832,x_4326,y_4326,ags_bundesland,ags_landkreis
4,79220,SEE970653437198,SH-03000924205-0002,Windpark Lübeck Nord,WEA2 Pöppendorf,In Betrieb,1003000.0,Schleswig-Holstein,Lübeck,Lübeck,...,203.0,539.0,59.0,2.0,619814.179633,5.977998e+06,10.825159,53.936484,1.0,01003
6,78441,SEE948056013141,SH-51001900011-0004,Bürgerwindpark Albersdorf,WEA 4 / V 203772,In Betrieb,1051001.0,Schleswig-Holstein,Dithmarschen,Albersdorf,...,258.0,192.0,59.0,7.0,516927.977265,5.998977e+06,9.259109,54.138631,1.0,01051
7,79178,SEE969313089747,SH-51001900011-0001,Bürgerwindpark Albersdorf,WEA 1 / V 203769,In Betrieb,1051001.0,Schleswig-Holstein,Dithmarschen,Albersdorf,...,258.0,193.0,59.0,7.0,516647.993158,5.999295e+06,9.254841,54.141498,1.0,01051
8,79324,SEE973804784493,SH-51001900011-0005,Bürgerwindpark Albersdorf,WEA 5 / V 203773,In Betrieb,1051001.0,Schleswig-Holstein,Dithmarschen,Albersdorf,...,258.0,200.0,59.0,7.0,517034.002839,5.998625e+06,9.260712,54.135464,1.0,01051
9,78680,SEE954883336339,SH-51001900011-0003,Bürgerwindpark Albersdorf,WEA 3 / V 203771,In Betrieb,1051001.0,Schleswig-Holstein,Dithmarschen,Albersdorf,...,258.0,192.0,59.0,7.0,516705.970862,5.998672e+06,9.255694,54.135897,1.0,01051
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42663,41655,SEE905015577458,,meridian Windpark Thonhausen,WEA 4,In Betrieb,16077047.0,Thüringen,Altenburger Land,Thonhausen,...,,355.0,,48.0,735203.382956,5.636053e+06,12.340000,50.828333,16.0,16077
42664,41828,SEE929841659676,,meridian Windpark Thonhausen,WEA 7,In Betrieb,16077047.0,Thüringen,Altenburger Land,Thonhausen,...,,240.0,,48.0,735567.702820,5.635576e+06,12.344858,50.823906,16.0,16077
42665,29924,SEE924115847834,,meridian Windpark Thonhausen,WEA 6,In Betrieb,16077047.0,Thüringen,Altenburger Land,Thonhausen,...,,349.0,,48.0,735559.598844,5.635976e+06,12.345000,50.827500,16.0,16077
42666,24143,SEE924538245743,SN-09000000004-0002,Mark Sahnau / Thonhausen,Mark Sahnau WEA2,In Betrieb,16077047.0,Thüringen,Altenburger Land,Thonhausen,...,,296.0,34.0,2.0,735756.754790,5.636739e+06,12.348285,50.834268,16.0,16077


In [26]:
# Show the rows whose "landkreis" contains the text "Erfurt", ordered ascending
# by "dauer_realisierung".
#   * na=False   - rows with a missing landkreis count as non-matches; without
#                  it the mask would contain NaN and boolean indexing would raise.
#   * regex=False - "Erfurt" is matched as literal text, not as a pattern.
(
    dauer_realisierung_mindest
    .loc[lambda frame: frame["landkreis"].str.contains("Erfurt", na=False, regex=False)]
    .sort_values(by="dauer_realisierung")
)

Unnamed: 0,goal100_id,einheit_mastr_nummer,behoerden_einheit_id,name_windpark,name_stromerzeugungseinheit,einheit_betriebsstatus,ags_gemeinde,bundesland,landkreis,gemeinde,...,dauer_genehmigung,dauer_realisierung,id_version_download_datenset_behoerde,id_version_download_datenset_mastr,x_25832,y_25832,x_4326,y_4326,ags_bundesland,ags_landkreis
41607,24600,SEE954018466771,TH-FAWIND-GOAL100-SEE954018466771,Kerspleben 2,531507,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,176.0,227.0,23.0,2.0,646042.100771,5654831.0,11.082553,51.026456,16.0,16051
41608,38145,SEE959393004785,,Kerspleben 1,53474,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,,274.0,,2.0,646311.13358,5654547.0,11.086272,51.023834,16.0,16051
41575,24529,SEE932471928456,TH-FAWIND-GOAL100-SEE932471928456,CEE Windpark Töttleben,CEE Windpark Töttleben WEA 1,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,407.0,445.0,23.0,48.0,647644.980222,5655664.0,11.105733,51.033535,16.0,16051
41578,34940,SEE902132723218,,Windpark Möbisburg GmbH ＆ Co. KG,WEA 1 - Nr. 78418,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,,857.0,,2.0,639289.628072,5641111.0,10.981073,50.90483,16.0,16051
41580,34945,SEE915327261174,,Windpark Möbisburg GmbH ＆ Co. KG,WEA 3 - Nr. 78420,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,,875.0,,2.0,639175.609546,5641661.0,10.979662,50.909798,16.0,16051
41579,34943,SEE922411163153,,Windpark Möbisburg GmbH ＆ Co. KG,WEA 2 - Nr. 78419,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,,896.0,,2.0,639215.811345,5641374.0,10.980124,50.907209,16.0,16051
41587,34950,SEE991418465737,,Windpark Möbisburg GmbH ＆ Co. KG,WEA 7 - Nr. 78424,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,,905.0,,2.0,640007.25876,5641903.0,10.991577,50.911777,16.0,16051
41583,34949,SEE994547937892,,Windpark Möbisburg GmbH ＆ Co. KG,WEA 6 - Nr. 78423,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,,906.0,,2.0,640054.008793,5641620.0,10.992133,50.909223,16.0,16051
41596,34952,SEE913659595509,,Windpark Möbisburg GmbH ＆ Co. KG,WEA 9 - Nr. 78426,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,,918.0,,2.0,640447.034518,5642031.0,10.997878,50.912822,16.0,16051
41593,34954,SEE934461145102,,Windpark Möbisburg GmbH ＆ Co. KG,WEA 11 - Nr. 78428,In Betrieb,16051000.0,Thüringen,Erfurt,Erfurt,...,,919.0,,2.0,641151.260188,5641966.0,11.007864,50.912066,16.0,16051


In [91]:
# Display the first 50 rows after ordering ascending by "dauer_realisierung";
# the original frame is left unchanged.
(
    median_time_to_realisierung
    .sort_values("dauer_realisierung", ascending=True)
    .iloc[:50]
)

Unnamed: 0,landkreis,ags_landkreis,dauer_realisierung
107,Krefeld,5114,62.0
183,Rhein-Lahn-Kreis,7141,64.0
251,Worms,7319,69.5
7,Ammerland,3451,76.0
46,Dortmund,5913,132.0
61,Freiburg im Breisgau,8311,159.0
216,Sächsische Schweiz-Osterzgebirge,14628,163.0
72,Grafschaft Bentheim,3456,211.0
121,Lüneburg,3355,211.0
3,Altenburger Land,16077,216.0


In [100]:
# Inner-join the two frames; a row is kept only when both "ags_landkreis" and
# "landkreis" match on each side.
genehmigung_realisierung_merged = median_time_to_genehmigung.merge(
    median_time_to_realisierung,
    how="inner",
    on=["ags_landkreis", "landkreis"],
)
genehmigung_realisierung_merged

Unnamed: 0,landkreis,ags_landkreis,dauer_genehmigung,dauer_realisierung
0,Alb-Donau-Kreis,08425,251.0,442.0
1,Altenburger Land,16077,311.0,216.0
2,Alzey-Worms,07331,244.5,217.0
3,Bad Dürkheim,07332,329.0,333.0
4,Bad Kreuznach,07133,309.0,472.0
...,...,...,...,...
117,Wesel,05170,598.0,317.0
118,Westerwaldkreis,07143,372.0,350.0
119,Wetteraukreis,06440,644.0,387.0
120,Worms,07319,210.0,69.5


In [109]:
# Order by "dauer_realisierung", renumber the rows 0..n-1, and expose that
# position as the column "rank_realisierung" (0 = smallest value).
genehmigung_realisierung_merged_rank_2 = (
    genehmigung_realisierung_merged_rank_1
    .sort_values("dauer_realisierung")
    .reset_index(drop=True)   # discard the old index, start again from 0
    .reset_index()            # the fresh 0..n-1 index becomes a column named "index"
    .rename(columns={"index": "rank_realisierung"})
)
genehmigung_realisierung_merged_rank_2

Unnamed: 0,rank_realisierung,rank_genehmigung,landkreis,ags_landkreis,dauer_genehmigung,dauer_realisierung
0,0,8,Rhein-Lahn-Kreis,07141,231.0,64.0
1,1,4,Worms,07319,210.0,69.5
2,2,27,Altenburger Land,16077,311.0,216.0
3,3,29,Oberhavel,12065,316.0,216.5
4,4,9,Alzey-Worms,07331,244.5,217.0
...,...,...,...,...,...,...
117,117,84,Hagen,05914,562.0,826.0
118,118,119,Sigmaringen,08437,1400.0,889.5
119,119,121,Erfurt,16051,2996.0,920.0
120,120,79,Märkischer Kreis,05962,543.0,950.0


In [112]:
# Display the 20 rows that come first when ordering ascending by "mean_rank".
(
    genehmigung_realisierung_merged_rank_2
    .sort_values("mean_rank", ascending=True)
    .iloc[:20]
)

Unnamed: 0,rank_realisierung,rank_genehmigung,landkreis,ags_landkreis,dauer_genehmigung,dauer_realisierung,mean_rank
1,1,4,Worms,7319,210.0,69.5,2.5
0,0,8,Rhein-Lahn-Kreis,7141,231.0,64.0,4.0
4,4,9,Alzey-Worms,7331,244.5,217.0,6.5
9,9,5,Saale-Holzland-Kreis,16074,211.0,244.0,7.0
20,20,1,Ostalbkreis,8136,184.0,297.0,10.5
17,17,12,Neunkirchen,10043,254.0,283.0,14.5
22,22,7,Euskirchen,5366,219.0,307.0,14.5
2,2,27,Altenburger Land,16077,311.0,216.0,14.5
3,3,29,Oberhavel,12065,316.0,216.5,16.0
12,12,21,Nordfriesland,1054,284.0,266.0,16.5


In [114]:
# Write the ranked frame to disk; the DataFrame index is not written.
genehmigung_realisierung_merged_rank_2.to_csv(
    path_or_buf="genehmigungen_realisierung.csv",
    index=False,
)