## Merge cleaned Eurostat and Google Trends Data

In [63]:
import pandas as pd

# Load the processed Eurostat and Google Trends tables.
# In both files the first column carries the row labels (indicator / search-term
# names, as the printed heads show); it is renamed to 'year' so the two frames
# share an identical header.
df_eurostat = pd.read_csv("../data/processed/eurostat_cleaned_2010_2024.csv")
df_eurostat = df_eurostat.rename(columns={df_eurostat.columns[0]: "year"})

df_google_trends = pd.read_csv("../data/processed/google_trends_migration_yearly.csv")
df_google_trends = df_google_trends.rename(columns={df_google_trends.columns[0]: "year"})

# Quick sanity check of the first rows of each frame.
print(df_eurostat.head(), df_google_trends.head(), sep="\n")

                        year        2010        2011        2012        2013  \
0         net migration rate         1.6         3.7         4.9         5.6   
1  natural population change   -180821.0   -189643.0   -196038.0   -211756.0   
2              net migration    130166.0    295478.0    391884.0    455473.0   
3   total population (1 Jan)  81802257.0  80222065.0  80327900.0  80523746.0   

         2014        2015        2016        2017        2018        2019  \
0         7.2        14.3         5.6         5.1         4.8         3.7   
1   -153429.0   -187625.0   -118761.0   -147371.0   -167351.0   -161430.0   
2    583503.0   1165772.0    464730.0    418069.0    394213.0    308928.0   
3  80767463.0  81197537.0  82175684.0  82521653.0  82792351.0  83019213.0   

         2020        2021        2022        2023        2024  
0         2.4         3.7         2.5         8.1         5.4  
1   -212428.0   -228195.0   -327522.0   -335217.0   -330641.0  
2    200748.0    3102

In [64]:
# Stack the Google Trends rows underneath the Eurostat rows.
# Both frames are wide (one row per indicator / search term, one column per
# year), so this is a row-wise concat aligned on column labels -- it is not a
# join on the 'year' column.
mismatched_columns = set(df_eurostat.columns) ^ set(df_google_trends.columns)
if mismatched_columns:
    # pd.concat would otherwise pad every non-shared column with NaN silently.
    raise ValueError(
        "Eurostat and Google Trends frames do not share the same columns: "
        f"{sorted(mismatched_columns, key=str)}"
    )

df_combined = pd.concat([df_eurostat, df_google_trends], ignore_index=True)
df_combined

Unnamed: 0,year,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,net migration rate,1.6,3.7,4.9,5.6,7.2,14.3,5.6,5.1,4.8,3.7,2.4,3.7,2.5,8.1,5.4
1,natural population change,-180821.0,-189643.0,-196038.0,-211756.0,-153429.0,-187625.0,-118761.0,-147371.0,-167351.0,-161430.0,-212428.0,-228195.0,-327522.0,-335217.0,-330641.0
2,net migration,130166.0,295478.0,391884.0,455473.0,583503.0,1165772.0,464730.0,418069.0,394213.0,308928.0,200748.0,310288.0,208899.0,672761.0,451736.0
3,total population (1 Jan),81802260.0,80222060.0,80327900.0,80523750.0,80767460.0,81197540.0,82175680.0,82521650.0,82792350.0,83019210.0,83166710.0,83155030.0,83237120.0,83118500.0,83456040.0
4,Asyl,0.0,0.0,0.417,0.667,1.25,3.333,2.0,1.083,1.083,1.0,1.0,0.917,1.0,1.0,1.0
5,Flüchtlinge,0.0,0.5,0.333,1.083,2.417,29.333,23.75,10.167,7.667,4.333,3.583,2.583,6.25,3.583,2.667
6,Migration,2.083,2.25,2.0,2.0,2.0,2.667,2.5,2.167,2.25,2.0,2.0,2.0,2.167,2.5,3.083
7,Ausländer,2.0,2.083,2.0,2.0,2.083,2.5,2.083,2.0,2.0,2.083,2.0,2.0,2.0,2.25,3.083
8,Remigration,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25


In [65]:
# Translate the label column header and the row labels to German.
#
# Written as one non-mutating chain so that re-running this cell is a no-op
# instead of failing with KeyError on the already-renamed column:
#   * DataFrame.rename ignores a missing "year" key by default, and
#   * Series.replace with a dict matches whole cell values only, so labels that
#     are already translated are left untouched.
df_combined = (
    df_combined
    .rename(columns={"year": "Jahr"})
    .assign(
        # assign() overwrites the existing "Jahr" column in its current position
        Jahr=lambda frame: frame["Jahr"].replace({
            "net migration rate": "Netto-Migrationsrate",
            "natural population change": "Natürliche Bevölkerungsänderung",
            "net migration": "Netto-Migration",
            "total population (1 Jan)": "Gesamtbevölkerung (1. Jan)",
            "Asyl": "Suchinteresse Asyl",
            "Flüchtlinge": "Suchinteresse Flüchtlinge",
            "Migration": "Suchinteresse Migration",
            "Ausländer": "Suchinteresse Ausländer",
            "Remigration": "Suchinteresse Remigration"
        })
    )
)
df_combined

Unnamed: 0,Jahr,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,Netto-Migrationsrate,1.6,3.7,4.9,5.6,7.2,14.3,5.6,5.1,4.8,3.7,2.4,3.7,2.5,8.1,5.4
1,Natürliche Bevölkerungsänderung,-180821.0,-189643.0,-196038.0,-211756.0,-153429.0,-187625.0,-118761.0,-147371.0,-167351.0,-161430.0,-212428.0,-228195.0,-327522.0,-335217.0,-330641.0
2,Netto-Migration,130166.0,295478.0,391884.0,455473.0,583503.0,1165772.0,464730.0,418069.0,394213.0,308928.0,200748.0,310288.0,208899.0,672761.0,451736.0
3,Gesamtbevölkerung (1. Jan),81802260.0,80222060.0,80327900.0,80523750.0,80767460.0,81197540.0,82175680.0,82521650.0,82792350.0,83019210.0,83166710.0,83155030.0,83237120.0,83118500.0,83456040.0
4,Suchinteresse Asyl,0.0,0.0,0.417,0.667,1.25,3.333,2.0,1.083,1.083,1.0,1.0,0.917,1.0,1.0,1.0
5,Suchinteresse Flüchtlinge,0.0,0.5,0.333,1.083,2.417,29.333,23.75,10.167,7.667,4.333,3.583,2.583,6.25,3.583,2.667
6,Suchinteresse Migration,2.083,2.25,2.0,2.0,2.0,2.667,2.5,2.167,2.25,2.0,2.0,2.0,2.167,2.5,3.083
7,Suchinteresse Ausländer,2.0,2.083,2.0,2.0,2.083,2.5,2.083,2.0,2.0,2.083,2.0,2.0,2.0,2.25,3.083
8,Suchinteresse Remigration,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25


#### Eurobarometer Data

In [66]:
# Load the raw Eurobarometer export.
df_eurobarometer = pd.read_csv("../data/raw/eurobarometer.csv")

# Clean in one chain: trim whitespace from the header, drop the identifier
# column, and switch the remaining column names to German.
df_eurobarometer = (
    df_eurobarometer
    .rename(columns=str.strip)
    .drop(columns=["identifier"])
    .rename(columns={"year": "Jahr", "opinion_percentage": "Meinungsprozentsatz"})
)

# Reshape to a single wide row: one column per "Jahr" value, with
# "Meinungsprozentsatz" as the row label, columns in ascending order.
df_eurobarometer_pivot = df_eurobarometer.set_index("Jahr")[["Meinungsprozentsatz"]].T
df_eurobarometer_pivot = df_eurobarometer_pivot.reindex(
    columns=sorted(df_eurobarometer_pivot.columns)
)

# NOTE(review): index=False means the "Meinungsprozentsatz" row label is not
# written to the file -- confirm that is intended.
df_eurobarometer_pivot.to_csv("../data/processed/eurobarometer_pivot.csv", index=False)
print("saved")

saved


In [71]:
# Append the Eurobarometer figures to the combined table as one extra row:
# pivot row -> Series -> one-row frame with a leading "Jahr" label column -> concat.

# Cast the pivot's column labels to strings. concat aligns on column labels;
# presumably the labels here are numeric while df_combined's come from a CSV
# header as strings -- TODO confirm.
df_eurobarometer_pivot.columns = df_eurobarometer_pivot.columns.astype(str)

# First (only expected) row of the pivot; the Series name is that row's label.
series_eurobarometer = df_eurobarometer_pivot.iloc[0]

# Turn the Series back into a one-row frame and put its label into a leading
# "Jahr" column so the layout matches df_combined.
row = series_eurobarometer.to_frame().transpose()
row.insert(loc=0, column="Jahr", value=series_eurobarometer.name)

df_final = pd.concat(objs=[df_combined, row], axis=0, ignore_index=True)


df_final


Unnamed: 0,Jahr,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,Netto-Migrationsrate,1.6,3.7,4.9,5.6,7.2,14.3,5.6,5.1,4.8,3.7,2.4,3.7,2.5,8.1,5.4
1,Natürliche Bevölkerungsänderung,-180821.0,-189643.0,-196038.0,-211756.0,-153429.0,-187625.0,-118761.0,-147371.0,-167351.0,-161430.0,-212428.0,-228195.0,-327522.0,-335217.0,-330641.0
2,Netto-Migration,130166.0,295478.0,391884.0,455473.0,583503.0,1165772.0,464730.0,418069.0,394213.0,308928.0,200748.0,310288.0,208899.0,672761.0,451736.0
3,Gesamtbevölkerung (1. Jan),81802260.0,80222060.0,80327900.0,80523750.0,80767460.0,81197540.0,82175680.0,82521650.0,82792350.0,83019210.0,83166710.0,83155030.0,83237120.0,83118500.0,83456040.0
4,Suchinteresse Asyl,0.0,0.0,0.417,0.667,1.25,3.333,2.0,1.083,1.083,1.0,1.0,0.917,1.0,1.0,1.0
5,Suchinteresse Flüchtlinge,0.0,0.5,0.333,1.083,2.417,29.333,23.75,10.167,7.667,4.333,3.583,2.583,6.25,3.583,2.667
6,Suchinteresse Migration,2.083,2.25,2.0,2.0,2.0,2.667,2.5,2.167,2.25,2.0,2.0,2.0,2.167,2.5,3.083
7,Suchinteresse Ausländer,2.0,2.083,2.0,2.0,2.083,2.5,2.083,2.0,2.0,2.083,2.0,2.0,2.0,2.25,3.083
8,Suchinteresse Remigration,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25
9,Meinungsprozentsatz,12.0,4.0,8.0,16.0,37.0,76.0,45.0,40.0,36.0,26.0,17.0,12.0,8.0,43.0,35.0


In [72]:
# Write the merged table to the processed-data folder; index=False keeps the
# frame's row index out of the CSV.
df_final.to_csv(path_or_buf="../data/processed/combined_migration_data.csv", index=False)
print("saved")

saved
