# General Assumptions and Import Statements
The aim of this use case is to analyse whether there is a correlation between economic welfare and migration. The hypothesis is that people move to places where welfare is higher.

In [None]:
# Import necessary libraries

import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

# Import datenguidepy package

%cd ..
from datenguidepy.query_builder import Query
from datenguidepy.query_helper import get_all_regions

# Fetch Data for the Bundesländer in Germany with Datenguidepy

In [None]:
# Look up the regions on the "nuts1" level (the Bundesländer) and keep their
# names; the index of the resulting Series is what the query cell uses as region ids.

region_codes = (
    get_all_regions()
    .query('level == "nuts1"')
    ["name"]
)
region_codes

In [None]:
# Build one query covering all selected regions, then attach each statistic of interest
region_query = Query.region(list(region_codes.index))
for statistic in (
    'BEVSTD',  # population
    'BEV085',  # moving in administrative districts
    'BEV086',  # moving out administrative districts
    'BIP804',  # GDP per capita
):
    region_query.add_field(statistic)

# Run the query and keep what it returns
results = region_query.results()

# Show the first rows as a quick sanity check
results.head()

In [None]:
# Net migration per 1000 inhabitants:
#   (people moving in - people moving out) / population * 1000
results["migration"] = (
    results["BEV085"]
    .sub(results["BEV086"])
    .div(results["BEVSTD"])
    .mul(1000)
)

# Summary statistics for every numeric column, including the new one
results.describe()

# Create a Visualisation

In [None]:
# Scatter net migration against GDP per capita for every row in `results`.
# Marker colour encodes the year, marker area encodes the population.

# plt.subplots returns a (figure, axes) tuple; unpack it so `ax` really is the
# Axes and every later call can target it explicitly.
fig, ax = plt.subplots(figsize=(25, 15))

# The colour scale limits and the colourbar ticks share the same year range.
# pandas' min()/max() skip missing values, unlike the builtins.
year_min = int(results["year"].min())
year_max = int(results["year"].max())

# Make a scatterplot with adjusted color and size mapping
points = ax.scatter(
    results["BIP804"],
    results["migration"],
    c=results["year"],
    s=results["BEVSTD"] / 10000,  # scale population down to a usable marker area
    cmap='GnBu',
    alpha=0.5,
    vmin=year_min,
    vmax=year_max,
)
fig.colorbar(points, ax=ax, label='Year', ticks=range(year_min, year_max + 1, 2))

# Draw the regression line on the same axes
sns.regplot(x="BIP804", y="migration", data=results, scatter=False, color="red", ax=ax)

# Create a legend for the population size using empty scatter series whose
# marker areas follow the same scaling as the data points.
pop_min = results["BEVSTD"].min()
pop_max = results["BEVSTD"].max()
for pop in np.linspace(pop_min, pop_max, num=4):
    # One decimal place: truncating with int() would label every population
    # below one million as "0 Million".
    ax.scatter([], [], c='k', alpha=0.3, s=pop / 10000,
               label='     {:.1f} Million'.format(pop / 1000000))
ax.legend(scatterpoints=1, frameon=False, labelspacing=3, title='Population',
          title_fontsize="larger", loc='best', bbox_to_anchor=(0.5, -0.25, 0.5, 0.5))

# Add title and labels
ax.set_title('Migration and GDP', fontweight="bold")
ax.set_xlabel('Gross Domestic Product per Capita', fontweight="bold")
ax.set_ylabel('Net Migration per 1000 Inhabitants', fontweight="bold");

The analysis is based on data for all 16 Bundesländer in Germany from 1995 to 2017. Over the years, both GDP and migration increased. In general, the plot shows a positive correlation between net migration and GDP.