# Data Cleaning with PySpark

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import (
    col,
    count,
    monotonically_increasing_id,
    regexp_extract,
    regexp_replace,
    row_number,
    when,
)
from pyspark.sql.window import Window


# Start (or reuse) the Spark session that every later cell reads from.
spark = (
    SparkSession.builder
    .appName("Data_cleaning")
    .getOrCreate()
)

## 1. Data Reading and Loading

In [2]:
# Load the semicolon-separated population file, using its first line as the
# header. No schema is supplied, so every column arrives as a string.
df = (
    spark.read
    .options(header=True, sep=";")
    .csv("population.csv")
)

In [3]:
# First three rows as Row objects, to see the raw (noisy) cell values.
df.head(3)

[Row(postcode='Ned1erland', year='"n/201""9"', total='"1""7n/28n/21""63"', with_migration_background='"40861""38"', of_dutch_origin='"1""31""960n/25."', of_western_migration_origin='"1""774n/271"""', afrika='69307n/2', amerika='"7001""5.5."', asia='937304', europe_excluding_dutch_background='"1""731""779"', oceania='n/238n/28', belgium='"1""1""9769"', germany='"35.1""5.5.n/2"', indonesia='358773', morocco='40n/249n/2', former_dutch_antilles_aruba='"1""61""n/265."', poland='"1""85497"', suriname='353909', turkey='409877', other_western_migration_background='758680'),
 Row(postcode='Nede2rland', year='n/20n/20', total='"1""74075.85."', with_migration_background='4n/2n/20705.', of_dutch_origin='"1""31""86880"', of_western_migration_origin='"1""8n/28645."', afrika='"71""473n/2"', amerika='"71""9601"""', asia='969980', europe_excluding_dutch_background='"1""791""85.0"', oceania='n/245.4n/2', belgium='"1""n/21""01""9"', germany='349n/284', indonesia='3560n/29', morocco='408864', former_dutch

In [4]:
# Same three rows, rendered as a table.
df.show(n=3)

+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+--------+----------+----------------------------------+
|  postcode|      year|             total|with_migration_background|  of_dutch_origin|of_western_migration_origin|      afrika|     amerika|  asia|europe_excluding_dutch_background|   oceania|         belgium|        germany| indonesia|     morocco|former_dutch_antilles_aruba|      poland|suriname|    turkey|other_western_migration_background|
+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+--------+---------

In [None]:
# collect() ships every row to the driver and prints all of them into the
# notebook output. Cap the number of rows first so the cell stays cheap and
# the output readable while still demonstrating collect().
df.limit(20).collect()

## 2. Data Description

In [6]:
# Print column names and types. The CSV was read without a schema, so every
# column is reported as a nullable string.
df.printSchema()

root
 |-- postcode: string (nullable = true)
 |-- year: string (nullable = true)
 |-- total: string (nullable = true)
 |-- with_migration_background: string (nullable = true)
 |-- of_dutch_origin: string (nullable = true)
 |-- of_western_migration_origin: string (nullable = true)
 |-- afrika: string (nullable = true)
 |-- amerika: string (nullable = true)
 |-- asia: string (nullable = true)
 |-- europe_excluding_dutch_background: string (nullable = true)
 |-- oceania: string (nullable = true)
 |-- belgium: string (nullable = true)
 |-- germany: string (nullable = true)
 |-- indonesia: string (nullable = true)
 |-- morocco: string (nullable = true)
 |-- former_dutch_antilles_aruba: string (nullable = true)
 |-- poland: string (nullable = true)
 |-- suriname: string (nullable = true)
 |-- turkey: string (nullable = true)
 |-- other_western_migration_background: string (nullable = true)



In [7]:
# Column names as a plain Python list.
df.columns

['postcode',
 'year',
 'total',
 'with_migration_background',
 'of_dutch_origin',
 'of_western_migration_origin',
 'afrika',
 'amerika',
 'asia',
 'europe_excluding_dutch_background',
 'oceania',
 'belgium',
 'germany',
 'indonesia',
 'morocco',
 'former_dutch_antilles_aruba',
 'poland',
 'suriname',
 'turkey',
 'other_western_migration_background']

In [8]:
# Column names taken from the schema object; yields the same list as df.columns.
df.schema.names

['postcode',
 'year',
 'total',
 'with_migration_background',
 'of_dutch_origin',
 'of_western_migration_origin',
 'afrika',
 'amerika',
 'asia',
 'europe_excluding_dutch_background',
 'oceania',
 'belgium',
 'germany',
 'indonesia',
 'morocco',
 'former_dutch_antilles_aruba',
 'poland',
 'suriname',
 'turkey',
 'other_western_migration_background']

In [9]:
# describe() returns its statistics as a DataFrame, so without show() the cell
# output is only the DataFrame repr (column names and types), not the numbers.
df.describe()

DataFrame[summary: string, postcode: string, year: string, total: string, with_migration_background: string, of_dutch_origin: string, of_western_migration_origin: string, afrika: string, amerika: string, asia: string, europe_excluding_dutch_background: string, oceania: string, belgium: string, germany: string, indonesia: string, morocco: string, former_dutch_antilles_aruba: string, poland: string, suriname: string, turkey: string, other_western_migration_background: string]

In [10]:
# Render the describe() statistics as a table.
df.describe().show()

+-------+------------+----------+-----------------+-------------------------+------------------+---------------------------+-----------------+------------------+------------------+---------------------------------+------------------+------------------+------------------+-----------------+------------------+---------------------------+------------------+------------------+------------------+----------------------------------+
|summary|    postcode|      year|            total|with_migration_background|   of_dutch_origin|of_western_migration_origin|           afrika|           amerika|              asia|europe_excluding_dutch_background|           oceania|           belgium|           germany|        indonesia|           morocco|former_dutch_antilles_aruba|            poland|          suriname|            turkey|other_western_migration_background|
+-------+------------+----------+-----------------+-------------------------+------------------+---------------------------+-----------------+

In [11]:
# Render the summary() statistics as a table. The columns are still strings
# here, so these figures describe the raw, uncleaned text values.
df.summary().show()

+-------+------------+----------+-----------------+-------------------------+------------------+---------------------------+-----------------+------------------+------------------+---------------------------------+------------------+------------------+------------------+-----------------+------------------+---------------------------+------------------+------------------+------------------+----------------------------------+
|summary|    postcode|      year|            total|with_migration_background|   of_dutch_origin|of_western_migration_origin|           afrika|           amerika|              asia|europe_excluding_dutch_background|           oceania|           belgium|           germany|        indonesia|           morocco|former_dutch_antilles_aruba|            poland|          suriname|            turkey|other_western_migration_background|
+-------+------------+----------+-----------------+-------------------------+------------------+---------------------------+-----------------+

In [12]:
# Number of rows in the raw load.
df.count()

12233

In [13]:
# Number of columns in the raw load.
len(df.columns)

20

## 3. Handling Missing Data

In [14]:
# How many rows carry a value in with_migration_background.
has_background = col("with_migration_background").isNotNull()
df.where(has_background).count()

12180

In [15]:
# How many rows are missing with_migration_background.
lacks_background = col("with_migration_background").isNull()
df.where(lacks_background).count()

53

In [16]:
# Column names again, as a reference for the per-column null check that follows.
df.columns

['postcode',
 'year',
 'total',
 'with_migration_background',
 'of_dutch_origin',
 'of_western_migration_origin',
 'afrika',
 'amerika',
 'asia',
 'europe_excluding_dutch_background',
 'oceania',
 'belgium',
 'germany',
 'indonesia',
 'morocco',
 'former_dutch_antilles_aruba',
 'poland',
 'suriname',
 'turkey',
 'other_western_migration_background']

In [17]:
# Null count per column, computed in ONE pass over the data instead of one
# Spark job per column. count() ignores the NULL that when() yields for
# non-null cells, so each aggregate is that column's number of nulls.
null_counts = df.select(
    [count(when(col(name).isNull(), name)).alias(name) for name in df.columns]
).first().asDict()

# Same "column : n" lines as before, in column order.
for name, n_null in null_counts.items():
    print(f"{name} : {n_null}")

postcode : 0
year : 0
total : 53
with_migration_background : 53
of_dutch_origin : 53
of_western_migration_origin : 53
afrika : 53
amerika : 53
asia : 53
europe_excluding_dutch_background : 53
oceania : 53
belgium : 53
germany : 53
indonesia : 53
morocco : 53
former_dutch_antilles_aruba : 53
poland : 53
suriname : 53
turkey : 53
other_western_migration_background : 53


In [18]:
# Replace every null with the marker string "deleting"; rows carrying the
# marker are filtered out further down.
df = df.na.fill("deleting")
df.show(n=3)

+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+--------+----------+----------------------------------+
|  postcode|      year|             total|with_migration_background|  of_dutch_origin|of_western_migration_origin|      afrika|     amerika|  asia|europe_excluding_dutch_background|   oceania|         belgium|        germany| indonesia|     morocco|former_dutch_antilles_aruba|      poland|suriname|    turkey|other_western_migration_background|
+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+--------+---------

In [19]:
# Filling nulls does not remove rows: the row count is unchanged.
df.count()

12233

## 4. Data Filtering and Cleaning

In [20]:
# Inspect the rows whose `total` was filled with the "deleting" marker.
is_placeholder = df["total"] == "deleting"
df.where(is_placeholder).show()

+------------+----------+--------+-------------------------+---------------+---------------------------+--------+--------+--------+---------------------------------+--------+--------+--------+---------+--------+---------------------------+--------+--------+--------+----------------------------------+
|    postcode|      year|   total|with_migration_background|of_dutch_origin|of_western_migration_origin|  afrika| amerika|    asia|europe_excluding_dutch_background| oceania| belgium| germany|indonesia| morocco|former_dutch_antilles_aruba|  poland|suriname|  turkey|other_western_migration_background|
+------------+----------+--------+-------------------------+---------------+---------------------------+--------+--------+--------+---------------------------------+--------+--------+--------+---------+--------+---------------------------+--------+--------+--------+----------------------------------+
|    "1""044"|n/20n/21""|deleting|                 deleting|       deleting|                  

In [21]:
# Rows are removed with filter(); drop() only removes columns and silently
# ignores a boolean expression such as `total == "deleting"`, leaving the
# frame untouched. The result is only previewed here; `df` is not reassigned,
# so the counts in the following cells still describe the unfiltered frame.
df.filter(col("total") != "deleting")

DataFrame[postcode: string, year: string, total: string, with_migration_background: string, of_dutch_origin: string, of_western_migration_origin: string, afrika: string, amerika: string, asia: string, europe_excluding_dutch_background: string, oceania: string, belgium: string, germany: string, indonesia: string, morocco: string, former_dutch_antilles_aruba: string, poland: string, suriname: string, turkey: string, other_western_migration_background: string]

In [22]:
# Rows still marked "deleting" in `total`.
df.where(df["total"] == "deleting").count()

53

In [23]:
# Rows still marked "deleting" in `with_migration_background`.
df.where(df["with_migration_background"] == "deleting").count()

53

In [24]:
# Total row count before the marker rows are filtered out.
df.count()

12233

In [25]:
# Keep only the rows whose `total` is not the "deleting" marker.
df = df.filter(col("total") != "deleting")

In [26]:
# Sanity check: no row should carry the marker in `total` any more.
df.where(df["total"] == "deleting").show()

+--------+----+-----+-------------------------+---------------+---------------------------+------+-------+----+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+
|postcode|year|total|with_migration_background|of_dutch_origin|of_western_migration_origin|afrika|amerika|asia|europe_excluding_dutch_background|oceania|belgium|germany|indonesia|morocco|former_dutch_antilles_aruba|poland|suriname|turkey|other_western_migration_background|
+--------+----+-----+-------------------------+---------------+---------------------------+------+-------+----+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+
+--------+----+-----+-------------------------+---------------+---------------------------+------+-------+----+---------------------------------+-------+-------+-------+---------

In [27]:
# One aggregate per column, all evaluated in a single select. count() skips
# the NULL that when() produces for non-null cells, so each output cell is
# that column's number of nulls.
null_tallies = [
    count(when(col(name).isNull(), name)).alias(name)
    for name in df.columns
]
df.select(*null_tallies).show()

+--------+----+-----+-------------------------+---------------+---------------------------+------+-------+----+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+
|postcode|year|total|with_migration_background|of_dutch_origin|of_western_migration_origin|afrika|amerika|asia|europe_excluding_dutch_background|oceania|belgium|germany|indonesia|morocco|former_dutch_antilles_aruba|poland|suriname|turkey|other_western_migration_background|
+--------+----+-----+-------------------------+---------------+---------------------------+------+-------+----+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+
|       0|   0|    0|                        0|              0|                          0|     0|      0|   0|                                0|      0|      0|      0|        0

## 5. Data Manipulation

In [28]:
# monotonically_increasing_id() is unique and increasing but NOT consecutive
# once the data spans more than one partition, while later cells pick rows by
# exact position (index == 1, index <= 4). Numbering the rows with
# row_number() over that ordering gives a gap-free, 0-based index.
# The un-partitioned window moves all rows into a single partition, which is
# acceptable for a frame of this size (about 12k rows).
row_order = Window.orderBy(monotonically_increasing_id())
df = df.withColumn("index", row_number().over(row_order) - 1)
df.show(3)

+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+--------+----------+----------------------------------+-----+
|  postcode|      year|             total|with_migration_background|  of_dutch_origin|of_western_migration_origin|      afrika|     amerika|  asia|europe_excluding_dutch_background|   oceania|         belgium|        germany| indonesia|     morocco|former_dutch_antilles_aruba|      poland|suriname|    turkey|other_western_migration_background|index|
+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+------

In [29]:
# Every column except the helper "index". Selecting by name rather than by
# the hard-coded slice [:20] keeps the list correct if the number of source
# columns changes or if this cell is re-run after "index" has been moved to
# the front.
column_list = [name for name in df.columns if name != "index"]

In [30]:
# Reorder the columns so that "index" comes first.
index_first = ["index"] + column_list
df = df.select(index_first)
df.show(n=3)

+-----+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+--------+----------+----------------------------------+
|index|  postcode|      year|             total|with_migration_background|  of_dutch_origin|of_western_migration_origin|      afrika|     amerika|  asia|europe_excluding_dutch_background|   oceania|         belgium|        germany| indonesia|     morocco|former_dutch_antilles_aruba|      poland|suriname|    turkey|other_western_migration_background|
+-----+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+

In [31]:
# Show the rows whose index is 1 or 2.
df.where(col("index").isin(1, 2)).show()

+-----+----------+----------+----------------+-------------------------+---------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+--------+----------+------------+---------------------------+------------+--------+----------+----------------------------------+
|index|  postcode|      year|           total|with_migration_background|of_dutch_origin|of_western_migration_origin|      afrika|     amerika|  asia|europe_excluding_dutch_background|   oceania|         belgium| germany| indonesia|     morocco|former_dutch_antilles_aruba|      poland|suriname|    turkey|other_western_migration_background|
+-----+----------+----------+----------------+-------------------------+---------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+--------+----------+------------+---------------------------+------------+--------+----------+-------------

In [32]:
# First three rows of the indexed frame.
df.show(n=3)

+-----+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+--------+----------+----------------------------------+
|index|  postcode|      year|             total|with_migration_background|  of_dutch_origin|of_western_migration_origin|      afrika|     amerika|  asia|europe_excluding_dutch_background|   oceania|         belgium|        germany| indonesia|     morocco|former_dutch_antilles_aruba|      poland|suriname|    turkey|other_western_migration_background|
+-----+----------+----------+------------------+-------------------------+-----------------+---------------------------+------------+------------+------+---------------------------------+----------+----------------+---------------+----------+------------+---------------------------+------------+

In [33]:
# Rows with index 0-4 hold a name (e.g. "Ned1erland") in `postcode` rather
# than a number: remove every run of characters that is not an ASCII letter
# or '.', which drops the stray digits and quotes.
letters_only = regexp_replace(col("postcode"), '[^a-zA-Z.]+', "")
df_postcode = df.where(col("index") <= 4).withColumn("postcode", letters_only)
df_postcode.show()

+-----+---------+----------+--------------------+-------------------------+------------------+---------------------------+------------+------------+-------------+---------------------------------+----------+----------------+---------------+----------+--------------+---------------------------+------------+----------+----------+----------------------------------+
|index| postcode|      year|               total|with_migration_background|   of_dutch_origin|of_western_migration_origin|      afrika|     amerika|         asia|europe_excluding_dutch_background|   oceania|         belgium|        germany| indonesia|       morocco|former_dutch_antilles_aruba|      poland|  suriname|    turkey|other_western_migration_background|
+-----+---------+----------+--------------------+-------------------------+------------------+---------------------------+------------+------------+-------------+---------------------------------+----------+----------------+---------------+----------+--------------+----

In [34]:
# Number of rows in the name-valued postcode slice (index <= 4).
df_postcode.count()

5

In [35]:
# All remaining rows (index > 4) hold numeric postcodes. Keep digits only.
# The pattern deliberately does NOT whitelist '.': the raw values contain
# noise such as "5." for the digit 5, and a postcode left as e.g. "105.5."
# could never match the 4-digit PC6 key it is joined on later. This also
# matches the '[^0-9]+' cleanup applied to every other numeric column.
df_postcode_last = df.filter(col("index") > 4).withColumn(
    "postcode", regexp_replace(col("postcode"), '[^0-9]+', "")
)
df_postcode_last.show(3)

+-----+--------+----------+-----+-------------------------+---------------+---------------------------+------+-------+----+---------------------------------+-------+---------+-------+---------+-------+---------------------------+------+--------+---------+----------------------------------+
|index|postcode|      year|total|with_migration_background|of_dutch_origin|of_western_migration_origin|afrika|amerika|asia|europe_excluding_dutch_background|oceania|  belgium|germany|indonesia|morocco|former_dutch_antilles_aruba|poland|suriname|   turkey|other_western_migration_background|
+-----+--------+----------+-----+-------------------------+---------------+---------------------------+------+-------+----+---------------------------------+-------+---------+-------+---------+-------+---------------------------+------+--------+---------+----------------------------------+
|    5|    1011|"n/201""9"| 9670|              "41""1""5."|       5.5.5.5.|                    n/265.0|n/235.|   845.| 970|    

In [36]:
# Number of rows in the numeric-postcode slice (index > 4).
df_postcode_last.count()

12175

In [37]:
# Stitch the two cleaned slices back together. Both were derived from the
# same `df` with only `postcode` rewritten, so their column order is identical.
df = df_postcode.union(df_postcode_last)

In [38]:
# First ten rows of the recombined frame.
df.show(n=10)

+-----+---------+----------+--------------------+-------------------------+------------------+---------------------------+------------+------------+-------------+---------------------------------+----------+----------------+---------------+----------+--------------+---------------------------+------------+----------+----------+----------------------------------+
|index| postcode|      year|               total|with_migration_background|   of_dutch_origin|of_western_migration_origin|      afrika|     amerika|         asia|europe_excluding_dutch_background|   oceania|         belgium|        germany| indonesia|       morocco|former_dutch_antilles_aruba|      poland|  suriname|    turkey|other_western_migration_background|
+-----+---------+----------+--------------------+-------------------------+------------------+---------------------------+------------+------------+-------------+---------------------------------+----------+----------------+---------------+----------+--------------+----

In [39]:
# Drop the helper "index" column, leave "postcode" as it is, and strip every
# non-digit character from all other columns. Column order is preserved.
df = df.select(
    *[
        col(name)
        if name == "postcode"
        else regexp_replace(col(name), '[^0-9]+', '').alias(name)
        for name in df.columns
        if name != "index"
    ]
)

df.show(n=3)

+---------+----+--------+-------------------------+---------------+---------------------------+------+-------+------+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+
| postcode|year|   total|with_migration_background|of_dutch_origin|of_western_migration_origin|afrika|amerika|  asia|europe_excluding_dutch_background|oceania|belgium|germany|indonesia|morocco|former_dutch_antilles_aruba|poland|suriname|turkey|other_western_migration_background|
+---------+----+--------+-------------------------+---------------+---------------------------+------+-------+------+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+
|Nederland|2019|17282163|                  4086138|       13196025|                    1774271|693072| 700155|937304|                          1731779|  23828| 

## 6. Data Type Conversions

In [40]:
# `year` now contains digits only; store it as an integer.
year_as_int = df["year"].cast("int")
df = df.withColumn("year", year_as_int)

In [41]:
# Cast every remaining column to integer. "year" was converted in the
# previous step and "postcode" stays a string.
keep_as_is = {"year", "postcode"}
df = df.select(
    *[
        col(name) if name in keep_as_is else col(name).cast("int").alias(name)
        for name in df.columns
    ]
)

df.printSchema()

root
 |-- postcode: string (nullable = false)
 |-- year: integer (nullable = true)
 |-- total: integer (nullable = true)
 |-- with_migration_background: integer (nullable = true)
 |-- of_dutch_origin: integer (nullable = true)
 |-- of_western_migration_origin: integer (nullable = true)
 |-- afrika: integer (nullable = true)
 |-- amerika: integer (nullable = true)
 |-- asia: integer (nullable = true)
 |-- europe_excluding_dutch_background: integer (nullable = true)
 |-- oceania: integer (nullable = true)
 |-- belgium: integer (nullable = true)
 |-- germany: integer (nullable = true)
 |-- indonesia: integer (nullable = true)
 |-- morocco: integer (nullable = true)
 |-- former_dutch_antilles_aruba: integer (nullable = true)
 |-- poland: integer (nullable = true)
 |-- suriname: integer (nullable = true)
 |-- turkey: integer (nullable = true)
 |-- other_western_migration_background: integer (nullable = true)



In [42]:
# Summary statistics again, now computed on the cleaned, integer-typed columns.
df.summary().show()

+-------+------------------+------------------+------------------+-------------------------+------------------+---------------------------+-----------------+------------------+-----------------+---------------------------------+------------------+------------------+------------------+------------------+------------------+---------------------------+------------------+------------------+------------------+----------------------------------+
|summary|          postcode|              year|             total|with_migration_background|   of_dutch_origin|of_western_migration_origin|           afrika|           amerika|             asia|europe_excluding_dutch_background|           oceania|           belgium|           germany|         indonesia|           morocco|former_dutch_antilles_aruba|            poland|          suriname|            turkey|other_western_migration_background|
+-------+------------------+------------------+------------------+-------------------------+------------------+-

In [43]:
# Preview of the cleaned frame (show() with its default row limit).
df.show()

+---------+----+--------+-------------------------+---------------+---------------------------+------+-------+-------+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+
| postcode|year|   total|with_migration_background|of_dutch_origin|of_western_migration_origin|afrika|amerika|   asia|europe_excluding_dutch_background|oceania|belgium|germany|indonesia|morocco|former_dutch_antilles_aruba|poland|suriname|turkey|other_western_migration_background|
+---------+----+--------+-------------------------+---------------+---------------------------+------+-------+-------+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+
|Nederland|2019|17282163|                  4086138|       13196025|                    1774271|693072| 700155| 937304|                          1731779|  238

## 7. Data Merging

In [44]:
# Load the postcode lookup table (comma-separated, header on the first line).
df_nl = (
    spark.read
    .options(header=True)
    .csv("Nederland.csv")
)
df_nl.show(n=3)

+------+----------+---------+--------+------------+
|   PC6|Huisnummer|Buurt2023|Wijk2023|Gemeente2023|
+------+----------+---------+--------+------------+
|1011AB|        99| 0363AF01|  0363AF|        0363|
|1011AB|       105| 0363AF01|  0363AF|        0363|
|1011AB|       106| 0363AF01|  0363AF|        0363|
+------+----------+---------+--------+------------+
only showing top 3 rows



In [45]:
# The house number is not needed for a postcode-level lookup.
df_nl = df_nl.drop("Huisnummer")
df_nl.show(n=3)

+------+---------+--------+------------+
|   PC6|Buurt2023|Wijk2023|Gemeente2023|
+------+---------+--------+------------+
|1011AB| 0363AF01|  0363AF|        0363|
|1011AB| 0363AF01|  0363AF|        0363|
|1011AB| 0363AF01|  0363AF|        0363|
+------+---------+--------+------------+
only showing top 3 rows



In [46]:
# Reduce PC6 (e.g. "1011AB") to its first four characters so it can be
# compared with the 4-digit postcodes of the population frame.
first_four_chars = regexp_extract(col("PC6"), r'^(.{4})', 1)
df_nl = df_nl.withColumn("PC6", first_four_chars)
df_nl.show(n=3)

+----+---------+--------+------------+
| PC6|Buurt2023|Wijk2023|Gemeente2023|
+----+---------+--------+------------+
|1011| 0363AF01|  0363AF|        0363|
|1011| 0363AF01|  0363AF|        0363|
|1011| 0363AF01|  0363AF|        0363|
+----+---------+--------+------------+
only showing top 3 rows



In [47]:
# Keep a single row per 4-character postcode so the later join cannot
# multiply population rows.
# NOTE(review): which Buurt/Wijk row survives for a postcode is not controlled
# here — confirm that any one of them is acceptable.
df_nl = df_nl.drop_duplicates(subset=["PC6"])
df_nl.show()

+----+---------+--------+------------+
| PC6|Buurt2023|Wijk2023|Gemeente2023|
+----+---------+--------+------------+
|1011| 0363AF01|  0363AF|        0363|
|1012| 0363AD01|  0363AD|        0363|
|1013| 0363AA03|  0363AA|        0363|
|1014| 0363EB02|  0363EB|        0363|
|1015| 0363AC01|  0363AC|        0363|
|1016| 0363AC02|  0363AC|        0363|
|1017| 0363AG03|  0363AG|        0363|
|1018| 0363AK07|  0363AK|        0363|
|1019| 0363MA11|  0363MA|        0363|
|1021| 0363NL03|  0363NL|        0363|
|1022| 0363NP02|  0363NP|        0363|
|1023| 0363NF03|  0363NF|        0363|
|1024| 0363NJ04|  0363NJ|        0363|
|1025| 0363NH01|  0363NH|        0363|
|1026| 0363NQ06|  0363NQ|        0363|
|1027| 0363NQ03|  0363NQ|        0363|
|1028| 0363NQ03|  0363NQ|        0363|
|1031| 0363NK03|  0363NK|        0363|
|1032| 0363NK02|  0363NK|        0363|
|1033| 0363NA05|  0363NA|        0363|
+----+---------+--------+------------+
only showing top 20 rows



In [48]:
# Schema of the lookup table; all four remaining columns are strings.
df_nl.printSchema()

root
 |-- PC6: string (nullable = true)
 |-- Buurt2023: string (nullable = true)
 |-- Wijk2023: string (nullable = true)
 |-- Gemeente2023: string (nullable = true)



In [49]:
# Attach the Buurt/Wijk/Gemeente codes to each population row. The inner
# join keeps only rows whose postcode has a match in the lookup table; the
# duplicate key column PC6 is dropped afterwards.
same_postcode = df["postcode"] == df_nl["PC6"]
df = df.join(df_nl, on=same_postcode, how="inner").drop("PC6")

In [50]:
# First three rows of the merged result.
df.show(n=3)

+--------+----+-----+-------------------------+---------------+---------------------------+------+-------+----+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+---------+--------+------------+
|postcode|year|total|with_migration_background|of_dutch_origin|of_western_migration_origin|afrika|amerika|asia|europe_excluding_dutch_background|oceania|belgium|germany|indonesia|morocco|former_dutch_antilles_aruba|poland|suriname|turkey|other_western_migration_background|Buurt2023|Wijk2023|Gemeente2023|
+--------+----+-----+-------------------------+---------------+---------------------------+------+-------+----+---------------------------------+-------+-------+-------+---------+-------+---------------------------+------+--------+------+----------------------------------+---------+--------+------------+
|    1011|2023| 9915|                     4730|           5185|                   