<center>

# $\textbf{Unemployment}$

<center>

### $\textbf{Code}$

In [36]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, col, lit, array, struct

In [37]:
# Build (or reuse) a local Spark session with Hive support for this notebook.
spark = (
    SparkSession.builder
    .appName("Unemployment")
    .master("local")
    .enableHiveSupport()
    .getOrCreate()
)
spark

In [38]:
# Load the CSV into a DataFrame: first row is the header, column types are inferred.
df = (
    spark.read
    .format("csv")
    .options(sep=",", inferschema="true", header="true")
    .load("Files/Unemployment2.csv")
)

In [39]:
# Remove the metadata columns Country Code, Indicator Code and Indicator Name.
# DataFrame.drop returns a new DataFrame instead of modifying df in place, so
# the result must be assigned back for the columns to actually be removed.
df = df.drop("Country Code", "Indicator Code", "Indicator Name")

# Rename the country column to a shorter name
df = df.withColumnRenamed("Country Name", "country")

In [40]:
# Unpivot the wide table into long format: the columns named "2000" .. "2022"
# become one (year, unemployment) struct each, and exploding the array of
# structs yields one row per country and year.
year_structs = [
    struct(lit(yr).alias("year"), col(str(yr)).alias("unemployment"))
    for yr in range(2000, 2023)
]
df = (
    df.select("country", explode(array(year_structs)).alias("data"))
    .selectExpr("country", "data.year", "data.unemployment")
    .orderBy("country")
)

In [41]:
# Enforce the target schema: country -> string, year -> int, unemployment -> double
for column_name, target_type in (
    ("country", "string"),
    ("year", "int"),
    ("unemployment", "double"),
):
    df = df.withColumn(column_name, col(column_name).cast(target_type))

In [42]:
# Keep only the rows whose year satisfies 2010 < year < 2024
df = df.filter((col("year") > 2010) & (col("year") < 2024))

In [43]:
# Sort by country first, then by year within each country
df = df.orderBy(["country", "year"])

In [44]:
# Persist the DataFrame as Parquet (replacing any existing output), read it
# back to display the stored rows, then shut the Spark session down.
output_path = "FilesParquet/Unemployment.parquet"
df.write.parquet(output_path, mode="overwrite")
spark.read.parquet(output_path).show()
spark.stop()

+--------------------+----+----------------+
|             country|year|    unemployment|
+--------------------+----+----------------+
|         Afghanistan|2011|           7.918|
|         Afghanistan|2012|           7.914|
|         Afghanistan|2013|           7.914|
|         Afghanistan|2014|            7.91|
|         Afghanistan|2015|           9.002|
|         Afghanistan|2016|          10.092|
|         Afghanistan|2017|           11.18|
|         Afghanistan|2018|          11.131|
|         Afghanistan|2019|          11.082|
|         Afghanistan|2020|           11.71|
|         Afghanistan|2021|          12.075|
|         Afghanistan|2022|            14.1|
|Africa Eastern an...|2011|7.14930120647393|
|Africa Eastern an...|2012|6.98489436007991|
|Africa Eastern an...|2013|6.90438303627979|
|Africa Eastern an...|2014|6.88450041554073|
|Africa Eastern an...|2015|6.98306405212527|
|Africa Eastern an...|2016|7.15215693722101|
|Africa Eastern an...|2017|7.27429827091257|
|Africa Ea