<center>

# $\textbf{Tax}$

</center>

<center>

### $\textbf{Code}$

</center>

In [28]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, col, lit, array, struct, when

In [29]:
# Get (or create) a local Spark session named 'Tax' with Hive support enabled.
session_builder = SparkSession.builder.appName('Tax').master("local")
spark = session_builder.enableHiveSupport().getOrCreate()
# Bare expression so the notebook renders the session summary.
spark

In [30]:
# Load the source CSV into a DataFrame: the first row supplies the column
# names and Spark infers each column's type from the data.
df = (
    spark.read.format("csv")
    .option("sep", ",")
    .option("inferschema", "true")
    .option("header", "true")
    .load("Files/Tax2.csv")
)

In [31]:
# Rename "Country Name" to "country", then discard the descriptive metadata
# columns, the 1960-1999 year columns and "_c67".
# NOTE(review): "_c67" is presumably an unnamed trailing CSV column that Spark
# auto-named - confirm against the file.
columns_to_drop = list(map(str, range(1960, 2000)))
metadata_columns = ("Country Code", "Indicator Name", "Indicator Code")
df = (
    df.withColumnRenamed("Country Name", "country")
    .drop(*metadata_columns, *columns_to_drop, "_c67")
)

In [32]:
# Unpivot the wide per-year columns (2000-2022) into long form: every input
# row yields one (country, year, tax) row per year column.
year_structs = []
for yr in range(2000, 2023):
    # Pair the year number with the value held in that year's column.
    year_structs.append(struct(lit(yr).alias("year"), col(str(yr)).alias("tax")))

exploded = df.select("country", explode(array(year_structs)).alias("data"))

# Flatten the struct back into top-level columns and sort by country.
df = exploded.selectExpr("country", "data.year", "data.tax").orderBy("country")

In [33]:
# Cast columns to their desired types first, so the null replacement below
# also covers any 'tax' value that becomes null during the cast (the original
# filled nulls before casting, letting such values slip through as null).
df = df.withColumn("country", col("country").cast("string"))
df = df.withColumn("year", col("year").cast("int"))
df = df.withColumn("tax", col("tax").cast("double"))

# Replace null values in the 'tax' column with 0.0
# NOTE(review): after this step a missing observation is indistinguishable
# from a genuine 0.0 value - confirm that downstream consumers expect this.
df = df.withColumn("tax", when(col("tax").isNull(), lit(0.0)).otherwise(col("tax")))

In [34]:
# Keep only the rows whose year lies strictly between 2010 and 2024
year_in_range = (col("year") > 2010) & (col("year") < 2024)
df = df.filter(year_in_range)

In [35]:
# Sort rows by country first, then chronologically within each country
sort_keys = ["country", "year"]
df = df.sort(*sort_keys)

In [36]:
# Persist the DataFrame as Parquet (replacing any previous output), read the
# stored data back to preview it, then shut down the Spark session.
parquet_path = "FilesParquet/Tax.parquet"
df.write.parquet(parquet_path, mode="overwrite")
spark.read.parquet(parquet_path).show()
spark.stop()

+--------------------+----+----------------+
|             country|year|             tax|
+--------------------+----+----------------+
|         Afghanistan|2011|8.91679399595736|
|         Afghanistan|2012|7.70780060287773|
|         Afghanistan|2013|7.12277329020889|
|         Afghanistan|2014|6.88210294277301|
|         Afghanistan|2015|7.58538233129798|
|         Afghanistan|2016|9.50265278288094|
|         Afghanistan|2017|9.89845089688421|
|         Afghanistan|2018|             0.0|
|         Afghanistan|2019|             0.0|
|         Afghanistan|2020|             0.0|
|         Afghanistan|2021|             0.0|
|         Afghanistan|2022|             0.0|
|Africa Eastern an...|2011|17.5214609793853|
|Africa Eastern an...|2012|18.0124688502049|
|Africa Eastern an...|2013|18.1782844330301|
|Africa Eastern an...|2014|18.2420894982272|
|Africa Eastern an...|2015|17.4297917003423|
|Africa Eastern an...|2016|17.0950776217955|
|Africa Eastern an...|2017|17.8607979980375|
|Africa Ea