<center>

# $\textbf{Inflation}$

<center>

### $\textbf{Code}$

In [104]:
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from pyspark.sql.functions import explode, col, lit, array, struct, when

In [105]:
# Local Spark session for this notebook.
# No extra JVM packages are requested: the Excel file is read with pandas,
# not through a Spark data source, so asking Spark to resolve an Excel connector
# at start-up would only add a network dependency.
spark = (
    SparkSession.builder
    .appName("Inflation")
    .master("local")
    .getOrCreate()
)
spark

In [106]:
# Load the wide inflation table with pandas (first column: label, remaining columns: values).
raw_df = pd.read_excel("Files/Inflation.xlsx")

# The sheet marks missing observations with the text "no data"; turn those into NaN.
# replace() returns a new frame, so the frame exactly as read stays available in raw_df.
pandas_df = raw_df.replace("no data", float("nan"))

# Spark field names must be strings; the Excel headers may not be (presumably numeric years).
column_names = [str(name) for name in pandas_df.columns]

# Cast every column after the first to float64 explicitly. The schema below declares them
# as DoubleType, and Spark's schema verification can reject Python ints or other
# object-dtype leftovers, so do not rely on pandas inferring a float dtype after replace().
value_columns = pandas_df.columns[1:]
pandas_df = pandas_df.astype({name: "float64" for name in value_columns})

# Spark schema: the first column is a nullable string, all remaining columns nullable doubles.
schema_fields = [StructField(column_names[0], StringType(), True)] + \
                [StructField(name, DoubleType(), True) for name in column_names[1:]]
schema = StructType(schema_fields)

# Convert the pandas DataFrame to a Spark DataFrame with the explicit schema.
# NOTE(review): missing values appear to reach Spark as NaN rather than null (the saved
# output shows NaN); NaN propagates through aggregations, so handle it downstream.
df = spark.createDataFrame(pandas_df, schema=schema)

In [107]:
# The label column carries the indicator title as its header; give it the short name
# "country", which the following cells select by.
indicator_header = "Inflation rate, average consumer prices (Annual percent change)"
df = df.withColumnRenamed(indicator_header, "country")

In [108]:
# Unpivot wide -> long: build one (year, inflation) struct per year column 2000..2024,
# explode the array of structs into rows, then flatten the struct into plain columns.
year_entries = [
    struct(lit(yr).alias("year"), col(str(yr)).alias("inflation"))
    for yr in range(2000, 2025)
]
exploded = df.select("country", explode(array(year_entries)).alias("data"))
df = exploded.selectExpr("country", "data.year", "data.inflation")

In [109]:
# Persist the long-format table as Parquet (replacing any previous run's output),
# read it back to preview what was written, then shut the Spark session down.
parquet_path = "FilesParquet/Inflation.parquet"
df.write.parquet(parquet_path, mode="overwrite")
spark.read.parquet(parquet_path).show()
spark.stop()

+-----------+----+---------+
|    country|year|inflation|
+-----------+----+---------+
|Afghanistan|2000|      NaN|
|Afghanistan|2001|      NaN|
|Afghanistan|2002|      5.1|
|Afghanistan|2003|     35.7|
|Afghanistan|2004|     16.4|
|Afghanistan|2005|     10.6|
|Afghanistan|2006|      6.8|
|Afghanistan|2007|      8.7|
|Afghanistan|2008|     26.4|
|Afghanistan|2009|     -6.8|
|Afghanistan|2010|      2.2|
|Afghanistan|2011|     11.8|
|Afghanistan|2012|      6.4|
|Afghanistan|2013|      7.4|
|Afghanistan|2014|      4.7|
|Afghanistan|2015|     -0.7|
|Afghanistan|2016|      4.4|
|Afghanistan|2017|      5.0|
|Afghanistan|2018|      0.6|
|Afghanistan|2019|      2.3|
+-----------+----+---------+
only showing top 20 rows

