# TRANSFORMATION OF BRONZE LAYER DATA

In [0]:
# Read the ingested bronze-layer table into a DataFrame and preview it.
bronze_table = "bronze.ingested_data"
df = spark.read.table(bronze_table)
display(df)

### Dropping the _Price_ Column

### Dropping the null values from the _Time_ column

In [0]:
# Remove the Price column from the working DataFrame, then preview the result.
df = df.drop("Price")
display(df)

In [0]:
# Discard every row whose Time value is null, then preview the result.
df = df.na.drop(subset=["Time"])
display(df)

### Dropping the _Duplicate_ rows

In [0]:
# With no subset given, rows are compared across all columns.
df = df.dropDuplicates()

# Show how many rows remain after de-duplication.
row_count = df.count()
display(row_count)


### Creating a new _year_ column with the Year of Launch and dropping the _Date_ column

In [0]:
from pyspark.sql import functions as F

# Derive the launch year from Date, then drop Date since only the year is kept.
df = df.withColumn("year", F.year(F.col("Date"))).drop("Date")

# printSchema() prints the schema itself and returns None, so it is called
# directly instead of being wrapped in display().
df.printSchema()

### Keeping only the text before the first newline in the _MissionStatus_ column

In [0]:
# Split MissionStatus on the newline character and keep only the first piece.
status_parts = F.split(F.col("MissionStatus"), "\n")
df = df.withColumn("MissionStatus", F.element_at(status_parts, 1))

### Extracting _Country_ and _City_ from the _Location_ column

In [0]:
from pyspark.sql import functions as F

# Split Location once on ", " and reuse the resulting array for both fields.
location_parts = F.split(F.col("Location"), ", ")

# Country is the last element of the split.
df = df.withColumn("Country", F.element_at(location_parts, -1))

# City is the second-to-last element. Guard on the array size: with
# spark.sql.ansi.enabled=true, element_at raises on an out-of-range index,
# so a Location without a ", " separator would otherwise fail the job.
# Such rows get a null City.
df = df.withColumn(
    "City",
    F.when(F.size(location_parts) >= 2, F.element_at(location_parts, -2)),
)

# Location is no longer needed once Country and City have been extracted.
df = df.drop("Location")

# Replace the abbreviation with the full country name.
df = df.withColumn(
    "Country",
    F.when(F.col("Country") == "USA", "United States").otherwise(F.col("Country")),
)

display(df)


### Implementation of **_Silver Layer_** in Medallion architecture

In [0]:
# Persist the transformed DataFrame as Delta files, replacing any existing data
# at the target path.
silver_writer = df.write.format("delta").mode("overwrite")
silver_writer.save("/mnt/silver/")

In [0]:
# Make sure the target schema exists before registering the table in it.
spark.sql("CREATE SCHEMA IF NOT EXISTS silver")

# Save the transformed DataFrame as a Delta table, replacing any existing data.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("silver.transformed_data")
)