In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Database Creation

In [0]:
%sql
-- Make sure the gold database exists before any gold.* table is written;
-- IF NOT EXISTS keeps this cell safe to re-run.
CREATE DATABASE IF NOT EXISTS gold;

# Data Reading

### trip_type

In [0]:
# Load the trip_type dataset from the silver container.
# Parquet stores its schema inside the files, so the CSV-only reader options
# "header" and "inferSchema" were removed: the Parquet source ignores them and
# they only suggested behaviour that never happened.
df_trip_type = (
    spark.read.format("parquet")
    .load("abfss://silver@nyctaxidatalake00.dfs.core.windows.net/trip_type")
)
df_trip_type.display()

trip_type,trip_description
1,Street-hail
2,Dispatch


In [0]:
# Write trip_type as Delta files at an explicit path in the gold container.
# The DataFrame is the full silver dataset, so the write must replace the
# target: with mode('append') every re-run of this notebook added a second
# copy of each row. 'overwrite' also matches the saveAsTable cell that follows.
(
    df_trip_type.write.format('delta')
    .mode('overwrite')
    .option('path', 'abfss://gold@nyctaxidatalake00.dfs.core.windows.net/trip_type')
    .save()
)

In [0]:
# Publish trip_type as the Delta table gold.trip_type.
# Both the data and the schema of an existing table are replaced.
(
    df_trip_type.write
    .format('delta')
    .option('overwriteSchema', 'true')
    .mode('overwrite')
    .saveAsTable('gold.trip_type')
)

In [0]:
%sql
-- Read back gold.trip_type to confirm the table was written.
SELECT
  *
FROM
  gold.trip_type;

trip_type,trip_description
1,Street-hail
2,Dispatch


### trip_zone

In [0]:
# Load the trip_zone dataset from the silver container.
# Parquet stores its schema inside the files, so the CSV-only reader options
# "header" and "inferSchema" were removed: the Parquet source ignores them.
df_trip_zone = (
    spark.read.format("parquet")
    .load("abfss://silver@nyctaxidatalake00.dfs.core.windows.net/trip_zone")
)
# Preview only the first 10 rows rather than rendering the whole dataset.
df_trip_zone.limit(10).display()

LocationID,Borough,Zone,service_zone,zone1
1,EWR,Newark Airport,EWR,Newark Airport
2,Queens,Jamaica Bay,Boro Zone,Jamaica Bay
3,Bronx,Allerton/Pelham Gardens,Boro Zone,Allerton
4,Manhattan,Alphabet City,Yellow Zone,Alphabet City
5,Staten Island,Arden Heights,Boro Zone,Arden Heights
6,Staten Island,Arrochar/Fort Wadsworth,Boro Zone,Arrochar
7,Queens,Astoria,Boro Zone,Astoria
8,Queens,Astoria Park,Boro Zone,Astoria Park
9,Queens,Auburndale,Boro Zone,Auburndale
10,Queens,Baisley Park,Boro Zone,Baisley Park


In [0]:
# Write trip_zone as Delta files at an explicit path in the gold container.
# The DataFrame is the full silver dataset, so the write must replace the
# target: with mode('append') every re-run of this notebook added a second
# copy of each row. 'overwrite' also matches the saveAsTable cell that follows.
(
    df_trip_zone.write.format('delta')
    .mode('overwrite')
    .option('path', 'abfss://gold@nyctaxidatalake00.dfs.core.windows.net/trip_zone')
    .save()
)

In [0]:
# Publish trip_zone as the Delta table gold.trip_zone.
# Both the data and the schema of an existing table are replaced.
(
    df_trip_zone.write
    .format('delta')
    .option('overwriteSchema', 'true')
    .mode('overwrite')
    .saveAsTable('gold.trip_zone')
)

In [0]:
%sql
-- Read back a 10-row sample of gold.trip_zone to confirm the table was written.
SELECT
  *
FROM
  gold.trip_zone
LIMIT
  10;

LocationID,Borough,Zone,service_zone,zone1
1,EWR,Newark Airport,EWR,Newark Airport
2,Queens,Jamaica Bay,Boro Zone,Jamaica Bay
3,Bronx,Allerton/Pelham Gardens,Boro Zone,Allerton
4,Manhattan,Alphabet City,Yellow Zone,Alphabet City
5,Staten Island,Arden Heights,Boro Zone,Arden Heights
6,Staten Island,Arrochar/Fort Wadsworth,Boro Zone,Arrochar
7,Queens,Astoria,Boro Zone,Astoria
8,Queens,Astoria Park,Boro Zone,Astoria Park
9,Queens,Auburndale,Boro Zone,Auburndale
10,Queens,Baisley Park,Boro Zone,Baisley Park


### trips2024

In [0]:
# Load the trips2024 dataset from the silver container.
# Parquet stores its schema inside the files, so the CSV-only reader options
# "header" and "inferSchema" were removed: the Parquet source ignores them.
df_trip = (
    spark.read.format("parquet")
    .load("abfss://silver@nyctaxidatalake00.dfs.core.windows.net/trips2024")
)
# Preview only the first 10 rows rather than rendering the whole dataset.
df_trip.limit(10).display()

VendorID,PULocationID,DOLocationID,trip_distance,fare_amount,total_amount
2,65,49,1.24,9.3,13.8
2,7,179,0.94,7.2,11.64
2,74,42,0.84,6.5,9.0
2,75,235,6.07,25.4,32.9
2,256,49,2.06,12.1,17.52
1,210,210,1.3,9.3,12.8
2,66,4,4.35,19.8,28.05
2,95,95,2.02,13.5,16.0
2,24,143,2.35,12.8,21.05
2,210,210,1.3,8.0,9.0


In [0]:
# Write trips2024 as Delta files at an explicit path in the gold container.
# df_trip is read from the whole silver trips2024 folder, so the write must
# replace the target: with mode('append') every re-run of this notebook added
# a second copy of each trip. 'overwrite' also matches the saveAsTable cell
# that follows.
# NOTE(review): if this path is meant to receive incremental batches instead,
# the read must be narrowed to the new batch before switching back to append.
(
    df_trip.write.format('delta')
    .mode('overwrite')
    .option('path', 'abfss://gold@nyctaxidatalake00.dfs.core.windows.net/trips2024')
    .save()
)

In [0]:
# Publish the trips as the Delta table gold.trips2024.
# Both the data and the schema of an existing table are replaced.
(
    df_trip.write
    .format('delta')
    .option('overwriteSchema', 'true')
    .mode('overwrite')
    .saveAsTable('gold.trips2024')
)

In [0]:
%sql
-- Read back a 10-row sample of gold.trips2024 to confirm the table was written.
SELECT
  *
FROM
  gold.trips2024
LIMIT
  10;

VendorID,PULocationID,DOLocationID,trip_distance,fare_amount,total_amount
2,65,49,1.24,9.3,13.8
2,7,179,0.94,7.2,11.64
2,74,42,0.84,6.5,9.0
2,75,235,6.07,25.4,32.9
2,256,49,2.06,12.1,17.52
1,210,210,1.3,9.3,12.8
2,66,4,4.35,19.8,28.05
2,95,95,2.02,13.5,16.0
2,24,143,2.35,12.8,21.05
2,210,210,1.3,8.0,9.0


# Versioning

In [0]:
%sql
-- Baseline sample of gold.trip_zone, taken before the UPDATE and DELETE
-- that follow, so the later changes can be compared against it.
SELECT
  *
FROM
  gold.trip_zone
LIMIT
  10;

LocationID,Borough,Zone,service_zone,zone1
1,EWR,Newark Airport,EWR,Newark Airport
2,Queens,Jamaica Bay,Boro Zone,Jamaica Bay
3,Bronx,Allerton/Pelham Gardens,Boro Zone,Allerton
4,Manhattan,Alphabet City,Yellow Zone,Alphabet City
5,Staten Island,Arden Heights,Boro Zone,Arden Heights
6,Staten Island,Arrochar/Fort Wadsworth,Boro Zone,Arrochar
7,Queens,Astoria,Boro Zone,Astoria
8,Queens,Astoria Park,Boro Zone,Astoria Park
9,Queens,Auburndale,Boro Zone,Auburndale
10,Queens,Baisley Park,Boro Zone,Baisley Park


Version 1 - Updating a record

In [0]:
%sql
-- Change one row so the table gains a new entry in its history:
-- the Borough of LocationID 1 is set to 'EMR'.
UPDATE
  gold.trip_zone
SET
  Borough = 'EMR'
WHERE
  LocationID = 1;

In [0]:
%sql
-- Show the row for LocationID 1 to confirm the UPDATE took effect.
SELECT
  *
FROM
  gold.trip_zone
WHERE
  LocationID = 1;

LocationID,Borough,Zone,service_zone,zone1
1,EMR,Newark Airport,EWR,Newark Airport


Version 2 - Deleting a record

In [0]:
%sql
-- Remove the row for LocationID 1 so the table gains another history entry.
DELETE FROM
  gold.trip_zone
WHERE
  LocationID = 1;

num_affected_rows
1


In [0]:
%sql
-- Look up LocationID 1 again; an empty result is expected after the DELETE.
SELECT
  *
FROM
  gold.trip_zone
WHERE
  LocationID = 1;

LocationID,Borough,Zone,service_zone,zone1


In [0]:
%sql
-- List every recorded version of gold.trip_zone with its operation,
-- so the version to roll back to can be chosen.
DESCRIBE HISTORY
  gold.trip_zone;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
4,2025-07-10T16:23:58.000Z,147795821957797,mohiteabhishek25@outlook.com,DELETE,"Map(predicate -> [""(LocationID#15333 = 1)""])",,,0710-142034-9k54menm-v2n,3.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1495, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 427, numDeletionVectorsUpdated -> 0, numDeletedRows -> 1, scanTimeMs -> 372, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 53)",,Databricks-Runtime/16.4.x-photon-scala2.12
3,2025-07-10T16:17:04.000Z,147795821957797,mohiteabhishek25@outlook.com,UPDATE,"Map(predicate -> [""(LocationID#13991 = 1)""])",,,0710-142034-9k54menm-v2n,2.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 1, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 2200, numDeletionVectorsUpdated -> 0, scanTimeMs -> 973, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1495, rewriteTimeMs -> 1210)",,Databricks-Runtime/16.4.x-photon-scala2.12
2,2025-07-10T14:38:36.000Z,147795821957797,mohiteabhishek25@outlook.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true""}, statsOnLoad -> false)",,,0710-142034-9k54menm-v2n,1.0,WriteSerializable,False,"Map(numFiles -> 1, numRemovedFiles -> 1, numRemovedBytes -> 902, numOutputRows -> 265, numOutputBytes -> 8833)",,Databricks-Runtime/16.4.x-photon-scala2.12
1,2025-07-10T14:36:24.000Z,147795821957797,mohiteabhishek25@outlook.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true""}, statsOnLoad -> false)",,,0710-142034-9k54menm-v2n,0.0,WriteSerializable,False,"Map(numFiles -> 1, numRemovedFiles -> 1, numRemovedBytes -> 8833, numOutputRows -> 2, numOutputBytes -> 902)",,Databricks-Runtime/16.4.x-photon-scala2.12
0,2025-07-10T12:54:47.000Z,147795821957797,mohiteabhishek25@outlook.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true""}, statsOnLoad -> false)",,,0710-115534-8lkcni02-v2n,,WriteSerializable,False,"Map(numFiles -> 1, numRemovedFiles -> 0, numRemovedBytes -> 0, numOutputRows -> 265, numOutputBytes -> 8833)",,Databricks-Runtime/16.4.x-photon-scala2.12


Rolling back to a previous version

In [0]:
%sql
-- Roll gold.trip_zone back to the state it had at version 2.
-- NOTE(review): the version number is hard-coded. In the history shown above,
-- version 2 is the last table load before the UPDATE and DELETE; on a table
-- with a different history, version 2 may be a different state. Check
-- DESCRIBE HISTORY before re-running.
RESTORE TABLE gold.trip_zone
  TO VERSION AS OF 2;

table_size_after_restore,num_of_files_after_restore,num_removed_files,num_restored_files,removed_files_size,restored_files_size
8833,1,1,1,8833,8833


In [0]:
%sql
-- Sample gold.trip_zone after the RESTORE to check the rolled-back contents.
SELECT
  *
FROM
  gold.trip_zone
LIMIT
  10;

LocationID,Borough,Zone,service_zone,zone1
1,EWR,Newark Airport,EWR,Newark Airport
2,Queens,Jamaica Bay,Boro Zone,Jamaica Bay
3,Bronx,Allerton/Pelham Gardens,Boro Zone,Allerton
4,Manhattan,Alphabet City,Yellow Zone,Alphabet City
5,Staten Island,Arden Heights,Boro Zone,Arden Heights
6,Staten Island,Arrochar/Fort Wadsworth,Boro Zone,Arrochar
7,Queens,Astoria,Boro Zone,Astoria
8,Queens,Astoria Park,Boro Zone,Astoria Park
9,Queens,Auburndale,Boro Zone,Auburndale
10,Queens,Baisley Park,Boro Zone,Baisley Park
