In [0]:
import pandas as pd
import requests
import time
import json
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, StringType, MapType

# --- Azure Storage access settings -------------------------------------------
# Identity of the service principal and the storage location it is used with.
tenantID = "60feac79-e042-4ce8-8759-dca313146110"
servicePrincipalID = "0b27e0ea-e184-49cd-b921-b3519cb03f7f"
storage_account_name = "macavstorage"
container_name = "datalake"

# The secret is read from the Databricks secret scope "Scope1" at run time
# instead of being written into the notebook.
blobsecret = dbutils.secrets.get(scope="Scope1", key="blobsecret1")

# --- One-time setup of the secret scope used above ---------------------------
# Create the scope at:
#   https://adb-4383697834848777.17.azuredatabricks.net/#secrets/createScope
# with the following values:
#   Scope Name  = Scope1
#   DNS Name    = "https://twitchkv.vault.azure.net/" (Vault URI)
#   Resource ID = "/subscriptions/972ad05f-b62e-48ab-a9fa-a17fd4dc6640/resourceGroups/twitchData/providers/Microsoft.KeyVault/vaults/twitchkv" (Keyvault resource ID)

# Initialize (or reuse) the Spark session and register Delta Lake support.
# The SQL extension has to be referenced by its fully qualified class name,
# "io.delta.sql.DeltaSparkSessionExtensions"; the shorter
# "delta.sql.DeltaSparkSessionExtensions" does not name an existing class.
# getOrCreate() returns the already-running session when there is one.
spark = (
    SparkSession.builder
    .appName("DeltaLakeAzureStorage")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtensions")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)

# Authenticate the service principal against the storage account.
# Each entry is (Spark/Hadoop configuration key, value); the keys are scoped to
# this storage account's dfs endpoint and are applied in the order listed.
_oauth_settings = [
    (f"fs.azure.account.auth.type.{storage_account_name}.dfs.core.windows.net",
     "OAuth"),
    (f"fs.azure.account.oauth.provider.type.{storage_account_name}.dfs.core.windows.net",
     "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"),
    (f"fs.azure.account.oauth2.client.id.{storage_account_name}.dfs.core.windows.net",
     servicePrincipalID),
    (f"fs.azure.account.oauth2.client.secret.{storage_account_name}.dfs.core.windows.net",
     blobsecret),
    (f"fs.azure.account.oauth2.client.endpoint.{storage_account_name}.dfs.core.windows.net",
     f"https://login.microsoftonline.com/{tenantID}/oauth2/token"),
]
for _conf_key, _conf_value in _oauth_settings:
    spark.conf.set(_conf_key, _conf_value)

In [0]:
from pyspark.sql import Row
import datetime

# Users table: one hard-coded sample user record.
users_data = dict(
    userId="1",
    macav_username="Jugi",
    x_name="macav_ai",
    x_id="12345",
    instagram_name="macav_ai",
    wallet_address="0x4838B106FCe9647Bdf1E7877BF73cE8B0BAD5f95",
)

# Wrap the record in a Row, build a one-row Spark DataFrame from it, and render it.
users_row = Row(**users_data)
users_df = spark.createDataFrame([users_row])
display(users_df)

In [0]:
from pyspark.sql import Row
import datetime
# Campaign table: one hard-coded sample campaign record.
# "creator_userid" and "wallet_address" carry the same values as the sample
# user's "userId" and "wallet_address".
campaign_data = dict(
    campaign_name="Macav Marketing Campaign",
    hashtags=["macavai", "macav"],
    reward_like=1,
    reward_comment=5,
    reward_view=10,
    campaign_startDate="2025-01-01",
    campaign_endDate="2025-05-01",
    creator_userid="1",
    campaign_contract="0x4838B106FCe9647Bdf1E7877BF73cE8B0BAD5f88",
    wallet_address="0x4838B106FCe9647Bdf1E7877BF73cE8B0BAD5f95",
)

# Wrap the record in a Row, build a one-row Spark DataFrame from it, and render it.
campaign_row = Row(**campaign_data)
campaign_df = spark.createDataFrame([campaign_row])
display(campaign_df)

Write the users table in Delta format

In [0]:
# Save the users DataFrame in Delta format under datalake/users in the
# container; "overwrite" replaces whatever is already stored at that path.
(
    users_df.write
    .format("delta")
    .mode("overwrite")
    .save(f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/datalake/users")
)

Write the users table as JSON

In [0]:
# Save the users DataFrame as JSON; "overwrite" replaces whatever is already
# stored at that path.
# NOTE(review): the target path repeats the "datalake/" segment
# (datalake/datalake/json/users) while the Delta copy lives under
# datalake/users - confirm the extra level is intended.
(
    users_df.write
    .format("json")
    .mode("overwrite")
    .save(f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/datalake/datalake/json/users")
)

Read the users table back from JSON

In [0]:
# Load the users JSON files back into a DataFrame and render them.
# The URI uses the TLS-secured abfss:// scheme, the same scheme the users JSON
# write uses, so the write and the read address the location identically.
users_json = (
    spark.read
    .format("json")
    .load(f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/datalake/datalake/json/users")
)
display(users_json)

Read the users table back from Delta

In [0]:

# Load the users Delta table back into a DataFrame and render it.
# The URI uses the TLS-secured abfss:// scheme, the same scheme the users Delta
# write uses, so the write and the read address the table identically.
users_delta = (
    spark.read
    .format("delta")
    .load(f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/datalake/users")
)
display(users_delta)

Write the campaigns table in Delta format

In [0]:
# Save the campaign DataFrame in Delta format under datalake/campaigns in the
# container; "overwrite" replaces whatever is already stored at that path.
(
    campaign_df.write
    .format("delta")
    .mode("overwrite")
    .save(f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/datalake/campaigns")
)

Write the campaigns table as JSON

In [0]:
# Save the campaign DataFrame as JSON; "overwrite" replaces whatever is already
# stored at that path.
# NOTE(review): the target path repeats the "datalake/" segment
# (datalake/datalake/json/campaigns) while the Delta copy lives under
# datalake/campaigns - confirm the extra level is intended.
(
    campaign_df.write
    .format("json")
    .mode("overwrite")
    .save(f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/datalake/datalake/json/campaigns")
)

Read the campaigns table back from JSON

In [0]:

# Load the campaigns JSON files back into a DataFrame and render them.
# The URI uses the TLS-secured abfss:// scheme, the same scheme the campaigns
# JSON write uses, so the write and the read address the location identically.
campaigns_json = (
    spark.read
    .format("json")
    .load(f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/datalake/datalake/json/campaigns")
)
display(campaigns_json)

Read the campaigns table back from Delta

In [0]:

# Load the campaigns Delta table back into a DataFrame and render it.
# The URI uses the TLS-secured abfss:// scheme, the same scheme the campaigns
# Delta write uses, so the write and the read address the table identically.
campaigns_delta = (
    spark.read
    .format("delta")
    .load(f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/datalake/campaigns")
)
display(campaigns_delta)