In [1]:
import os
import json
import requests
import delta
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StructType, StringType, ArrayType
import env.config

In [2]:
def get_spark() -> SparkSession:
    """Build (or reuse) a local Spark session configured for Delta Lake.

    The session runs on master ``local[*]`` under the app name ``TestApp``,
    registers the Delta SQL extension and Delta catalog, and is passed through
    ``delta.configure_spark_with_delta_pip`` before ``getOrCreate()`` is
    called. The Spark context log level is set to ``ERROR``.

    Returns:
        The SparkSession produced by ``getOrCreate()``.
    """
    # Settings that switch on Delta's SQL extension and catalog implementation.
    delta_settings = {
        "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
        "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    }

    session_builder = SparkSession.builder.master("local[*]").appName("TestApp")
    for option, value in delta_settings.items():
        session_builder = session_builder.config(option, value)

    session = delta.configure_spark_with_delta_pip(session_builder).getOrCreate()
    # Keep notebook output readable: only errors are logged.
    session.sparkContext.setLogLevel("ERROR")
    return session

In [3]:
def call_data_gov_api(uri, limit=500, offset=0, timeout=30):
    """Send a GET request to an api.data.gov.in endpoint and return the response.

    The API key is read from ``env.config.api_key`` and the response format is
    always requested as ``json``.

    Args:
        uri: Path appended to ``https://api.data.gov.in``
            (e.g. ``"/resource/<resource-id>"``).
        limit: Value sent as the ``limit`` query parameter. Defaults to 500,
            the value that was previously hard-coded.
        offset: Value sent as the ``offset`` query parameter. Defaults to 0,
            the value that was previously hard-coded.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``. Without
            one, requests waits indefinitely on a stalled connection.

    Returns:
        The ``requests.Response`` object. The HTTP status is NOT checked here;
        callers decide how to handle error responses.

    Raises:
        requests.exceptions.RequestException: propagated from ``requests.get``
            (including ``Timeout`` when ``timeout`` is exceeded).
    """
    base_url = "https://api.data.gov.in"
    # Let requests build and URL-encode the query string instead of formatting
    # it by hand, so special characters in any value cannot corrupt the URL.
    query = {
        "api-key": env.config.api_key,
        "format": "json",
        "limit": limit,
        "offset": offset,
    }
    return requests.get(f"{base_url}{uri}", params=query, timeout=timeout)

In [4]:
# NOTE(review): `dir` shadows the built-in dir(); the name is kept because
# cells outside this view may still reference it.
dir = os.getcwd()
spark = get_spark()

uri = "/resource/81153f15-b4da-45b5-a299-f307351c5001"
resp = call_data_gov_api(uri)
# Fail with a clear HTTP error instead of a KeyError/JSON error further down
# when the API answers with a non-2xx status.
resp.raise_for_status()
# Parse the body once and reuse it (previously parsed twice, with the first
# result discarded).
records = resp.json()['records']

# Every column of a record is declared as a non-nullable string.
json_array_schema = StructType([
    StructField(column, StringType(), nullable=False)
    for column in (
        'sno',
        'memberid',
        'name',
        'party',
        'constituencies',
        'state',
        'paddress',
        'permanentphone',
        'localaddress',
        'localphone',
        'emailid',
    )
])

# Build the DataFrame from the list of record dicts and persist it as a Delta
# table under <cwd>/data/ls_members, replacing any previous contents.
df = spark.createDataFrame(data=records, schema=json_array_schema)
# os.path.join avoids the doubled separator produced by
# `dir + os.sep + "/data/ls_members"`.
df.write.format("delta").mode("overwrite").save(os.path.join(dir, "data", "ls_members"))
df.show(truncate=True)

:: loading settings :: url = jar:file:/Users/subhashpeshwa/miniconda/lib/python3.10/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /Users/subhashpeshwa/.ivy2/cache
The jars for the packages stored in: /Users/subhashpeshwa/.ivy2/jars
io.delta#delta-core_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-38f35613-d029-44e4-a082-ee1900f0dad2;1.0
	confs: [default]
	found io.delta#delta-core_2.12;2.4.0 in central
	found io.delta#delta-storage;2.4.0 in central
	found org.antlr#antlr4-runtime;4.9.3 in central
:: resolution report :: resolve 180ms :: artifacts dl 8ms
	:: modules in use:
	io.delta#delta-core_2.12;2.4.0 from central in [default]
	io.delta#delta-storage;2.4.0 from central in [default]
	org.antlr#antlr4-runtime;4.9.3 from central in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	|      default     |

+---+--------+--------------------+-----+---------------+-----------------+--------------------+--------------------+--------------------+--------------------+--------------------+
|sno|memberid|                name|party| constituencies|            state|            paddress|      permanentphone|        localaddress|          localphone|             emailid|
+---+--------+--------------------+-----+---------------+-----------------+--------------------+--------------------+--------------------+--------------------+--------------------+
|  1|    2654|Abdullah, Dr. Farooq|J&KNC|       Srinagar|Jammu and Kashmir|40, Gupkar Road, ...|Tel : (0194) 2452...|AB - 9, Tilak Mar...|Telefax : (011) 2...|iamfarooq70@hotma...|
|  2|    4143|Adhalrao Patil, S...|   SS|         Shirur|      Maharashtra|At &amp; P.O. Lan...|Tel : (02133) 235...|59, Lodhi Estate,...|Tels. : (011) 246...|shivajirao@sansad...|
|  3|    4847|Adhikari, Shri De...| AITC|         Ghatal|      West Bengal|Flat No. 29-B, To...