# Ingest Data from gen_ClientServiceCase Using API

### Imports

In [1]:
import json
import os

import pandas as pd
import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import to_timestamp, when, col, lit, udf
from requests.auth import HTTPBasicAuth

StatementMeta(, 7604c3bb-76c7-44e8-b0a9-9f38acad5224, 3, Finished, Available, Finished)

### Get data function

In [2]:
def get_data_from_api(page, pageSize, timeout=60):
    """Fetch one page of gen_ClientServiceCase records from the API.

    Sends a POST request to the module-level ``url`` using HTTP basic auth
    built from the module-level ``username`` and ``password``. The request
    asks for a fixed list of fields, sorted by ``CreateDate``.

    Args:
        page: Page number, sent as ``"Page"`` in the request payload.
        pageSize: Number of records per page, sent as ``"PageSize"``.
        timeout: Timeout in seconds passed to ``requests.post``. Without it
            the call can block forever if the server stops responding.

    Returns:
        The value stored under ``"Data"`` in the JSON response when the
        status code is 200. For any other status the error is printed and
        ``None`` is returned.
    """
    # Fields requested from the API, in the order they are sent.
    # z_NoticeCause and z_NoticeStateSelect are intentionally not requested.
    field_names = [
        "fkUserIDAssignedTo",
        "CaseNotes",
        "CaseNumber",
        "crCategory",
        "CreateDate",
        "CaseInfo",
        "fkEmployeeID",
        "fkOwnerUserID",
        "luPriority",
        "CallerName",
        "fkReportedByEmployeeID",
        "Resolution",
        "ResolutionDate",
        "luStatus",
        "Subject",
        "fkCaseTypeID",
    ]
    payload = {
        "Fields": [{"Name": name} for name in field_names],
        "SortCol": "CreateDate",
        "Page": page,
        "PageSize": pageSize,
    }

    response = requests.post(
        url,
        auth=HTTPBasicAuth(username, password),
        json=payload,
        timeout=timeout,
    )

    if response.status_code != 200:
        print(f"Request error: {response.status_code} - {response.text}")
        return None

    return response.json()["Data"]

StatementMeta(, 7604c3bb-76c7-44e8-b0a9-9f38acad5224, 4, Finished, Available, Finished)

### Save data function

In [3]:
def process_and_save_data(data):
    """Type-cast one page of API records and append it to the Delta table.

    The records are loaded into a pandas DataFrame, converted to a Spark
    DataFrame, projected onto a fixed column list with casts applied, staged
    as parquet at the module-level ``parquet_path`` (overwriting the previous
    contents), read back, and appended to the module-level Delta ``table``.

    Because the final write uses ``append`` mode, every call adds rows;
    calling it again with the same records inserts them again.

    Args:
        data: Records for one page, as returned by ``get_data_from_api``.
            Nothing is written when it is empty or ``None``.
    """
    # An empty page has no columns to infer a schema from, so skip it.
    if not data:
        return

    timestamp_format = "yyyy-MM-dd'T'HH:mm:ss'Z'"

    def as_int(name):
        # Casting a null yields null, so no explicit null guard is needed.
        return col(name).cast("int").alias(name)

    def as_timestamp(name):
        # to_timestamp returns null for values that do not match the format.
        return to_timestamp(col(name), timestamp_format).alias(name)

    def text_or_unknown(name):
        # Replace missing text with the literal "Unknown".
        return when(col(name).isNotNull(), col(name)).otherwise(lit("Unknown")).alias(name)

    df = pd.DataFrame(data)
    # NOTE(review): pandas stores missing numeric values as NaN; confirm they
    # reach Spark as nulls (not NaN) before the int casts below.
    spark_df = spark.createDataFrame(df)

    # Column order is kept stable because the result is appended to an
    # existing table. z_NoticeCause and z_NoticeStateSelect are not selected.
    df_mapped = spark_df.select(
        as_int("fkUserIDAssignedTo"),
        text_or_unknown("CaseNotes"),
        as_int("CaseNumber"),
        col("crCategory"),
        as_timestamp("CreateDate"),
        col("CaseInfo"),
        as_int("fkEmployeeID"),
        as_int("fkOwnerUserID"),
        col("luPriority"),
        col("CallerName"),
        as_int("fkReportedByEmployeeID"),
        text_or_unknown("Resolution"),
        as_timestamp("ResolutionDate"),
        col("luStatus"),
        col("Subject"),
        as_int("fkCaseTypeID"),
    )

    # Stage the page as parquet, then load the staged file into the table.
    df_mapped.write.mode("overwrite").parquet(parquet_path)
    df_output = spark.read.parquet(parquet_path)
    df_output.write.mode("append").format("delta").saveAsTable(table)

StatementMeta(, 7604c3bb-76c7-44e8-b0a9-9f38acad5224, 5, Finished, Available, Finished)

### Initialization

In [4]:
# Spark session shared by the functions defined in this notebook.
spark = SparkSession.builder.appName("API Data Extraction").getOrCreate()

# API endpoint queried by get_data_from_api.
url = "https://vensureqa.clientspace.net/next/api/dataform/v3.0/query/gen_ClientServiceCase"

# Credentials are read from the environment so they are not stored in the
# notebook. A missing variable raises KeyError naming the variable.
# NOTE(review): a password was previously stored here in plain text and
# should be rotated.
username = os.environ["CLIENTSPACE_API_USERNAME"]
password = os.environ["CLIENTSPACE_API_PASSWORD"]

# Staging parquet location and destination Delta table used by
# process_and_save_data.
parquet_path = "abfss://CreaiFabricCapacity@onelake.dfs.fabric.microsoft.com/CreaiLakehouse.Lakehouse/Files/genClientServiceCase.parquet"
table = "genClientServiceCase"

StatementMeta(, 7604c3bb-76c7-44e8-b0a9-9f38acad5224, 6, Finished, Available, Finished)

### Main function

In [5]:
# Paging configuration: start page, records per request, and the page number
# at which the loop stops even if more data is available.
page = 1
pageSize = 50
lastPage = 2

while True:
    data = get_data_from_api(page, pageSize)

    # Only a non-empty page is written; an empty or failed fetch ends the run.
    if data:
        process_and_save_data(data)

    # Stop on: no data, a short (final) page, or reaching the page cap.
    finished = (not data) or len(data) < pageSize or page >= lastPage
    if finished:
        break

    # Progress output: the page just completed, when another page follows.
    print(page)
    page += 1


StatementMeta(, 7604c3bb-76c7-44e8-b0a9-9f38acad5224, 7, Finished, Available, Finished)

1
2
