In [1]:
# Set to False for personal use if you want to see the real hospital names, addresses, etc.
IS_MASKED_DATA = True 

StatementMeta(, 7b028b04-bfd6-47cc-a9d7-6196d585c177, 5, Finished, Available, Finished)

In [2]:
%run 0_ReviseCare_Secrets

StatementMeta(, 7b028b04-bfd6-47cc-a9d7-6196d585c177, 6, Finished, Available, Finished)

Variables GMAP_API_KEY, OPENAI_API_KEY, AZURE_OPEN_AI_KEY, AZURE_OPEN_AI_ENDPOINT were initialized


In [3]:
import googlemaps
import time
import json
import random
import string

# For utilizing GenAI engines
from openai import OpenAI
from openai import AzureOpenAI

# Registering UDF functions
from pyspark.sql.functions import udf, lit
from pyspark.sql.types import StringType

StatementMeta(, 7b028b04-bfd6-47cc-a9d7-6196d585c177, 7, Finished, Available, Finished)

In [11]:
# Create the Google Maps client used by the helper functions below.
# GMAP_API_KEY is expected to be defined by the secrets notebook run earlier.
gmaps = googlemaps.Client(key=GMAP_API_KEY)
print("Google client gmaps is initialized")

StatementMeta(, 8fcd8a0a-e338-4f0c-9b9d-08565a36926b, 15, Finished, Available, Finished)

Google client gmaps was is initialized


In [12]:
def list_to_dataframe(data: list):
    """Turn a list of JSON-serializable records into a Spark DataFrame.

    Every element of ``data`` is serialized with ``json.dumps``; the resulting
    strings are distributed with ``sc.parallelize`` and read back through
    ``spark.read.json`` (no explicit schema is passed).

    Relies on the ``sc`` and ``spark`` globals, which are not defined in this
    cell and must already exist in the session.

    Args:
        data: Records to convert; each must be serializable by ``json.dumps``.

    Returns:
        The DataFrame produced by ``spark.read.json``.
    """
    serialized_records = [json.dumps(record) for record in data]
    records_rdd = sc.parallelize(serialized_records)
    return spark.read.json(records_rdd)

print("Function list_to_dataframe() is initialized")

StatementMeta(, 8fcd8a0a-e338-4f0c-9b9d-08565a36926b, 16, Finished, Available, Finished)

Function list_to_dataframe() is initialized


In [13]:
def get_ids_from_config(hospitals_config: str) -> list:
    """Extract the place IDs from a multi-line hospitals configuration string.

    Every non-blank line is expected to have the form ``<place name>:<place id>``.
    The line is split on its LAST colon, so a place name that itself contains
    ':' no longer breaks parsing, and surrounding whitespace (including a
    trailing carriage return from Windows line endings) is stripped from the ID.

    Args:
        hospitals_config: Newline-separated ``name:place_id`` entries.
            Blank or whitespace-only lines are ignored.

    Returns:
        A list of place-id strings in the order they appear in the config.

    Raises:
        ValueError: If a non-blank line contains no ':' separator.
    """
    place_ids = []

    for raw_line in hospitals_config.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # rpartition splits on the last ':' only; the name part is not needed.
        _place_name, separator, place_id = line.rpartition(':')
        if not separator:
            raise ValueError(
                f"Invalid hospitals config line (expected 'name:place_id'): {line!r}"
            )

        place_ids.append(place_id.strip())

    return place_ids

print("Function get_ids_from_config() is initialized")

StatementMeta(, 8fcd8a0a-e338-4f0c-9b9d-08565a36926b, 17, Finished, Available, Finished)

Function get_ids_from_config() is initialized


In [14]:
def get_hospital_details(place_id):
    """Fetch details for one place and return the ``'result'`` entry of the response.

    Calls ``gmaps.place`` (the module-level Google Maps client) with a fixed
    list of requested fields.

    Args:
        place_id: Identifier passed through to ``gmaps.place`` as ``place_id``.

    Returns:
        The value stored under the ``'result'`` key of the response.
    """
    requested_fields = [
        "name",
        "business_status",
        "formatted_address",
        "place_id",
        "review",
        "rating",
        "user_ratings_total",
    ]

    response = gmaps.place(place_id=place_id, fields=requested_fields)
    hospital = response['result']
    return hospital

print("Function get_hospital_details() is initialized")

StatementMeta(, 8fcd8a0a-e338-4f0c-9b9d-08565a36926b, 18, Finished, Available, Finished)

Function get_hospital_details() is initialized


In [15]:
def openai_get_answer(instructions: str, question: str, data: str):
    """Ask a chat-completion model a question about the supplied data.

    The backend is chosen from the module-level credentials:
    when ``AZURE_OPEN_AI_KEY`` is non-empty an ``AzureOpenAI`` client is used
    (endpoint ``AZURE_OPEN_AI_ENDPOINT``, API version "2024-02-01", model
    "gpt-4o"); otherwise an ``OpenAI`` client with ``OPENAI_API_KEY`` and
    model "gpt-4o-mini" is used. A new client is created on every call.

    Args:
        instructions: Sent as the system message; should limit the answers.
        question: Sent as the first user message; formulates the main question.
        data: Provides the context; sent as a second user message prefixed
            with "data for analysis: ".

    Returns:
        The message content of the first choice in the completion response.
    """
    # Truthiness check (rather than len(...) > 0) so that a key left as None
    # falls back to the OpenAI client instead of raising TypeError.
    if AZURE_OPEN_AI_KEY:
        api_version = "2024-02-01"
        model_name = "gpt-4o"

        client = AzureOpenAI(
            azure_endpoint=AZURE_OPEN_AI_ENDPOINT,
            api_key=AZURE_OPEN_AI_KEY,
            api_version=api_version,
        )
    else:
        client = OpenAI(api_key=OPENAI_API_KEY)
        model_name = "gpt-4o-mini"

    messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": question},
        {"role": "user", "content": f"data for analysis: {data}"},
    ]

    completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
    )

    return completion.choices[0].message.content

print("Function openai_get_answer() is initialized")

# Wrap openai_get_answer as a Spark UDF with a string return type.
openai_udf = udf(openai_get_answer, StringType())
# Also register the same function with spark.udf under the name "openai_udf".
spark.udf.register("openai_udf", openai_get_answer, StringType())
print("UDF Function openai_udf() is initialized")

StatementMeta(, 8fcd8a0a-e338-4f0c-9b9d-08565a36926b, 19, Finished, Available, Finished)

Function openai_get_answer() is initialized
UDF Function openai_udf() is initialized


In [9]:
def string_masking(input_str):
    """Mask the letters and digits of a string while keeping its shape.

    When the module-level flag ``IS_MASKED_DATA`` is truthy, every lowercase
    letter, uppercase letter and digit is replaced by a character from the
    matching ASCII set (lowercase / uppercase / digits); all other characters
    are kept, and the result is prefixed with ``'Masked:'``. A given input
    character always maps to the same replacement.

    When ``IS_MASKED_DATA`` is falsy the input is returned unchanged, so the
    real values stay visible instead of being replaced by ``None``.

    Args:
        input_str: Text to mask. ``None`` is passed through unchanged so a
            missing value does not raise.

    Returns:
        ``'Masked:' + <masked text>`` when masking is enabled, otherwise
        ``input_str`` itself (``None`` for ``None`` input).
    """
    # Local import: hashlib is not part of the notebook's import cell.
    import hashlib

    if input_str is None or not IS_MASKED_DATA:
        return input_str

    def get_replacement(char, charset):
        # The built-in hash() of a str is salted per interpreter process unless
        # PYTHONHASHSEED is fixed, so a digest is used to keep the mapping
        # stable across processes and sessions.
        digest = hashlib.sha256(char.encode("utf-8", "surrogatepass")).digest()
        index = int.from_bytes(digest[:8], "big") % len(charset)
        return charset[index]

    # Character sets for replacement
    lowercase = string.ascii_lowercase
    uppercase = string.ascii_uppercase
    digits = string.digits

    result = []
    for char in input_str:
        if char.islower():  # Replace lowercase letters
            result.append(get_replacement(char, lowercase))
        elif char.isupper():  # Replace uppercase letters
            result.append(get_replacement(char, uppercase))
        elif char.isdigit():  # Replace digits
            result.append(get_replacement(char, digits))
        else:  # Keep other symbols unchanged
            result.append(char)

    return 'Masked:' + ''.join(result)

print("Function string_masking() is initialized")

# Wrap string_masking as a Spark UDF with a string return type.
string_masking_udf = udf(string_masking, StringType())
# Also register the same function with spark.udf under the name "string_masking_udf".
spark.udf.register("string_masking_udf", string_masking, StringType())
print("UDF Function string_masking_udf() is initialized")

StatementMeta(, 7b028b04-bfd6-47cc-a9d7-6196d585c177, 13, Finished, Available, Finished)

Function string_masking() is initialized
UDF Function string_masking_udf() is initialized
