In [0]:
%pip install -U -q -r ../requirements.txt
dbutils.library.restartPython()

In [0]:
%run ../Main_Config

In [0]:
#Getting all the sharepoint details
from urllib.parse import urlparse, parse_qs, unquote
import requests
import json
from pyspark.sql.functions import col


def extract_sharepoint_domain_and_path(sharepoint_url:str):
    """
    Parses a SharePoint URL to extract the domain and site path.

    The domain is the network-location part of the URL. The site path is taken
    from the ``id`` query parameter and trimmed to its first two path segments
    (e.g. "/sites/mysite/Shared Documents/Reports" -> "/sites/mysite").

    Parameters:
    - sharepoint_url (str): Full SharePoint URL (e.g., from web or API)

    Returns:
    - tuple: (SHAREPOINT_DOMAIN, SITE_PATH) or (None, None) on failure.
      Failure means: the argument is not a non-blank string, the URL has no
      host part, or parsing raised. SITE_PATH is "" when the URL carries no
      ``id`` query parameter.
    """
    # urlparse() does not reject None/bytes (it returns bytes-typed results),
    # so validate the input explicitly to honour the (None, None) contract.
    if not isinstance(sharepoint_url, str) or not sharepoint_url.strip():
        logger.error(f"Invalid SharePoint URL: {sharepoint_url!r}")
        return None, None

    try:
        parsed_url = urlparse(sharepoint_url.strip())

        domain = parsed_url.netloc
        if not domain:
            # Typically a URL without a scheme: everything lands in .path
            logger.error(f"No domain found in SharePoint URL: {sharepoint_url!r}")
            return None, None

        # The server-relative path of the item is carried in the "id" parameter
        id_values = parse_qs(parsed_url.query).get("id")
        if id_values:
            site_path_encoded = id_values[0]
        else:
            logger.warning("SharePoint URL has no 'id' query parameter; site path will be empty")
            site_path_encoded = ""

        # parse_qs() has already percent-decoded the value once; the extra
        # unquote() is kept so double-encoded ids keep resolving as before.
        site_path_decoded = unquote(site_path_encoded)

        # Trim the path to just the site root (e.g., "/sites/mysite"):
        # the leading "/" yields an empty first element, hence three parts.
        site_path = "/".join(site_path_decoded.split("/")[:3])

        logger.info(f"SHAREPOINT_DOMAIN: {domain}")
        logger.info(f"SITE_PATH: {site_path}")

        return domain, site_path

    except ValueError as ve:
        # urlparse() raises ValueError for malformed hosts (e.g. bad IPv6 literal)
        logger.error(f"Value error: {ve}", exc_info=True)
        return None, None
    except Exception as e:
        logger.error(f"Unexpected error while parsing SharePoint URL: {e}", exc_info=True)
        return None, None

# Split the configured SharePoint URL into its domain and site-root path.
# Both values are None if the URL could not be parsed.
SHAREPOINT_DOMAIN, SITE_PATH = extract_sharepoint_domain_and_path(SHAREPOINT_URL)

# Endpoint that get_access_token() posts the client credentials to.
AUTH_URL = authentication_url

def get_access_token(timeout: float = 30.0):
    """
    Requests an OAuth2 token using client credentials to access Microsoft Graph API.

    Posts the client-credentials form to AUTH_URL and reads "access_token"
    from the JSON response.

    Parameters:
    - timeout (float): Seconds to wait for the token endpoint before giving up.
      Without a timeout, requests would wait indefinitely.

    Returns:
    - str: Access token string if successful
    - None: If the request fails, times out, or the response body is not the
      expected JSON object containing "access_token"
    """
    payload = {
        "grant_type": "client_credentials",
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": microsoft_graph_default
    }
    try:
        response = requests.post(AUTH_URL, data=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()["access_token"]

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching access token: {e}", exc_info=True)
        return None  # Return None in case of failure
    except (KeyError, TypeError, ValueError) as e:
        # Successful status but the body is not JSON or has no "access_token".
        # Only the exception is logged; the payload holds the client secret.
        logger.error(f"Unexpected access token response: {e}", exc_info=True)
        return None

#getting sharepoint site id with OAuth Token
def get_site_id(timeout: float = 30.0):
    """
    Fetches the SharePoint site ID using the domain and site path via Microsoft Graph API.

    Builds the lookup URL from the module-level SHAREPOINT_DOMAIN and SITE_PATH,
    authenticates with a fresh token from get_access_token(), and reads "id"
    from the JSON response.

    Parameters:
    - timeout (float): Seconds to wait for the Graph API before giving up.

    Returns:
    - str: Site ID if successful
    - None: If the domain/path or token is unavailable, the request fails,
      or the response has no "id"
    """
    # Both are None when the SharePoint URL could not be parsed; formatting
    # them into the URL would only produce a request for ".../None...".
    if not SHAREPOINT_DOMAIN or SITE_PATH is None:
        logger.error("Cannot fetch site ID: SharePoint domain/site path is not available")
        return None

    token = get_access_token() #get OAuth token
    if not token:
        # get_access_token() already logged the cause; avoid sending "Bearer None"
        logger.error("Cannot fetch site ID: no access token available")
        return None
    headers = {"Authorization": f"Bearer {token}"}

    site_lookup_url = microsoft_graph_url + f"{SHAREPOINT_DOMAIN}" + site_url_second + f"{SITE_PATH}"

    try:
        response = requests.get(site_lookup_url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()["id"]

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching site ID: {e}", exc_info=True)
        return None  # Return None in case of failure
    except (KeyError, TypeError, ValueError) as e:
        # Successful status but the body is not JSON or has no "id"
        logger.error(f"Unexpected site response: {e}", exc_info=True)
        return None

#Fetch Drive ID from Sharepoint using OAuth Token
def get_drive_id(timeout: float = 30.0):
    """
    Retrieves a drive ID associated with a SharePoint site using the site ID.

    Resolves the site ID via get_site_id(), lists the site's drives through
    Microsoft Graph, and returns the ID of the first drive in the response's
    "value" list.
    NOTE(review): the first listed drive is presumably the site's default
    document library - confirm against the Graph API ordering.

    Parameters:
    - timeout (float): Seconds to wait for the Graph API before giving up.

    Returns:
    - str: Drive ID if successful
    - None: If the site ID or token is unavailable, the request fails,
      or the response contains no drives
    """
    site_id = get_site_id()
    if not site_id:
        # get_site_id() already logged the cause; avoid requesting ".../None..."
        logger.error("Cannot fetch drive ID: site ID is not available")
        return None

    token = get_access_token()
    if not token:
        logger.error("Cannot fetch drive ID: no access token available")
        return None
    headers = {"Authorization": f"Bearer {token}"}

    drives_url = microsoft_graph_url + f"{site_id}" + drive_url_second

    try:
        response = requests.get(drives_url, headers=headers, timeout=timeout)
        response.raise_for_status()
        drives = response.json()["value"]
        if not drives:
            logger.error(f"No drives found for site {site_id}")
            return None
        return drives[0]["id"]

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching drive ID: {e}", exc_info=True)
        return None  # Return None in case of failure
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Successful status but the body does not have the expected shape
        logger.error(f"Unexpected drive response: {e}", exc_info=True)
        return None

In [0]:
# Resolve the Graph credentials/identifiers and publish them as job task
# values so that other tasks can read them.
token = get_access_token()
site_id = get_site_id()
drive_id = get_drive_id()

# The logger object is deliberately not passed along: the original author
# noted that setting it as a task value is not possible.
# NOTE(review): confirm that exposing the bearer token as a task value is
# acceptable - task values may be visible to anyone who can view the job run.
try:
    dbutils.jobs.taskValues.set(key="access_token", value=token)
    dbutils.jobs.taskValues.set(key="site_id", value=site_id)
    dbutils.jobs.taskValues.set(key="drive_id", value=drive_id)
except Exception as e:
    # Best-effort: log with traceback and continue. `except Exception` (not a
    # bare except) lets KeyboardInterrupt/SystemExit, e.g. a run cancel, through.
    logger.error(f"Error setting task values: {e}", exc_info=True)