In [None]:
# Setup
# Runs the classroom setup notebook located under ./Includes, relative to this notebook.
# NOTE(review): later cells read a `DA` object that no cell in this notebook defines —
# presumably this setup notebook creates it; confirm.
%run ./Includes/Classroom-Setup-01

In [None]:
# Other conventions
# Show every DA object component as one "<label>: <value>" line.
# Each value sits behind a lambda so it is only looked up when its own line is
# printed; lookups therefore happen in the same order as the printed lines.
da_components = (
    ("Username", lambda: DA.username),
    ("Catalog Name", lambda: DA.catalog_name),
    ("Schema Name", lambda: DA.schema_name),
    ("Working Directory", lambda: DA.paths.working_dir),
    ("Dataset Location", lambda: DA.paths.datasets),
    ("Secondary Principal", lambda: DA.iam.secondary),
    ("Cluster Name", lambda: DA.cluster_name),
)
for label, read_value in da_components:
    print(f"{label}: {read_value()}")

In [None]:
# Implementing a Sample GitHub Application
# Python variable TOKEN
# WARNING: the token is written as a plain string literal, so anyone who can read
# this notebook can read the token. Later cells in this notebook replace this
# approach, first with a widget value and then with a Databricks secret.
# NOTE(review): if this value was ever a real token, treat it as leaked and revoke it — confirm.
DBACADEMY_GITHUB_TOKEN = "github_pat_11BG040804840080ASF804ASF4A0F0SAF0SAF04A0F15C90048C0E0AC4ECC0S4F10E5C"

In [None]:
# Query repositories with PyGithub and show them as a Spark DataFrame
from github import Github
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Fail fast on an empty token, before any call to GitHub is attempted.
if not DBACADEMY_GITHUB_TOKEN:
    raise ValueError("GitHub token is not set. Please set the DBACADEMY_GITHUB_TOKEN variable.")

try:
    # Initialize GitHub client
    g = Github(DBACADEMY_GITHUB_TOKEN)
    user = g.get_user()
    # Materialize the result as a list so it can be counted here and iterated again below.
    repos = list(user.get_repos())
    repo_count = len(repos)

    # Print the repository count
    print(f"Number of repositories fetched: {repo_count}\n")
except Exception as e:
    # Bind the exception and use an f-string so the real cause appears in the
    # message; `from e` keeps the original traceback attached.
    raise ValueError(f"Error retrieving repositories from GitHub: {e}") from e

# Check if repositories exist
if repo_count == 0:
    raise ValueError("No repositories found. Please check your GitHub token and permissions.")

# Extract required repository information: one tuple per repository, in column order.
data = [
    (
        repo.name,
        repo.git_url,
        repo.created_at,
        repo.open_issues_count,
        repo.visibility,
        repo.watchers_count,
    )
    for repo in repos
]

# Convert to Spark Dataframe
df = spark.createDataFrame(
    data,
    ["name", "git_url", "created_at", "open_issues_count", "visibility", "watchers_count"],
)

# Display the DataFrame in Databricks notebook
display(df)

In [None]:
# Solving the problem with parametrization
# Create a text widget that collects the GitHub token.
# The widget name is spelled "github_token" so that it matches the
# dbutils.widgets.get("github_token") calls made by later cells in this notebook.
dbutils.widgets.text(name='github_token', defaultValue='')

In [None]:
# Query repositories with PyGithub and show them as a Spark DataFrame
# Adjust to use widgets instead of hardcoded variables
import os
from github import Github
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Retrieve values from widgets
GITHUB_TOKEN = dbutils.widgets.get("github_token")

# Fail fast on an empty widget value, before any call to GitHub is attempted.
if not GITHUB_TOKEN:
    raise ValueError("GitHub token is not set. Please fill in the 'github_token' widget.")

try:
    # Initialize GitHub client
    g = Github(GITHUB_TOKEN)
    user = g.get_user()
    # Materialize the result as a list so it can be counted here and iterated again below.
    repos = list(user.get_repos())
    repo_count = len(repos)

    # Print the repository count
    print(f"Number of repositories fetched: {repo_count}\n")
except Exception as e:
    # Bind the exception and use an f-string so the real cause appears in the
    # message; `from e` keeps the original traceback attached.
    raise ValueError(f"Error retrieving repositories from GitHub: {e}") from e

# Check if repositories exist
if repo_count == 0:
    raise ValueError("No repositories found. Please check your GitHub token and permissions.")

# Extract required repository information: one tuple per repository, in column order.
data = [
    (
        repo.name,
        repo.git_url,
        repo.created_at,
        repo.open_issues_count,
        repo.visibility,
        repo.watchers_count,
    )
    for repo in repos
]

# Convert to Spark Dataframe
df = spark.createDataFrame(
    data,
    ["name", "git_url", "created_at", "open_issues_count", "visibility", "watchers_count"],
)

# Display the DataFrame in Databricks notebook
display(df)

In [None]:
# Solving problem with Databricks Secrets
# SDK Setup
# Install SDK
# Upgrades the databricks-sdk package by running pip through the %sh shell magic.
# NOTE(review): no version is pinned, so the installed release can differ between runs.
# NOTE(review): Databricks documents %pip for notebook-scoped installs — confirm that
# %sh pip installs into the Python environment this notebook imports from.
%sh pip install --upgrade databricks-sdk

In [None]:
# Databricks SQL Connector
# Upgrades the databricks-sql-connector package by running pip through the %sh shell magic.
# NOTE(review): none of the cells in this notebook import this package — confirm it is still needed.
%sh pip install --upgrade databricks-sql-connector

In [None]:
# Record credentials
# Create one empty text widget per Databricks connection setting
# ("db_token" first, then "url").
for widget_name in ("db_token", "url"):
    dbutils.widgets.text(name=widget_name, defaultValue="")

In [None]:
# Generate Databricks Credentials
# Configuring the SDK
import os

# Environment variable name -> name of the widget that supplies its value.
env_to_widget = {
    "DBACADEMY_GITHUB_TOKEN": "github_token",
    "DATABRICKS_TOKEN": "db_token",
    "DATABRICKS_HOST": "url",
}

# Read each widget and store its value in the process environment, in the order listed.
for env_name, widget_name in env_to_widget.items():
    os.environ[env_name] = dbutils.widgets.get(widget_name)

In [None]:
# Workspace client object
import os
from databricks.sdk import WorkspaceClient

# Build the client `w` from the host and token held in the process environment;
# a missing variable raises KeyError on lookup.
w = WorkspaceClient(host=os.environ["DATABRICKS_HOST"], token=os.environ["DATABRICKS_TOKEN"])

In [None]:
# Creating a Secret Scope
scope_name = "mysecrets_cli"

try:
    w.secrets.create_scope(scope_name)
except Exception as err:
    # Report the failure as text so the notebook keeps running.
    print(f"Error in creating '{scope_name}' scope. Error: {err}")
else:
    print(f"Secret scope '{scope_name}' created successfully!")

In [None]:
# Listing all secrets scopes
# Print a header followed by one " - <name>" line per scope returned by the client.
try:
    available_scopes = w.secrets.list_scopes()
    print("Secret Scopes:")
    for scope_entry in available_scopes:
        print(" - {}".format(scope_entry.name))
except Exception as err:
    # Report the failure as text so the notebook keeps running.
    print(f"Error in listing secret scopes. Error: {err}")

In [None]:
# Adding secrets
scope_name = "mysecrets_cli"
# Secret key -> secret value to store under that key in the scope.
secrets_dict = {
    "github_token": os.environ["DBACADEMY_GITHUB_TOKEN"]
}

# Add multiple secrets to same scope
try:
    for key, value in secrets_dict.items():
        # The SDK's put_secret takes the text payload as `string_value`; passing
        # `value=` raises TypeError, which the handler below would print instead
        # of storing the secret.
        w.secrets.put_secret(scope=scope_name, key=key, string_value=value)
        print(f"Secret '{key}' added to scope '{scope_name}' successfully!")
except Exception as e:
    # Report the failure as text so the notebook keeps running.
    print(f"Error in adding secrets to '{scope_name}' scope. Error: {e}")

In [None]:
# Listing secrets
# Print a header followed by one " - <key>" line per secret in the scope.
scope_name = "mysecrets_cli"
try:
    scope_secrets = w.secrets.list_secrets(scope=scope_name)
    print(f"Secrets in scope '{scope_name}':")
    for secret_entry in scope_secrets:
        print(" - {}".format(secret_entry.key))
except Exception as err:
    # Report the failure as text so the notebook keeps running.
    print(f"Error in listing secrets in '{scope_name}' scope. Error: {err}")

In [None]:
# Using secrets
import os
from github import Github
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Retrieve the token from the secret scope. Secrets are read through
# dbutils.secrets.get(scope, key); dbutils.widgets.get only reads widgets and
# takes a single widget name.
GITHUB_TOKEN = dbutils.secrets.get(scope="mysecrets_cli", key="github_token")

# Fail fast on an empty secret value, before any call to GitHub is attempted.
if not GITHUB_TOKEN:
    raise ValueError("GitHub token is not set. Please store it as secret 'github_token' in scope 'mysecrets_cli'.")

try:
    # Initialize GitHub client
    g = Github(GITHUB_TOKEN)
    user = g.get_user()
    # Materialize the result as a list so it can be counted here and iterated again below.
    repos = list(user.get_repos())
    repo_count = len(repos)

    # Print the repository count
    print(f"Number of repositories fetched: {repo_count}\n")
except Exception as e:
    # Bind the exception and use an f-string so the real cause appears in the
    # message; `from e` keeps the original traceback attached.
    raise ValueError(f"Error retrieving repositories from GitHub: {e}") from e

# Check if repositories exist
if repo_count == 0:
    raise ValueError("No repositories found. Please check your GitHub token and permissions.")

# Extract required repository information: one tuple per repository, in column order.
data = [
    (
        repo.name,
        repo.git_url,
        repo.created_at,
        repo.open_issues_count,
        repo.visibility,
        repo.watchers_count,
    )
    for repo in repos
]

# Convert to Spark Dataframe
df = spark.createDataFrame(
    data,
    ["name", "git_url", "created_at", "open_issues_count", "visibility", "watchers_count"],
)

# Display the DataFrame in Databricks notebook
display(df)

In [None]:
# Get Secrets Values
# Read the "github_token" secret from the "mysecrets_cli" scope; as the last
# expression of the cell, its value becomes the cell output.
# NOTE(review): Databricks is expected to redact secret values in cell output — confirm.
dbutils.secrets.get(scope="mysecrets_cli", key="github_token")

In [None]:
# Access control
# Granting access to secrets
from databricks.sdk.service.workspace import AclPermission

# Scope that receives the grants
scope_name = "mysecrets_cli"

# One entry per principal, paired with the permission level it is given
permissions = [
    {"principal": "users", "permission": AclPermission.READ},     # Read access
    {"principal": "admins", "permission": AclPermission.MANAGE},  # Full access
]

# Apply the grants in order; the first failure is reported and ends the loop.
try:
    for grant in permissions:
        principal, level = grant["principal"], grant["permission"]
        w.secrets.put_acl(scope=scope_name, principal=principal, permission=level)
        print(f"Access granted to '{principal}' with '{level}' permission on scope '{scope_name}'")
except Exception as err:
    print(f"Error in granting access to '{scope_name}' scope. Error: {err}")

In [None]:
# List Grants ACL
# Scope whose grants are listed
scope_name = "mysecrets_cli"

# Print a header followed by one line per ACL entry (principal and permission value).
try:
    scope_acls = w.secrets.list_acls(scope=scope_name)
    print(f"Grants for scope '{scope_name}':")
    for entry in scope_acls:
        print(" - Principal: {}, Permission: {}".format(entry.principal, entry.permission.value))
except Exception as err:
    # Report the failure as text so the notebook keeps running.
    print(f"Error in listing grants for '{scope_name}' scope. Error: {err}")

In [None]:
# Revoking grants
# NOTE(review): this cell only lists the scope's ACLs; it makes no call that
# removes a grant — confirm whether a revoke step is missing.
# Scope whose grants are listed
scope_name = "mysecrets_cli"

try:
    # Fetch the ACL entries, then print one line per entry (no header line).
    scope_acls = w.secrets.list_acls(scope=scope_name)
    for entry in scope_acls:
        print(" - Principal: {}, Permission: {}".format(entry.principal, entry.permission.value))
except Exception as err:
    # Report the failure as text so the notebook keeps running.
    print(f"Error in listing grants for '{scope_name}' scope. Error: {err}")

In [None]:
# Clean Up
# Scope to delete
scope_name = "mysecrets_cli"

try:
    w.secrets.delete_scope(scope_name)
except Exception as err:
    # Report the failure as text so the notebook keeps running.
    print(f"Error in deleting '{scope_name}' scope. Error: {err}")
else:
    print(f"Secret scope '{scope_name}' deleted successfully!")

In [None]:
# Remove every widget from the notebook, including the token and URL widgets created above.
dbutils.widgets.removeAll()