## Installing R packages

## Introduction

This notebook installs the required R libraries.

The libraries get installed into `/usr/lib/R/site-library`.

Afterwards, the libraries get copied into `dbfs:/FileStore/Rpackage/rlibrary`.

The libraries can then be easily applied in other notebooks by running: `%r .libPaths(c(.libPaths(), normalizePath("/dbfs/FileStore/Rpackage/rlibrary/")))`

## Install libraries

In [0]:
# ruff: noqa: E501

In [0]:
import os
import sys

# Add the parent of the current working directory to the import path
# (presumably so modules located one level up can be imported — confirm).
sys.path.append(os.path.abspath(".."))
from databricks.sdk import WorkspaceClient

# Create a workspace client with no explicit arguments and expose its
# `dbutils` handle; the cells below use it for all filesystem operations.
w = WorkspaceClient()
dbutils = w.dbutils

In [0]:
from typing import List


def check_libraries_exists_usr(expected_libraries: List[str], dbutils) -> None:
    """
    Checks if the expected R libraries exist in the local
    `/usr/lib/R/site-library` path.

    Each library is expected to have its own non-empty subfolder, named
    exactly like the library, directly below the site-library folder.

    Args:
        expected_libraries (List[str]): A list of R library names to check.
        dbutils: Databricks utility object used to interact with the filesystem.

    Raises:
        Exception: If the subfolder for any library is not found in the
            specified path (or is empty). When listing the folder itself
            failed, the underlying error is attached as ``__cause__``.
    """
    base_path = "file:/usr/lib/R/site-library"
    for library in expected_libraries:
        message = f"Subfolder for library {library} not found in {base_path}"
        # Keep the try body minimal and catch `Exception` only, so that
        # KeyboardInterrupt/SystemExit are not swallowed and rewritten.
        try:
            entries = dbutils.fs.ls(f"{base_path}/{library}")
        except Exception as err:
            # Chain the original error so the real reason stays visible.
            raise Exception(message) from err
        if not any(entries):
            raise Exception(message)


def check_libraries_exists_filestorage(expected_libraries: List[str], dbutils) -> None:
    """
    Checks if the expected R libraries exist
    in the Databricks FileStore path `/FileStore/Rpackage/rlibrary`.

    Each library is expected to have its own non-empty subfolder, named
    exactly like the library, directly below the rlibrary folder.

    Args:
        expected_libraries (List[str]): A list of R library names to check.
        dbutils: Databricks utility object used to interact with the filesystem.

    Raises:
        Exception: If the subfolder for any library is not found in the
            specified path (or is empty). When listing the folder itself
            failed, the underlying error is attached as ``__cause__``.
    """
    base_path = "dbfs:/FileStore/Rpackage/rlibrary"
    for library in expected_libraries:
        message = f"Subfolder for library {library} not found in {base_path}"
        # Keep the try body minimal and catch `Exception` only, so that
        # KeyboardInterrupt/SystemExit are not swallowed and rewritten.
        try:
            entries = dbutils.fs.ls(f"{base_path}/{library}")
        except Exception as err:
            # Chain the original error so the real reason stays visible.
            raise Exception(message) from err
        if not any(entries):
            raise Exception(message)


In [0]:
%r
# Named list mapping each package name to the exact version to install.
# NOTE: there must be no trailing comma after the last entry; R rejects
# `list("a" = 1, )` with an "argument is empty" error.
packages <- list(
  "ggplot2" = "3.5.1",
  "reshape" = "0.8.9"
)

# Loop through the list and install each package into the local site library
for (pkg in names(packages)) {
  version <- packages[[pkg]]
  # Versioned CRAN URL; presumably resolves to the source tarball of exactly
  # this version — verify if an install unexpectedly fails.
  url <- paste0("https://cran.r-project.org/package=", pkg, "&version=", version)
  cat("Installing package:", pkg, "version:", version, "\n")
  cat("======================================\n")
  # repos = NULL makes install.packages treat `pkgs` as a file/URL
  # instead of looking the name up in a repository.
  utils::install.packages(pkgs = url, repos = NULL, lib="/usr/lib/R/site-library")
}


In [0]:
%r
# Install a package from a source tarball located under /dbfs/FileStore.
# `<your-package>` is a placeholder: replace it with the real file name.
path <- "/dbfs/FileStore/<your-package>.tar.gz"
install.packages(
  pkgs = path,
  repos = NULL,
  type = "source",
  lib = "/usr/lib/R/site-library"
)

## Ensure libraries are installed in file:/usr/lib/R/site-library

In [0]:
# Fail fast when the local R site library contains no entries at all.
site_library_entries = dbutils.fs.ls("file:/usr/lib/R/site-library")
if not any(site_library_entries):
    raise Exception("Nothing to be found in file:/usr/lib/R/site-library")

In [0]:
# Library names that must each have a subfolder of the same name after
# installation. "YourPackage" looks like a placeholder for the locally
# built package — replace it with the real package name.
expected_libraries = ["ggplot2", "reshape", "YourPackage"]

In [0]:

# Verify that every expected library has its own subfolder
# in the local site library.
check_libraries_exists_usr(
    expected_libraries=expected_libraries,
    dbutils=dbutils,
)

## Clear dbfs R packages

In [0]:
# Recursively remove the previously copied libraries from DBFS
# before the fresh copy is written.
dbutils.fs.rm("dbfs:/FileStore/Rpackage/rlibrary", recurse=True)

## Copy R packages to DBFS

In [0]:
# Where the packages were installed (local path) and where they should be
# published (DBFS location).
local_source = "file:/usr/lib/R/site-library"
dbfs_destination = "dbfs:/FileStore/Rpackage/rlibrary"

# Recursively copy all files and folders from the local path to DBFS
dbutils.fs.cp(
    local_source,
    dbfs_destination,
    recurse=True,
)

## Ensure libraries are installed in dbfs:/FileStore/Rpackage/rlibrary

In [0]:
# Fail fast when the DBFS library folder contains no entries at all.
dbfs_library_entries = dbutils.fs.ls("dbfs:/FileStore/Rpackage/rlibrary")
if not any(dbfs_library_entries):
    raise Exception("Nothing to be found in dbfs:/FileStore/Rpackage/rlibrary")

In [0]:
# Verify that every expected library has its own subfolder
# in the DBFS library folder.
check_libraries_exists_filestorage(
    expected_libraries=expected_libraries,
    dbutils=dbutils,
)