In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Load Data from Third-Party Sources into Vertex AI Search

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/search/import_3p_sources.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fsearch%2Fimport_3p_sources.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/search/import_3p_sources.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/search/import_3p_sources.ipynb">
      <img width="32px" src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

| | |
|-|-|
| Author(s) | [Holt Skinner](https://github.com/holtskinner) |

## Overview

This notebook describes how to connect third-party data sources to Vertex AI Search.

NOTE: This feature is only available to allowlisted users.

For more information, refer to the [documentation](https://cloud.google.com/generative-ai-app-builder/docs/connect-third-party-data-source).

## Get started

### Install the Discovery Engine client library and other required packages


In [None]:
%pip install --upgrade --user --quiet google-cloud-discoveryengine atlassian-python-api

### Restart runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.

The restart might take a minute or longer. After it's restarted, continue to the next step.

In [None]:
import IPython

# Fetch the running IPython application and ask its kernel to shut down.
# Passing True requests a restart, so the packages installed above become importable.
app = IPython.Application.instance()
app.kernel.do_shutdown(
    True,
)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

If you're running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

# The google.colab module is only loaded when the notebook runs on Colab,
# so its presence in sys.modules is used as the environment check.
running_in_colab = "google.colab" in sys.modules

if running_in_colab:
    from google.colab import auth

    # Prompts the user to sign in with their Google account.
    auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI Search, you must have an existing Google Cloud project and [enable the Discovery Engine API](https://console.cloud.google.com/flows/enableapi?apiid=discoveryengine.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Resolve the Google Cloud project ID.
# If the form field below is left at its placeholder, fall back to the
# GOOGLE_CLOUD_PROJECT environment variable.
import os

PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    # Default to "" rather than wrapping the lookup in str(): str(None) would
    # produce the literal project ID "None" when the variable is not set.
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "")

if not PROJECT_ID:
    # Surface the misconfiguration here instead of letting an empty or bogus
    # project ID fail later with a less obvious error.
    print(
        "WARNING: PROJECT_ID is not set. Enter your project ID above or set "
        "the GOOGLE_CLOUD_PROJECT environment variable."
    )

## Atlassian Confluence

Before connecting to Vertex AI Search, the Atlassian data source must have OAuth configured correctly.

The following code performs many of the required configuration steps.

For more information, refer to the [documentation](https://cloud.google.com/generative-ai-app-builder/docs/connect-third-party-data-source#confluence).

### Create an Atlassian API token

Create an API token from your Atlassian account:

1. Log in to https://id.atlassian.com/manage-profile/security/api-tokens.

2. Click Create API token.

3. From the dialog that appears, enter a memorable and concise Label for your token and click Create.

4. Click Copy to clipboard, then paste the token into `ATLASSIAN_API_TOKEN` in the cell below (and store it somewhere safe, because it cannot be viewed again):

In [None]:
# Atlassian connection settings used by the Confluence cells below.
import os

# Base URL of the Atlassian Cloud site, in the form https://EXAMPLE.atlassian.net
# A trailing slash is stripped so that paths can be appended with a single "/".
ATLASSIAN_INSTANCE = "https://vertex-ai-search-test2.atlassian.net".rstrip("/")

# API token created in the steps above. Leave empty to read it from the
# ATLASSIAN_API_TOKEN environment variable, which avoids saving the secret
# inside the notebook.
ATLASSIAN_API_TOKEN = ""
if not ATLASSIAN_API_TOKEN:
    ATLASSIAN_API_TOKEN = os.environ.get("ATLASSIAN_API_TOKEN", "")

In [None]:
# Connect to Confluence with basic auth (email + API token), look up the site's
# cloudId, then run an OAuth 2.0 authorization-code flow and use the resulting
# access token with a second Confluence client.
from atlassian import Confluence
import requests
from requests_oauthlib import OAuth2Session

# Constants
# Reuse the instance URL and API token from the configuration cell above;
# CONFLUENCE_URL was previously referenced without ever being defined.
CONFLUENCE_URL = ATLASSIAN_INSTANCE.rstrip("/")
USERNAME = "your-email@example.com"
API_TOKEN = ATLASSIAN_API_TOKEN or "your-api-token"
CLIENT_ID = "your-client-id"
CLIENT_SECRET = "your-client-secret"
REDIRECT_URI = (
    "https://vertexaisearch.cloud.google.com/console/oauth/confluence_oauth.html"
)
SCOPES = [
    "read:attachment:confluence",
    "read:configuration:confluence",
    "read:content-details:confluence",
    "read:content.metadata:confluence",
    "read:group:confluence",
    "read:space:confluence",
    "read:user:confluence",
]
# Upper bound (seconds) for the tenant-info HTTP call so the cell cannot hang.
REQUEST_TIMEOUT_SECONDS = 30

# Step 1: Initialize Confluence client
confluence = Confluence(url=CONFLUENCE_URL, username=USERNAME, password=API_TOKEN)

# Step 2: Get all spaces
spaces = confluence.get_all_spaces()
print("Spaces:", spaces)

# Step 3: Retrieve cloudId
# Initialised up front so the name exists even when the request fails.
cloud_id = None
response = requests.get(
    f"{CONFLUENCE_URL}/_edge/tenant_info",
    auth=(USERNAME, API_TOKEN),
    timeout=REQUEST_TIMEOUT_SECONDS,
)
if response.status_code == 200:
    cloud_id = response.json().get("cloudId")
    print("Cloud ID:", cloud_id)
else:
    print("Failed to retrieve cloud ID:", response.status_code)

# Step 4: Initialize OAuth2 session and retrieve token
oauth = OAuth2Session(CLIENT_ID, redirect_uri=REDIRECT_URI, scope=SCOPES)
# Atlassian's OAuth 2.0 (3LO) authorization endpoint requires the `audience`
# and `prompt` query parameters in addition to the standard OAuth ones.
authorization_url, state = oauth.authorization_url(
    "https://auth.atlassian.com/authorize",
    audience="api.atlassian.com",
    prompt="consent",
)
print("Visit this URL to authorize the application:", authorization_url)

# Fetch token after authorization
# The user pastes the full redirect URL (it contains the authorization code).
token = oauth.fetch_token(
    "https://auth.atlassian.com/oauth/token",
    client_secret=CLIENT_SECRET,
    authorization_response=input("Enter callback URL: "),
)

# Use OAuth token to access Confluence API
# NOTE(review): Atlassian documents that 3LO access tokens are used against
# https://api.atlassian.com/ex/confluence/{cloud_id} rather than the site URL;
# confirm whether this client should point there instead.
confluence_oauth = Confluence(url=CONFLUENCE_URL, token=token["access_token"])

# Example of retrieving content with OAuth
content = confluence_oauth.get_page_by_title(space="SPACE_KEY", title="Page Title")
print("Content:", content)

### Import libraries

In [None]:
# TODO: Add all library imports here

### Load model

In [None]:
# TODO: Uncomment and add model name
# MODEL_ID = ""  # @param {type:"string", isTemplate: true}

## Cleaning up

If you no longer need it, revoke the Atlassian API token created for this notebook at https://id.atlassian.com/manage-profile/security/api-tokens.