# **Cocoon: Database Transform**
Given the catalogs of the source and target databases, Cocoon automatically writes the SQL that transforms the source into the target.

## **You Need...**

1. An LLM API key
2. Compiled dbt project directories (one for the source database, one for the target)



In [None]:
! pip install cocoon_data==0.1.155

In [None]:
from cocoon_data import *

In [None]:
#@title  Download example dbt project (skip this step if you have your own)
import requests
import os
import base64

def url_path_join(*args):
    """Join path components for a URL using forward slashes.

    Leading and trailing slashes are stripped from every component, and
    components that are empty after stripping are skipped so the result
    never contains a leading, trailing, or doubled ``/``.

    Args:
        *args: Path components as strings.

    Returns:
        The components joined with single ``/`` separators; an empty string
        when there is nothing to join.
    """
    stripped = (arg.strip('/') for arg in args)
    # Dropping empty pieces keeps e.g. ("", "models") from becoming "/models".
    return '/'.join(part for part in stripped if part)

def download_github_directory(repo_owner, repo_name, directory_path, local_path, timeout=30):
    """Recursively download a directory from a GitHub repository.

    Lists ``directory_path`` through the GitHub repository "contents" API and
    mirrors its files and subdirectories under ``local_path``. HTTP error
    statuses are reported with ``print`` instead of being raised, so one
    failed entry does not abort the rest of the download.

    Args:
        repo_owner: Owner (user or organization) of the repository.
        repo_name: Name of the repository.
        directory_path: Path inside the repository to download.
        local_path: Local directory to write into; created if missing.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: On network-level failures or timeouts.
    """
    # GitHub API endpoint
    api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{directory_path}"

    # A timeout keeps a stalled connection from hanging the notebook cell.
    response = requests.get(api_url, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to retrieve directory contents. Status code: {response.status_code}")
        return

    # Create the local directory if it doesn't exist
    os.makedirs(local_path, exist_ok=True)

    contents = response.json()
    # When the path names a single file, the API returns one object rather
    # than a list; normalize so the loop below handles both shapes.
    if isinstance(contents, dict):
        contents = [contents]

    for item in contents:
        if item['type'] == 'file':
            file_response = requests.get(item['download_url'], timeout=timeout)
            # Do not write an error page to disk as if it were the file.
            if file_response.status_code != 200:
                print(f"Failed to download {item['name']}. Status code: {file_response.status_code}")
                continue

            file_path = os.path.join(local_path, item['name'])
            with open(file_path, 'wb') as file:
                file.write(file_response.content)
            print(f"Downloaded: {file_path}")
        elif item['type'] == 'dir':
            # If it's a subdirectory, recursively download its contents
            subdir_path = url_path_join(directory_path, item['name'])
            local_subdir_path = os.path.join(local_path, item['name'])
            download_github_directory(repo_owner, repo_name, subdir_path, local_subdir_path, timeout)

# Fetch the example source (tpch) and target (ssb) dbt projects from the
# model gallery of the Cocoon GitHub repository into the working directory.
repo_owner = "Cocoon-Data-Transformation"
repo_name = "cocoon"

# Project names double as the local folder names.
source_project_name = "tpch"
target_project_name = "ssb"

# Locations inside the repository.
source_directory_path = f"documentation/model_gallery/{source_project_name}"
target_directory_path = f"documentation/model_gallery/{target_project_name}"

# Local destinations, relative to the current directory.
source_dbt_directory = os.path.join(".", source_project_name)
target_dbt_directory = os.path.join(".", target_project_name)

download_github_directory(
    repo_owner=repo_owner,
    repo_name=repo_name,
    directory_path=source_directory_path,
    local_path=source_dbt_directory,
)
download_github_directory(
    repo_owner=repo_owner,
    repo_name=repo_name,
    directory_path=target_directory_path,
    local_path=target_dbt_directory,
)

In [None]:
#@title Provide your LLM API (prefer claude 3.5 sonnet)

# if you use Anthropic for claude 3.5 sonnet
cocoon_llm_setting['api_type'] = 'Anthropic'
# Placeholder value: replace it with your own key, and avoid saving a real
# key in a notebook that will be shared.
os.environ["ANTHROPIC_API_KEY"] = "sk-ant-api..."
# NOTE(review): the value below is a Claude model identifier, yet it is stored
# under 'aws_access_key' -- the key the Bedrock example further down uses for
# an AWS credential. This looks mislabelled; confirm against the cocoon_data
# documentation which setting selects the Anthropic model.
cocoon_llm_setting['aws_access_key'] = "claude-3-5-sonnet-20240620"

# # if you use Vertex AI for claude 3.5 sonnet
# # if you use google colab
# # from google.colab import auth
# # auth.authenticate_user()
# cocoon_llm_setting['api_type'] = 'AnthropicVertex'
# cocoon_llm_setting['vertex_region'] = "us-east5"
# cocoon_llm_setting['vertex_project_id'] = ""
# cocoon_llm_setting['vertex_model'] = "claude-3-5-sonnet@20240620"

# # if you use Bedrock for claude 3.5 sonnet
# cocoon_llm_setting['api_type'] = 'AnthropicBedrock'
# cocoon_llm_setting['aws_access_key'] = "..."
# cocoon_llm_setting['aws_secret_key'] = "..."
# cocoon_llm_setting['aws_region'] = "us-east-1"
# cocoon_llm_setting['aws_model'] = "anthropic.claude-3-5-sonnet-20240620-v1:0"

# # if you use Vertex AI for llama3
# cocoon_llm_setting['api_type'] = 'Llama3Vertex'
# cocoon_llm_setting['vertex_region'] = "us-east5"
# cocoon_llm_setting['vertex_project_id'] = ""

# # if you use Bedrock for llama3
# cocoon_llm_setting['api_type'] = 'Llama3Bedrock'
# cocoon_llm_setting['aws_access_key'] = "..."
# cocoon_llm_setting['aws_secret_key'] = "..."
# cocoon_llm_setting['aws_region'] = "us-west-2"

# # if you use Open AI
# cocoon_llm_setting['api_type'] = "openai"
# cocoon_llm_setting['api_key'] = "sk-proj-..."
# cocoon_llm_setting['openai_model'] = "gpt-4-turbo"

# # if you use Azure Open AI
# cocoon_llm_setting['api_type'] = "azure"
# # azure openai key and endpoint
# cocoon_llm_setting['api_key'] = "..."
# cocoon_llm_setting['api_base'] = "https://xxx.openai.azure.com/"
# # deployed model in azure openai
# cocoon_llm_setting['api_version'] = "2023-12-01-preview"
# cocoon_llm_setting['azure_engine'] = "xxx"

# Smoke test: send a one-word prompt through the configured LLM with
# use_cache=False and print the text of the first returned choice.
test_message = "hello"
messages = [dict(role="user", content=test_message)]
response = call_llm_chat(
    messages,
    use_cache=False,
    temperature=0.1,
    top_p=0.1,
)
print(response["choices"][0]["message"]["content"])

Hello! How can I assist you today? Feel free to ask me any questions or let me know if you need help with anything.


In [None]:
# Local dbt project directories for the source and target databases.
source_dbt_directory = "./tpch"
target_dbt_directory = "./ssb"

# Build the workflow from the two directories; no database connection is
# passed (con=None). Only the second returned value is kept.
_, cocoon_workflow = create_cocoon_workflow(
    con=None,
    para=dict(
        source_dbt_directory=source_dbt_directory,
        target_dbt_directory=target_dbt_directory,
    ),
)

In [None]:
# Note: Cocoon currently cannot go back and change a previous step.
# To start over, rerun the 'create_cocoon_workflow(...)' cell above 👆
cocoon_workflow.start()

VBox(children=(Dropdown(options=(('All Column Matching', 0), ('stg_customer', 1), ('stg_date', 2), ('stg_lineo…

VBox(children=(Text(value='./transform.html', description='HTML'), Checkbox(value=False, description='Allow Ov…