# **Cocoon: Fuzzy Union, Table Transformation, Common Data Model**


## **You Need...**

1. An LLM API (only OpenAI is supported for now; please send a feature request for other APIs)
2. A data warehouse connection, `con` (Snowflake/DuckDB/CSV)



In [None]:
! pip install cocoon_data==0.1.107

In [None]:
from cocoon_data import *

In [None]:
#@title  Download example table (skip this step if you have your own table)
import requests

# Example tables, keyed by the local file name each one is saved under.
# The data-warehouse cell iterates over `files` to load these CSVs.
files = {
    "person_example.csv": "https://raw.githubusercontent.com/Cocoon-Data-Transformation/cocoon/main/files/person_example.csv",
    "patients.csv": "https://raw.githubusercontent.com/Cocoon-Data-Transformation/cocoon/main/files/patients.csv",
}

# Fetch every example file and save it into the current working directory.
for file_name, url in files.items():
    try:
        # Without a timeout, requests.get() can block forever on a stalled
        # connection; 30 seconds is generous for these small CSV files.
        response = requests.get(url, timeout=30)
    except requests.RequestException as error:
        # Network-level failure (DNS, refused connection, timeout): report it
        # and carry on with the remaining files instead of aborting the cell.
        print(f"Failed to download {file_name}: {error}")
        continue

    # Only a 200 response carries the file body we want to keep.
    if response.status_code == 200:
        # Binary mode writes the payload exactly as it was received.
        with open(file_name, "wb") as file:
            file.write(response.content)
        print(f"{file_name} downloaded successfully.")
    else:
        print(f"Failed to download {file_name}.")

In [None]:
#@title Provide your LLM API

# OpenAI (default): paste your key between the quotes.
# Please ensure GPT-4 is available on the account.
openai.api_key = ''

# # Anthropic alternative: please ensure Claude 3 Opus is available
# os.environ["ANTHROPIC_API_KEY"] = ""
# openai.api_type ='claude'

# # Vertex AI alternative: please ensure Claude 3 Opus is available
# openai.api_type = 'AnthropicVertex'
# os.environ['AnthropicVertex_region'] = "us-east5"
# os.environ['AnthropicVertex_project_id'] = ""

# Smoke test: send one short message and print the text of the first choice.
test_message = "hello"
messages = [dict(role="user", content=test_message)]
response = call_llm_chat(messages, temperature=0.1, top_p=0.1)
first_choice = response['choices'][0]
print(first_choice['message']['content'])

In [None]:
# @title Provide Data Warehouse Con

# In-memory DuckDB: each CSV is read into a DataFrame and registered with the
# connection under a cleaned table name.
# `files` is defined in the download cell; if you skipped that cell, set
# `files` to your own CSV file names before running this one.
con = duckdb.connect(database=':memory:')
for csv_file in files:
    file_path = f'./{csv_file}'

    df = pd.read_csv(file_path)
    # splitext drops only the final extension, so "sales.2023.csv" becomes
    # "sales.2023" instead of being truncated at the first dot (which would
    # make "sales.2023.csv" and "sales.2024.csv" collide on one table name).
    table_name = os.path.splitext(os.path.basename(file_path))[0]
    table_name = clean_table_name(table_name)

    con.register(table_name, df)

# One widget per connection is enough: it takes only `con`, so it is built
# once here rather than once per CSV file inside the loop.
query_widget = QueryWidget(con)

# # Snowflake: specify the con info and table name
# con = snowflake.connector.connect(
#     # check out your url: {account}.snowflakecomputing.com
#     account="",
#     user="",
#     password="",
#     warehouse="",
#     database="",
#     schema="",
# )
# # Please enter the table to stage/clean
# # Make sure it's a table (queries over view can be slow)

In [None]:
# Build the query widget and the Cocoon workflow from the warehouse connection `con`.
query_widget, cocoon_workflow = create_cocoon_workflow(con)

In [None]:
# Show the query widget (presumably renders an interactive query UI in the notebook — confirm).
query_widget.display()

In [None]:
# Start the Cocoon workflow created from `con`.
cocoon_workflow.start_workflow()