In [None]:
# Import python packages
import streamlit as st
import pandas as pd
from snowflake.cortex import Complete
from snowflake.snowpark.context import get_active_session

# Bind to the Snowpark session that is running this notebook.
session = get_active_session()

# Current database name with any surrounding double quotes removed.
db = str(session.get_current_database().strip('"'))

# Upper-cased database name up to (not including) the first "_PROD".
solution_prefix = db.upper().partition('_PROD')[0]

### Get Query History

In [None]:
-- Collect the statements issued by the solution admin user, oldest first,
-- with COMMENT clauses removed and housekeeping statements filtered out.
WITH notebook_execution_time AS (
    -- Start time of the earliest EXECUTE NOTEBOOK statement run by the admin user.
    -- The value is NULL when no such statement exists.
    SELECT 
        MIN(start_time) AS execution_time
    FROM 
        SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
    WHERE 
        user_name = '{{solution_prefix}}_SOLE_ADMIN'
        AND upper(query_text) LIKE '%EXECUTE NOTEBOOK%'
)
SELECT 
    -- Remove COMMENT = <string> and COMMENT <string> clauses from the statement text.
    -- Backslashes are doubled because a single-quoted string constant consumes a
    -- single backslash, which would turn the whitespace class into a literal letter s.
    REGEXP_REPLACE(CAST(query_text AS STRING), '(COMMENT\\s*=\\s*''[^'']*''|COMMENT\\s*''[^'']*'')\\s*', '') AS query_text,
    total_elapsed_time,
    user_name
FROM 
    SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
WHERE 
    user_name = '{{solution_prefix}}_SOLE_ADMIN'
    -- Keep only statements that ran before the first notebook execution,
    -- or every statement when the notebook was never executed.
    AND ( 
        (SELECT execution_time FROM notebook_execution_time) IS NULL 
        OR start_time < (SELECT execution_time FROM notebook_execution_time)
    )
    -- NOTE(review): an underscore inside a LIKE pattern matches any single character,
    -- so the BUILD_WH, _ADMIN and CURRENT_ACCOUNT filters below are broader than their
    -- literal text (the _ADMIN filter also hits SYSADMIN, for example). Confirm that
    -- this is intended before tightening them with an ESCAPE clause.
    AND upper(query_text) NOT LIKE '%ALTER USER%'
    AND upper(query_text) NOT LIKE 'SHOW%'
    AND upper(query_text) NOT LIKE '%BUILD_WH%'
    AND (upper(query_text) LIKE '%ACCOUNTADMIN%' 
    OR upper(query_text) NOT LIKE '%_ADMIN%')
    AND upper(query_text) NOT LIKE '%CURRENT_ACCOUNT%'
    AND upper(query_text) NOT LIKE '%IMPORTED PRIVILEGES%'
    AND upper(query_text) NOT LIKE 'CREATE STAGE%'
    AND upper(query_text) NOT LIKE 'GRANT OWNERSHIP ON STAGE%'
    AND upper(query_text) NOT LIKE 'PUT FILE%'
    AND upper(query_text) NOT LIKE 'CREATE OR REPLACE NOTEBOOK%'
    AND upper(query_text) NOT LIKE 'ALTER NOTEBOOK%'
    AND upper(query_text) NOT LIKE '%NOT NEEDED FOR QUICKSTART%'
ORDER BY 
    start_time ASC;


In [None]:
# Turn the result object named `query_history` into a DataFrame.
# (`query_history` is presumably the name of the SQL cell above — confirm in the notebook.)
query_history_df = query_history.to_df()

In [None]:
# Join the captured statements into one SQL script, one statement per row,
# each terminated by exactly one semicolon.
# Trailing whitespace is stripped together with trailing semicolons; stripping
# only ';' would leave a statement stored as "...;\n" ending in ";\n;".
query_text = "\n".join(
    row['QUERY_TEXT'].rstrip('; \t\r\n') + ';'
    for row in query_history_df.collect()
) + "\n\n"
query_text

In [None]:
# Markdown skeleton of a quickstart guide. It is interpolated into the LLM
# prompt as the structure the generated document must follow; the "Fill in ..."
# lines are placeholders, and the later sections are formatting examples.
template = '''author: Fill in sample AUTHOR_NAME
id: Fill in sample quickstart name
summary: Fill in Summary
categories: Getting-Started
environments: web
status: Published 
feedback link: https://github.com/Snowflake-Labs/sfguides/issues
tags: Getting Started, Data Science, Data Engineering 

# Fill in Solution Heading
<!-- ------------------------ -->
## Overview 
Duration: 1

Fill in solution description

### Prerequisites
- Fill in prerequisites

### What You Will Learn 
- Fill in what you will learn

### What You Will Need 
- A [GitHub](https://github.com/) Account 
- Fill in More based on content

### What You Will Build 
- Fill in what you will build list


You can see the source metadata for this guide you are reading now, on [the github repo](https://raw.githubusercontent.com/Snowflake-Labs/sfguides/master/site/sfguides/sample.md).


<!-- ------------------------ -->
## Instructions for Creating a Step in the Quickstart(Do not include this, its just instruction)
Duration: 2

A single sfguide consists of multiple steps. These steps are defined in Markdown using Header 2 tag `##`. 

```markdown
## Step 1 Title
Duration: 3

All the content for the step goes here.

## Step 2 Title
Duration: 1

All the content for the step goes here.
```

To indicate how long each step will take, set the `Duration` under the step title (i.e. `##`) to an integer. The integers refer to minutes. If you set `Duration: 4` then a particular step will take 4 minutes to complete. 

The total sfguide completion time is calculated automatically for you and will be displayed on the landing page. 

<!-- ------------------------ -->
## Code Snippets, Info Boxes, and Tables
Duration: 2

Look at the [markdown source for this sfguide](https://raw.githubusercontent.com/Snowflake-Labs/sfguides/master/site/sfguides/sample.md) to see how to use markdown to generate code snippets, info boxes, and download buttons. 

### JavaScript
```javascript
{ 
  key1: "string", 
  key2: integer,
  key3: "string"
}
```

### Java
```java
for (statement 1; statement 2; statement 3) {
  // code block to be executed
}
```

### Info Boxes
> aside positive
> 
>  This will appear in a positive info box.


> aside negative
> 
>  This will appear in a negative info box.

### Buttons
<button>

  [This is a download button](link.com)
</button>

### Tables
<table>
    <thead>
        <tr>
            <th colspan="2"> **The table header** </th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>The table body</td>
            <td>with two columns</td>
        </tr>
    </tbody>
</table>

### Hyperlinking
[Youtube - Halsey Playlists](https://www.youtube.com/user/iamhalsey/playlists)

<!-- ------------------------ -->
## Images, Videos, and Surveys, and iFrames
Duration: 2

Look at the [markdown source for this guide](https://raw.githubusercontent.com/Snowflake-Labs/sfguides/master/site/sfguides/sample.md) to see how to use markdown to generate these elements. 

### Images
<img src="assets/SAMPLE.png"/>

### Videos
Videos from youtube can be directly embedded:
<video id="KmeiFXrZucE"></video>

### Inline Surveys
<form>
  <name>How do you rate yourself as a user of Snowflake?</name>
  <input type="radio" value="Beginner">
  <input type="radio" value="Intermediate">
  <input type="radio" value="Advanced">
</form>

### Embed an iframe
![https://codepen.io/MarioD/embed/Prgeja](https://en.wikipedia.org/wiki/File:Example.jpg "Try Me Publisher")

<!-- ------------------------ -->
## Conclusion And Resources
Duration: 1

At the end of your Snowflake Guide, always have a clear call to action (CTA). This CTA could be a link to the docs pages, links to videos on youtube, a GitHub repo link, etc. 

If you want to learn more about Snowflake Guide formatting, checkout the official documentation here: [Formatting Guide](https://github.com/googlecodelabs/tools/blob/master/FORMAT-GUIDE.md)

### What You Learned
- Provide list of what was learned

### Related Resources
- Provide links to Resources
'''

In [None]:
-- List the files held in the QUICKSTART stage of the ANALYTICS schema.
ls @{{db}}.ANALYTICS.QUICKSTART;

In [None]:
def _decode_stage_bytes(raw_bytes):
    """Decode file bytes to text, trying UTF-8 first and Latin-1 second.

    Latin-1 maps every possible byte value to a character, so the fallback
    cannot raise and no further fallback is needed.
    """
    try:
        return raw_bytes.decode('utf-8')
    except UnicodeDecodeError:
        return raw_bytes.decode('latin-1')


# Stage listing as a pandas frame; the 'name' column holds the file paths.
# (`stage_files` is presumably the name of the stage-listing cell — confirm.)
files = stage_files.to_pandas()
file_paths = files['name'].tolist()

# NOTE(review): the last cell of this notebook uploads quickstart.md to the
# QUICKSTART stage, so a re-run may pick up the previous output here — confirm
# whether that file should be skipped.
markdown_chunks = []
for file_path in file_paths:
    if not file_path.endswith('.md'):
        continue

    f = session.file.get_stream(f"@{db}.ANALYTICS." + file_path)
    try:
        file_content = f.read()
    finally:
        # Release the stream even when the read fails.
        f.close()

    markdown_chunks.append(_decode_stage_bytes(file_content) + "\n")

# Text of every Markdown file, each followed by a newline, in listing order.
all_content = "".join(markdown_chunks)

all_content

In [None]:
# Prompt for the LLM: the quickstart template, the supporting Markdown read
# from the stage, and the rules for populating the document.
prompt = f"""
You are tasked with generating a complete quickstart document for a solution. The structure and format of the document must follow the template provided below. Make sure to address the following key points:

{template}

You are provided with the following content:

**Supporting Content:**
{all_content}

**Important Instructions:**
1. **Content Population:** Populate the quickstart document exclusively with content from the `Supporting Content` sections. Ensure the final document strictly adheres to the template's structure.
2. **Overview Section:** The "Overview" section must include a description of the solution right below Overview after Duration. Ensure that all subheadings are included, and provide a meaningful description based on the `Supporting Content`. If specific details are not available, clearly indicate that the description is empty but maintain the section's structure.
3. **No Placeholder Text:** Do **not** include any text or placeholders from the template that are not directly replaced with content from the provided files.
4. **Additional Headings:** Many solutions utilize Snowflake Notebooks or Streamlit. If the solution includes mentions of these, ensure to include a section describing them. If a Snowflake Notebook or Streamlit application is mentioned, provide a detailed description of its purpose and functionality. Note that the actual notebook will be manually uploaded later; therefore, you should focus on describing its intended use and importance in the document.
5. **Image Placement:** Insert images exactly where they are referenced in the content. Images should be included inline within the text, not at the end.

**Generate the document based on the above and that no Markdown syntax remains.**
"""

# The prompt is handed to Complete as a value rather than spliced into SQL
# text, so it is passed unmodified; doubling the apostrophes would only
# corrupt the text the model receives.
response = Complete('mistral-large', prompt)
response

In [None]:
# Markdown section that shows the captured setup SQL inside a fenced sql block.
sql_setup_section = "".join([
    "\n## Creating Snowflake Objects\n",
    "\n```sql\n",
    query_text,
    "\n```\n",
])
sql_setup_section

In [None]:
import re


def insert_after_overview(document, addition):
    """Return `document` with `addition` spliced in after its Overview section.

    The Overview section starts at the first line that begins with
    "## Overview" and ends where the next line beginning with "## " starts.
    Anchoring to the start of a line keeps "### Overview" and mid-sentence
    mentions from being mistaken for the heading.

    When there is no Overview heading, or no level-2 heading follows it,
    `addition` is appended to the end of `document`.
    """
    overview = re.search(r'^## Overview', document, re.MULTILINE)
    if overview is None:
        return document + addition

    # Look for the next level-2 heading strictly after the Overview heading.
    next_heading = re.compile(r'^##\s', re.MULTILINE).search(document, overview.end())
    if next_heading is None:
        return document + addition

    cut = next_heading.start()
    return document[:cut] + addition + document[cut:]


# Section to insert: Marketplace instructions followed by the captured setup SQL.
new_content = f"""
## Creating Snowflake Objects

Follow below instructions to get the FactSet Tick History data from Snowflake Marketplace.
- Navigate to [Snowsight](https://app.snowflake.com/)
- Click: Data Products
- Click: Marketplace
- Search: FactSet Tick History
- Scroll Down and Click: Tick History
- Click: Get
- Make sure the Database name is: Tick_History
- Which roles, in addition to ACCOUNTADMIN, can access this database? PUBLIC
- Click: Get

### Creating Objects, Loading Data, and Joining Data
Duration: 3

Navigate to Worksheets, click "+" in the top-right corner to create a new Worksheet, and choose "SQL Worksheet".

Paste and run the following SQL in the worksheet to create Snowflake objects (database, schema, tables),

```sql
{query_text}
```
"""

# Place the new section right after the Overview section of the LLM response.
updated_document = insert_after_overview(response, new_content)

updated_document

In [None]:
def prepare_document_for_llm(document):
    """Return `document` with every newline swapped for the text '<LINE_BREAK>'."""
    lines = document.split('\n')
    return '<LINE_BREAK>'.join(lines)

# Encode the line breaks of the merged document as '<LINE_BREAK>' markers.
prepared_document = prepare_document_for_llm(updated_document)
prepared_document

In [None]:
import re

def convert_image_references(text):
    """Rewrite Markdown image links under img/ as <img> tags under assets/.

    Every `![alt](img/<path>)` becomes `<img src="assets/<path>"/>`; the alt
    text is dropped. Text without such links is returned unchanged.
    """
    markdown_image = r'!\[([^\]]*)\]\(img/([^\)]*)\)'
    # \g<2> is the path captured after the "img/" prefix.
    html_image = r'<img src="assets/\g<2>"/>'
    return re.sub(markdown_image, html_image, text)


# Rewrite the image links in the marker-encoded document and print the result.
corrected_document = convert_image_references(prepared_document)
print(corrected_document)


In [None]:
def restore_line_breaks(document):
    """Return `document` with every '<LINE_BREAK>' marker turned back into a newline."""
    pieces = document.split('<LINE_BREAK>')
    return '\n'.join(pieces)

# Turn the '<LINE_BREAK>' markers in the corrected document back into newlines.
# The name `corrected_document` is rebound to the restored text.
corrected_document = restore_line_breaks(corrected_document)
corrected_document

In [None]:
# Write the finished guide to a local quickstart.md file.
# NOTE(review): the document is wrapped in a ``` fence even though it contains
# fenced blocks of its own — confirm the outer fence is intended.
document_body = corrected_document
if document_body and not document_body.endswith("\n"):
    # Keep the closing fence on a line of its own.
    document_body += "\n"

# The with-block closes the file on exit, so no explicit close() is needed.
with open("quickstart.md", 'w', encoding='utf-8') as f:
    f.write("```\n")
    f.write(document_body)
    f.write("```\n")


In [None]:
# Upload the local quickstart.md to the QUICKSTART stage without compression,
# then show the status reported for the first result entry.
# NOTE(review): no overwrite option is passed; check how a re-run behaves when
# quickstart.md is already present on the stage.
put_result = session.file.put("quickstart.md",f"@{db}.ANALYTICS.QUICKSTART", auto_compress= False)
put_result[0].status