In [1]:
# Import libraries.

import json

from IPython.display import Markdown, display

In [2]:
# Define parameters.

PROMPTS = "../data/prompts/zeroshot/taskvine_zeroshot_gpt4o_1.json"

In [3]:
# Analyze a singular prompt.

def analyze(prompt: dict):
    print(prompt["content"])
    display(Markdown(prompt["response"]))

In [4]:
# Load prompts from file.
with open(PROMPTS, "r") as fp:
    prompts = json.load(fp)

# Perform the zeroshot prompting.
for prompt in prompts:
    analyze(prompt)

Write a Python TaskVine application to get the current date on a worker machine. The date must be stored to an output file named "current_date".


```python
from ndcctools.taskvine import Manager, Task

# Create a Manager
manager = Manager()

# Define the command to get the current date and redirect it to "current_date"
command = "date > current_date"

# Create a Task with the command
task = Task(command)

# Specify the output file
task.set_output_files(["current_date"])

# Add the task to the manager
manager.add_task(task)

# Wait for all tasks to complete
manager.wait_for_all()
```

Write a Python TaskVine application to count the number of times the keywords "wealth", "nation", "labour", "price", "nature", and "commodity" appear in Adam Smith's Wealth of Nations. The text is available at "https://www.gutenberg.org/cache/epub/3300/pg3300.txt".


```python
import urllib.request
from ndcctools.taskvine import Manager, Worker, Task

# Define the URL of the text
url = "https://www.gutenberg.org/cache/epub/3300/pg3300.txt"

# Keywords to search for
keywords = ["wealth", "nation", "labour", "price", "nature", "commodity"]

def download_text(url):
    # Download the text file
    response = urllib.request.urlopen(url)
    return response.read().decode('utf-8')

def split_text(text, lines_per_chunk):
    # Split the text into chunks
    lines = text.splitlines()
    for i in range(0, len(lines), lines_per_chunk):
        yield " ".join(lines[i:i+lines_per_chunk])

def count_keywords(chunk, keywords):
    # Count keywords in chunk
    counts = {keyword: chunk.lower().count(keyword) for keyword in keywords}
    return counts

text = download_text(url)
chunks = list(split_text(text, 1000))  # Splitting text into chunks of 1000 lines

# Define the TaskVine Task Manager
with Manager() as manager:
    # Add a task for each chunk
    for i, chunk in enumerate(chunks):
        task = Task(command=f"python3 -c 'import pickle; "
                            f"c=pickle.load(open(\"chunk_{i}.pkl\", \"rb\")); "
                            f"counts={{k:c.lower().count(k) for k in {keywords}}}; "
                            f"pickle.dump(counts, open(\"result_{i}.pkl\", \"wb\"))'")
        
        # Serialize chunk to a file
        with open(f"chunk_{i}.pkl", "wb") as f:
            pickle.dump(chunk, f)
        
        task.upload_files([f"chunk_{i}.pkl"])
        task.download_output_files([f"result_{i}.pkl"])
        manager.submit(task)
    
    # Wait for all tasks to complete
    manager.wait_for_all()

    # Collect results
    total_counts = {keyword: 0 for keyword in keywords}

    for i in range(len(chunks)):
        with open(f"result_{i}.pkl", "rb") as f:
            result = pickle.load(f)
            for keyword, count in result.items():
                total_counts[keyword] += count

    # Print the total counts
    print("Keyword counts in 'The Wealth of Nations':")
    for keyword, count in total_counts.items():
        print(f"{keyword}: {count}")
```


Write a Python TaskVine application to run my program "script.sh" with various input arguments and organizes all output data. "script.sh" takes, as input, an integer and two files. "script.sh" outputs a file named "output{i}.txt" where "{i}" is the input integer. The application must run "script.sh" with input integers 1-1000 and input files "file1.input" and "file2.input" which are stored in the directory "script_inputs". The application must store all output files in a directory named "script_outputs".


```python
import os
from ndcctools.taskvine import Manager, Task

# Create output directory if it doesn't exist
os.makedirs("script_outputs", exist_ok=True)

# Initialize TaskVine manager
manager = Manager()

# Add input files to the manager for automatic staging
manager.add_input_file("script_inputs/file1.input")
manager.add_input_file("script_inputs/file2.input")
manager.add_input_file("script.sh")

# Add tasks to the manager
for i in range(1, 1001):
    # Construct the output filename
    output_file = f"output{i}.txt"

    # Create a task to run script.sh with the specified arguments
    task = Task(f"./script.sh {i} file1.input file2.input")
    
    # Set task's requirements
    task.set_input_files(["script_inputs/file1.input", "script_inputs/file2.input"])
    task.set_output_files([output_file], local_path="script_outputs/")

    # Add the task to the manager
    manager.add_task(task)

# Wait for all tasks to complete
manager.wait_for_all()
```

Write a Python TaskVine application to execute a pipeline of shell scripts. I have the script "script1.sh" which outputs a file named "intermediary.out" upon completion. I also have the scripts "script2.sh" and "script3.sh"  which both accept an input file as a command line argument. "script2.sh" and "script3.sh" must both receive "intermediary.out" as the input. Finally, "script2.sh" and "script3.sh" output files named "script2.out" and "script3.out", respectively. Save these outputs files on the host machine. "intermediary.out" need not be saved to the host machine.


```python
import ndcctools.taskvine as vine

def main():
    # Create a TaskVine manager
    manager = vine.Manager()

    # Define task for script1.sh
    task1 = vine.Task(command="./script1.sh")
    task1.set_output("intermediary.out")
    manager.add_task(task1)

    # Define task for script2.sh
    task2 = vine.Task(command="./script2.sh intermediary.out")
    task2.add_input("intermediary.out", producer=task1)
    task2.set_output("script2.out")
    task2.publish_file("script2.out")

    # Define task for script3.sh
    task3 = vine.Task(command="./script3.sh intermediary.out")
    task3.add_input("intermediary.out", producer=task1)
    task3.set_output("script3.out")
    task3.publish_file("script3.out")

    # Add tasks to manager
    manager.add_task(task2)
    manager.add_task(task3)

    # Wait for all tasks to complete
    manager.wait_for_tasks()

    # Shutdown the manager
    manager.shutdown()

if __name__ == "__main__":
    main()
```


Write a Python TaskVine application to run my GPU-accelerated program. The program binary is stored at "./train_demo". The program requires that the worker machine has the feature "NVIDIA RTX 4090". Upon completion, the program outputs "model.tar.gz". The output file must be saved to the host machine.


```python
import ndcctools.taskvine as vine

# Create a TaskVine application object
app = vine.Application()

# Define feature requirement for the worker
features = {'NVIDIA RTX 4090'}

# Create a task for the GPU-accelerated program
task = vine.Task(
    command='./train_demo',
    inputs=[],                 # No input files
    outputs=['model.tar.gz'],  # Output file
    required_features=features # Worker needs this feature
)

# Add the task to the application
app.add_task(task)

# Wait for the tasks to complete
app.run()

# Fetch the output file back to the host
app.fetch_output_files()
```

Write a Python TaskVine application to compile and test my project stored at "complex_project/". To compile the project, run the "make all" command in the project's root directory. Then, to test the project, run "make test". The compilation and test must use at most 4 cores, 1024 MB of memory, and 2048 MB of disk. Enable resource monitoring to track resource usage, but do not terminate the task if it overuses resources. When the task completes, print the task's exit code and standard output. For each resource that the task overused, print the resource and by how much it overused.


```python
import ndcctools.taskvine as vine

def run_task(command, run_dir, cores, memory, disk):
    task = vine.Task(command)

    # Set the directory from which the command will be run
    task.set_input_directory(run_dir, "")

    # Set the resource constraints
    task.set_cores(cores)
    task.set_memory(memory)
    task.set_disk(disk)
    
    # Enable resource monitoring
    task.enable_resource_monitoring()

    return task

def main():
    # Create the TaskVine application
    app = vine.Application()

    # Run compilation task
    compile_task = run_task("make all", "complex_project", 4, 1024, 2048)
    
    # Add the compile task to the application
    app.add_task(compile_task)

    # Wait for the compile task to finish
    compile_task.wait()
    
    print("Compile Task:")
    print("Exit Code:", compile_task.exit_code())
    print("Standard Output:", compile_task.stdout().read().decode())

    # Check if compile task overused resources
    overused_resources = compile_task.overused_resources()
    for resource, amount in overused_resources.items():
        print(f"Overused {resource}: {amount}")

    # If compilation was successful, run the test task
    if compile_task.exit_code() == 0:
        test_task = run_task("make test", "complex_project", 4, 1024, 2048)

        # Add the test task to the application
        app.add_task(test_task)

        # Wait for the test task to finish
        test_task.wait()
        
        print("\nTest Task:")
        print("Exit Code:", test_task.exit_code())
        print("Standard Output:", test_task.stdout().read().decode())

        # Check if test task overused resources
        overused_resources = test_task.overused_resources()
        for resource, amount in overused_resources.items():
            print(f"Overused {resource}: {amount}")

if __name__ == "__main__":
    main()
```

Write a Python TaskVine application that executes multiple programs with different levels of resource requirements. The two task categories must be named "intensive" and "light". The "intensive" tasks require 8 cores and 16 GB of memory. The "light" tasks require 1 core and 2 GB of memory. In the intensive category, submit 10 tasks to execute the shell script "intensive.sh". In the light category, submit 100 tasks to execute the shell script "light.sh" Print the standard output of each task upon completion.


```python
import ndcctools.taskvine as vine

def main():
    # Create a TaskVine Manager
    manager = vine.Manager()

    # Task resources for intensive and light tasks
    intensive_cores = 8
    intensive_memory = 16 * 1024  # Memory in MB
    light_cores = 1
    light_memory = 2 * 1024  # Memory in MB

    # Create and submit intensive tasks
    for i in range(10):
        task = vine.Task(shell_command="./intensive.sh")
        task.add_resource("cores", intensive_cores)
        task.add_resource("memory", intensive_memory)
        task.tag = f"intensive-{i}"
        manager.submit(task)

    # Create and submit light tasks
    for i in range(100):
        task = vine.Task(shell_command="./light.sh")
        task.add_resource("cores", light_cores)
        task.add_resource("memory", light_memory)
        task.tag = f"light-{i}"
        manager.submit(task)

    # Process completed tasks
    while not manager.empty():
        finished_task = manager.wait(5)
        if finished_task:
            print(f"Task {finished_task.tag} completed with output:")
            print(finished_task.stdout)

    # Finalize the manager
    manager.finish()

if __name__ == "__main__":
    main()
```

Write a Python TaskVine application to perform matrix inversion and transposition. The matrix computations must be defined in Python functions and added to a TaskVine library called "matrix_library". The user must specify the size, m x n,  of the matrix and the number of matrices, i, on which to perform computation. The application must randomly generate i matrices and for each matrix, print the matrix, print the inverse matrix, and print the transposed matrix.


```python
import numpy as np
from ndcctools import Taskvine

# Define TaskVine library
matrix_library = Taskvine.Library('matrix_library')

# Define a function to invert a matrix
@matrix_library.function(name='invert_matrix', version=1)
def invert_matrix(matrix):
    return np.linalg.inv(matrix)

# Define a function to transpose a matrix
@matrix_library.function(name='transpose_matrix', version=1)
def transpose_matrix(matrix):
    return np.transpose(matrix)

# Function to generate random matrices and perform inversion and transposition
def process_matrices(m, n, num_matrices):
    for _ in range(num_matrices):
        matrix = np.random.rand(m, n)
        print("Original Matrix:")
        print(matrix)

        # Perform inversion
        task = Taskvine.Task()
        inv_result = task.call(invert_matrix, matrix)
        inverse_matrix = task.get_result(inv_result)
        print("Inverted Matrix:")
        print(inverse_matrix)

        # Perform transposition
        task = Taskvine.Task()
        trans_result = task.call(transpose_matrix, matrix)
        transposed_matrix = task.get_result(trans_result)
        print("Transposed Matrix:")
        print(transposed_matrix)

# Main code to run the application
if __name__ == "__main__":
    m = int(input("Enter number of rows (m): "))
    n = int(input("Enter number of columns (n): "))
    i = int(input("Enter number of matrices (i): "))
    
    # Initialize Taskvine
    Taskvine.Server.initialize()
    
    try:
        process_matrices(m, n, i)
    finally:
        Taskvine.Server.shutdown()
```


Write a Python Taskvine application to fetch the titles of multiple news webpages. The webpage contents must be downloaded asynchronously using the TaskVine futures executor. Print the URL and title of each webpage in order of the completion of each task. The webpages I am interested in are "https://abcnews.com", "https://apnews.com", "https://axios.com", "https://cnn.com", "https://nbcnews.com", "https://nytimes.com", "https://reuters.com", "https://washingtonpost.com", "https://wsj.com".


```python
import requests
from bs4 import BeautifulSoup
from ndcctools.taskvine import Executor

# List of URLs to fetch
urls = [
    "https://abcnews.com",
    "https://apnews.com",
    "https://axios.com",
    "https://cnn.com",
    "https://nbcnews.com",
    "https://nytimes.com",
    "https://reuters.com",
    "https://washingtonpost.com",
    "https://wsj.com"
]

def fetch_title(url):
    """Fetch the HTML content of a webpage and extract its title."""
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        title = soup.title.string if soup.title else 'No title found'
        return url, title
    except requests.RequestException as e:
        return url, f'Error: {e}'

def main():
    with Executor() as executor:
        futures = [executor.submit(fetch_title, url) for url in urls]
        for future in executor.as_completed(futures):
            url, title = future.result()
            print(f"URL: {url}, Title: {title}")

if __name__ == '__main__':
    main()
```


Write a Python TaskVine application to get the current date on a worker machine. The date must be stored to an output file named "current_date". To prevent an unwanted worker from connecting to the manager, use the password stored in the file "taskvine.password". Further, encrypt communication between the manager and worker with SSL using the "key.pem" and "certificate.pem" files.


```python
import ndcctools.taskvine as vine

# Create a TaskVine manager
manager = vine.Manager()

# Set the manager password
manager.set_password_file("taskvine.password")

# Set SSL options for encryption
manager.set_ssl_options("certificate.pem", "key.pem")

# Define the command to get the date
command = "date > current_date"

# Create a TaskVine task
task = vine.Task(command)

# Add the task to the manager
manager.submit(task)

# Run the manager
manager.run()
```