!pip install -r requirements.txt

In [None]:
import os
import json
from pathlib import Path

# Objective of this experiment; names the prompt and output sub-folders.
gen_objective = "innovation_type"
# Model identifier; names the per-model output folder.
model_lookup_name = "llama-3-70b-chat"

In [None]:
# Directory layout: prompts live under ../prompt/<objective> and outputs
# under ../data/output/<model>/<objective>.
prompt_main_dir = Path("../prompt")
input_data_dir = Path("../data/input")
output_data_dir = Path("../data/output")
model_dir = output_data_dir / model_lookup_name
trial_dir = model_dir / gen_objective
prompt_trial_dir = prompt_main_dir / gen_objective

# On the first run for this model/objective pair, create the trial directory
# and seed it with empty placeholder files for the pipeline outputs.
if not trial_dir.exists():
    # parents=True because the per-model directory may not exist yet either;
    # a single-level mkdir would raise FileNotFoundError in that case.
    trial_dir.mkdir(parents=True)
    placeholder_names = [
        'generation_1.jsonl', 'cont_generation_1.jsonl', 'generation_1.md',
        # The mapping placeholder carries the `.txt` name, which is the path
        # the refinement step is actually given.
        'refinement_mapping.txt', 'refinement.md', 'refinement_updated.jsonl',
        # Not pre-created: 'generation_2.jsonl', 'generation_2.md',
        # 'assignment.jsonl'
    ]

    for name in placeholder_names:
        # The directory is brand new, so touch() always creates an empty file.
        (trial_dir / name).touch()

In [None]:
# --- Input data ---
data_file = input_data_dir.joinpath("uniform_sample.jsonl")
small_data_sample = input_data_dir.joinpath("small_uniform_sample.jsonl")
# Sample handed to the scripts. This currently selects the full file;
# rebind it to small_data_sample for quick experiments.
data_sample = data_file

# --- Generation (stage 1): prompt and seed in, topics out ---
generation_prompt = prompt_trial_dir.joinpath("generation_1.txt")
seed_1 = prompt_trial_dir.joinpath("seed_1.md")
generation_out = trial_dir.joinpath("generation_1.jsonl")
cont_generation_out = trial_dir.joinpath("cont_generation_1.jsonl")
generation_topic = trial_dir.joinpath("generation_1.md")

# --- Refinement ---
# NOTE(review): the prompt is taken from the objective-specific prompt
# directory, not the shared one, although this was previously described as
# the "general" refinement prompt. Confirm which is intended.
refinement_prompt = prompt_trial_dir.joinpath("refinement.txt")
refinement_out = trial_dir.joinpath("refinement.md")
refinement_mapping = trial_dir.joinpath("refinement_mapping.txt")
refinement_updated = trial_dir.joinpath("refinement_updated.jsonl")

# --- Generation (stage 2) ---
generation_2_prompt = prompt_trial_dir.joinpath("generation_2.txt")
generation_2_out = trial_dir.joinpath("generation_2.jsonl")
generation_2_topic = trial_dir.joinpath("generation_2.md")

# --- Assignment ---
assignment_prompt = prompt_trial_dir.joinpath("assignment.txt")
assignment_out = trial_dir.joinpath("assignment.jsonl")

# --- Correction (prompt comes from the shared prompt directory) ---
correction_prompt = prompt_main_dir.joinpath("correction.txt")
correction_out = trial_dir.joinpath("assignment_corrected.jsonl")

In [None]:
# Description: Example of how to run the generation script
%run generation_1.py --deployment_name "llama-3-70b-chat" \
                    --max_tokens 300 --temperature 0.0 --top_p 0.0 \
                    --data $data_sample \
                    --prompt_file $generation_prompt \
                    --seed_file $seed_1 \
                    --cont_out_file $cont_generation_out \
                    --out_file $generation_out \
                    --topic_file $generation_topic \
                    --verbose True \
                    --provider together.ai

In [None]:
# Refinement 
# Run the script multiple times to get a better result
# Default: 1 runs
%run refinement.py --deployment_name llama-3-70b-chat \
                    --max_tokens 500 --temperature 0.0 --top_p 0.0 \
                    --prompt_file $refinement_prompt \
                    --generation_file $generation_out \
                    --topic_file $generation_topic \
                    --out_file $refinement_out \
                    --verbose True \
                    --updated_file $refinement_updated \
                    --mapping_file $refinement_mapping \
                    --refined_again False \
                    --remove False \
                    --provider together.ai

In [None]:
# Lookup names of the alternative models that can be used for assignment.
qwen_lookup_name = "qwen-1_5-72b-chat"
mistral_lookup_name = "mistral-7b-instruct-v0_2"
dbrx_lookup_name = "dbrx-instruct"

# Topic file for the assignment step, and the output file for this run
# (this rebinds assignment_out to a model-specific file name).
manual_refinement = trial_dir.joinpath("generation_1_manual-refinement.md")
assignment_out = trial_dir.joinpath("assignment_qwen.jsonl")

In [None]:
# Assignment 
# "qwen-1_5-72b-chat"
# "mistral-7b-instruct-v0_2"
# "dbrx-instruct"
%run assignment.py --deployment_name $qwen_lookup_name \
                        --max_tokens 300 --temperature 0.0 --top_p 0.0 \
                        --data  $data_sample\
                        --prompt_file $assignment_prompt \
                        --topic_file $manual_refinement \
                        --out_file $assignment_out \
                        --verbose True \
                        --provider together.ai