In [None]:
! pip install -qq https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip
! pip install -U -qq transformers bitsandbytes accelerate loralib
! pip install -qq git+https://github.com/huggingface/peft.git
! pip install -q guidance

In [None]:
import os

# Point the Hugging Face Hub cache at /kaggle/working/. This runs before the
# cells that import transformers, so the setting is in place when they load.
os.environ.update({"HUGGINGFACE_HUB_CACHE": "/kaggle/working/"})


### **Alpaca-Lora**

In [None]:
import transformers
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
from peft import PeftModel
import torch
import accelerate

# Cache-migration helper provided by transformers; called once before loading.
transformers.utils.move_cache()

# Identifiers of the base checkpoint and of the LoRA adapter applied on top of it.
BASE_MODEL = "huggyllama/llama-7b"
LORA_WEIGHTS = "tloen/alpaca-lora-7b"
# NOTE(review): `device` is not referenced anywhere else in the visible notebook;
# weight placement is delegated to `device_map="auto"` below.
device = "cuda" if torch.cuda.is_available() else "cpu"


# Load the tokenizer and the half-precision base model.
tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=False,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Wrap the base model with the LoRA adapter. The LORA_WEIGHTS constant is used
# here (instead of repeating the literal) so the adapter id lives in one place.
model = PeftModel.from_pretrained(
    model,
    LORA_WEIGHTS,
)


In [None]:
# Display the generation configuration attached to the loaded model.
model.generation_config


In [None]:
import pandas as pd

# Read the transcript table from the Kaggle input dataset and display it.
# Later cells read its "speaker", "start_time" and "text" columns.
transcription_df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/transcription/transcription.csv"
)
transcription_df


In [None]:
MAX_CONTEXT = 1000  # maximum number of characters of text kept per segment
OFFSET = 4  # number of consecutive segments joined into one chunk


def _format_segment(row, max_chars=MAX_CONTEXT):
    """Render one transcript row as ``"<speaker> (<start_time>): <text>"``.

    Args:
        row: Mapping with "speaker", "start_time" and "text" entries.
        max_chars: Maximum number of characters of the text to keep
            (truncation happens before surrounding whitespace is stripped).

    Returns:
        The formatted single-line segment string.
    """
    raw_text = row["text"]
    # An empty CSV field is read back as NaN (a float), which cannot be sliced;
    # treat missing text as an empty string and coerce anything else to str.
    text = "" if pd.isna(raw_text) else str(raw_text)
    return f'{row["speaker"]} ({row["start_time"]}): {text[:max_chars].strip()}'


# Iterating over records (rather than DataFrame.apply(axis=1).tolist()) also
# works when the transcript is empty, yielding an empty list of segments.
segments = [_format_segment(row) for row in transcription_df.to_dict("records")]

# Group the segments into newline-joined chunks of OFFSET segments each; the
# final chunk may contain fewer.
chunks = ["\n".join(segments[i : i + OFFSET]) for i in range(0, len(segments), OFFSET)]


In [None]:
# Preview the first two transcript chunks.
chunks[:2]


## **GUIDANCE**

In [None]:
import transformers
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
from peft import PeftModel
import torch
import accelerate
import guidance

# Re-create the tokenizer from the same checkpoint id used to load the model,
# reusing BASE_MODEL so the identifier is not duplicated as a literal.
tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

# Register the already-loaded (LoRA-wrapped) model as guidance's default LLM so
# that the guidance programs defined below run against it.
guidance.llm = guidance.llms.Transformers(
    model=model,
    tokenizer=tokenizer,
    caching=False,
)


In [None]:
# Candidate answers for the yes/no classification. Each carries its own leading
# space, so the template below places the select directly after "Answer:".
options = [" Yes", " No"]

# NOTE(review): `ans` is not referenced elsewhere in the visible notebook.
ans = []
# Placeholder list; a later cell displays `action_items`.
action_items = []

# Step 1: classify whether a transcript chunk contains an action item.
# The chosen option is exposed as the "answer" variable of the executed program.
# Fixes relative to the earlier prompt: the quoting of the two allowed answers
# is balanced, and the extra space after "Answer:" is removed so the rendered
# text reads "Answer: Yes" (matching the second prompt) instead of "Answer:  Yes".
contains_ai_program = guidance("""Does the following meeting transcript contain an action item? Please answer with a single word, either "Yes" or "No".
Meeting transcript: {{transcript}}
Answer:{{select "answer" options=options}}"""
)

# Step 2: given a chunk already classified as "Yes", generate the action item
# ("text"), its assignee ("assigne" -- the variable name is kept as-is because
# callers read it under that key) and the time it was mentioned ("time").
identify_ai_program = guidance("""Does the following meeting transcript contain an action item? Please answer with a single word, either "Yes" or "No".
Meeting transcript: {{transcript}}
Answer: Yes

Now, identify the action item present in the meeting transcript.
Action Item: {{gen "text" max_tokens=20}}
Identify the assignee of that action item, this should be a person's name. If there is no clear assignee then set it to 'UNKNOWN'.
Assignee: {{gen "assigne"}}
At what time during the meeting was the action item mentioned?
Time: {{gen "time" max_tokens=7}}"""
)


In [None]:
def identify_ais(chunks, silent=True):
    """Extract action items from transcript chunks with the two guidance programs.

    Each chunk is first classified by ``contains_ai_program``; only chunks whose
    stripped answer equals "Yes" are passed on to ``identify_ai_program``.

    Args:
        chunks: Iterable of transcript strings.
        silent: Forwarded unchanged to both guidance programs.

    Returns:
        A list with one dict per detected action item, holding the stripped
        "text", "assigne" and "ts" values (``ts`` comes from the program's
        "time" variable).
    """
    # (output key, variable name in the executed identify_ai_program)
    field_sources = (("text", "text"), ("assigne", "assigne"), ("ts", "time"))

    found = []
    for transcript_chunk in chunks:
        verdict = contains_ai_program(
            transcript=transcript_chunk, options=options, silent=silent
        )
        if verdict["answer"].strip() != "Yes":
            # No action item reported for this chunk; skip the extraction step.
            continue

        extraction = identify_ai_program(transcript=transcript_chunk, silent=silent)
        found.append(
            {key: extraction[source].strip() for key, source in field_sources}
        )
    return found


In [None]:
# Run the two-step extraction over every transcript chunk and display the result.
# Without this call, a fresh "Restart & Run All" would only ever show the empty
# placeholder list assigned to `action_items` in the prompt-definition cell.
action_items = identify_ais(chunks)
action_items


In [None]:
# Hand-written sample meeting transcript, formatted as "<speaker> (<time>): <utterance>"
# with blank lines between turns. It is passed to identify_ais in the next cell.
text = """Alice (10:00 AM): Good morning, everyone! Let's get started with today's meeting. We have a lot to cover, so I'd like to keep it efficient. First on the agenda, we need to discuss the progress of the marketing campaign. Bob, could you provide an update?

Bob (10:02 AM): Certainly, Alice. We've made significant headway with the marketing campaign. Our social media ads are performing well, generating a high click-through rate and increased website traffic. We've also launched an email campaign that has resulted in a notable uptick in conversions. Overall, we're on track to meet our targets.

Alice (10:05 AM): That's great to hear, Bob. Good job to you and the marketing team. Now, let's move on to the next item. Charlie, I believe you have an update on the product development front?

Charlie (10:07 AM): Yes, Alice. We've made considerable progress with the new product. The development team has completed the core functionality and is now focusing on refining the user interface. We're planning to conduct a beta test next week to gather feedback from a select group of users. If all goes well, we should be ready for a full launch by the end of next month.

Alice (10:10 AM): Excellent work, Charlie. It's exciting to see the product taking shape. Keep up the good work. Next, I wanted to discuss our upcoming industry conference. As you all know, it's a crucial event for us to showcase our brand and network with potential clients. I'd like to brainstorm ideas on how we can make the most impact during the conference. Any suggestions?

Bob (10:12 AM): One idea could be to host a workshop or a panel discussion on a trending topic in our industry. It would position us as thought leaders and attract a lot of attention from attendees.

Charlie (10:14 AM): I agree with Bob's suggestion. We can also create engaging visual displays at our booth and offer interactive demos of our product to captivate the audience.

Alice (10:16 AM): Those are excellent suggestions, Bob and Charlie. Let's make sure to implement them. Lastly, I want to remind everyone about the upcoming deadline for the budget proposal. Please ensure all necessary information is submitted by the end of this week, as we need to finalize the budget for the next fiscal year.

Bob (10:19 AM): Noted, Alice. I'll make sure the finance team completes the budget proposal and submits it on time.

Alice (10:21 AM): Perfect. Thank you, Bob. Is there anything else anyone would like to discuss or bring up before we conclude the meeting?

Charlie (10:23 AM): Actually, I have one more thing to add. It would be beneficial to schedule a meeting with the sales team to align our product launch timeline with their strategies. We need to ensure they have sufficient knowledge about the new product and are prepared to market it effectively.

Alice (10:25 AM): That's a great point, Charlie. Let's schedule a meeting with the sales team as soon as possible to coordinate our efforts. I'll take care of setting up the meeting. Thank you for bringing that up.
"""
# NOTE(review): the commented-out lines below are stale scratch code. `program`
# and `program2` are not defined anywhere in the visible notebook; the same
# two-step flow is implemented by identify_ais.
# executed_program1 = program(transcript=text, options=options)
# if executed_program1["answer"].strip() == "Yes":
#     executed_program2 = program2(transcript=text)


In [None]:
# Run the extraction on the sample transcript as a single chunk. silent=False is
# forwarded to both guidance programs; the returned list is the cell's output.
identify_ais([text], silent=False)
