# Pip Installs

In [1]:
%%capture 
%pip install datasets -q
%pip install openai -q
%pip install tiktoken -q
%pip install langchain -q
%pip install scipy -q
%pip install ragatouille -q
%pip install --upgrade jupyter ipywidgets -q
%pip install aiohttp nest_asyncio -q
%pip install asyncio -q
%pip install -U sentence-transformers -q
%pip install dotenv -q
%pip install ast -q
%pip install plotly -q
%pip install llama-index
%pip install transformers
%pip install torch
%pip install accelerate
%pip install sentence-transformers
%pip install accelerate\
%pip install protobuf\
%pip install sentencepiece\
%pip install torch\
%pip install git+https://github.com/huggingface/transformers\ 
%pip install huggingface_hub

# Import Packages

In [2]:
import ast  # for converting embeddings saved as strings back to arrays
import openai
import pandas as pd  # for storing text and embeddings data
import tiktoken  # for counting tokens
import os
from scipy import spatial
import ipywidgets
from datasets import load_dataset
import re  # for cutting <ref> links out of Wikipedia articles
from tqdm.notebook import tqdm
import plyer
import platform

# Chunking Text
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document as LangchainDocument

# Reranking
from ragatouille import RAGPretrainedModel

from typing import Optional

# Asynchronous requests
import aiohttp
import asyncio
from tqdm.asyncio import tqdm as atqdm

# Embedded chunks
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv
import ast

# LLAMA
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from dotenv import load_dotenv


# **Experiment: Llama 2 Model**

In [6]:
# Authenticate with Hugging Face
# The model below lives in a gated repository: downloading it fails with a
# 401 error unless this session is logged in with a token whose account has
# been granted access on the model page.
from huggingface_hub import login

load_dotenv()  # make HF_TOKEN from a local .env file visible to os.getenv
# When HF_TOKEN is not set, token=None makes login() prompt interactively.
login(token=os.getenv("HF_TOKEN"))

# Load model and tokenizer
print(torch.cuda.is_available())
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")

# Files
OUTPUT_FILE_NAME = "./../csv_files/expOutputs/experiment_baseline_llama2-7b.csv"



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

False


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-chat-hf.
401 Client Error. (Request ID: Root=1-66fed8e4-58a3c23426efc8a376c2727d;4c7c2d42-06e3-442f-9406-6ada0cebd129)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-7b-chat-hf is restricted. You must have access to it and be authenticated to access it. Please log in.

In [4]:
# Read environment variables from a local .env file (if one exists), then
# build the OpenAI client from OPENAI_API_KEY.
# NOTE(review): `client` is not referenced by any other cell visible in this
# notebook - confirm whether it is still needed for the LLaMA experiment.
load_dotenv()
client = openai.OpenAI(
    api_key=os.getenv('OPENAI_API_KEY'),
)

In [5]:
def ask(query: str, model_name: str = MODEL_NAME, max_new_tokens: int = 100) -> str:
    """Generate a reply to ``query`` with the globally loaded LLaMA model.

    Args:
        query: Prompt text; it is passed to the tokenizer unchanged.
        model_name: Accepted for call compatibility only. Generation always
            runs on the module-level ``model`` and ``tokenizer``.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The newly generated text only (the prompt is not echoed back), with
        surrounding whitespace stripped.
    """
    inputs = tokenizer(query, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        # Deterministic (greedy) decoding is requested explicitly. Passing
        # temperature=0 is not a valid sampling temperature and raises a
        # ValueError whenever sampling is enabled in the generation config.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

    # For decoder-only models generate() returns prompt + continuation, so the
    # prompt tokens are dropped before decoding.
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response.strip()

# Inferencing

In [6]:
def process_prompts(df, num_prompts):
    """Ask the model every question in ``df`` and collect the answers.

    Args:
        df: DataFrame with a ``'Question'`` column.
        num_prompts: Maximum number of rows to process, counted from the top
            of ``df``. Values larger than ``len(df)`` process every row;
            zero or negative values process nothing.

    Returns:
        A DataFrame with the columns ``'Question'`` and ``'Answer'`` (present
        even when no prompt was processed), one row per processed prompt.
    """
    # Select rows by position so the function also works when the index of
    # `df` is not 0..n-1 (for example after filtering or sorting).
    questions = df['Question'].iloc[:max(num_prompts, 0)]
    results = []

    progress = tqdm(questions, total=len(questions), desc="Processing prompts")
    for prompt_number, current_question in enumerate(progress, start=1):
        print(f"\nProcessing prompt number: {prompt_number}")
        print(f"Current prompt: {current_question}")

        answer = ask(current_question, MODEL_NAME)

        results.append({
            'Question': current_question,
            'Answer': answer
        })

        print("\nAnswer:")
        print(f"{'-'*30}")
        print(f"{answer}")
        print(f"{'-'*30}")

    return pd.DataFrame(results, columns=['Question', 'Answer'])

In [None]:
# Create the output folder before the long inference run, so that a missing
# directory cannot make the final to_csv() fail and discard the results.
os.makedirs(os.path.dirname(OUTPUT_FILE_NAME) or ".", exist_ok=True)

df = pd.read_csv('./../csv_files/rag_questions.csv', low_memory=False)

results_df = process_prompts(df, len(df))

results_df.to_csv(OUTPUT_FILE_NAME, encoding='utf-8', index=False)

In [None]:
def send_notification(title, message):
    """Show a desktop notification and play a sound for the current OS.

    macOS uses ``osascript`` and ``afplay``, Linux uses ``notify-send`` and
    ``paplay``, Windows uses ``plyer`` and ``winsound``. On any other
    platform, or when the notifier cannot be started, the notification is
    printed to stdout instead.

    Args:
        title: Heading of the notification.
        message: Body text of the notification.
    """
    import subprocess  # local import: only this function needs it

    def _run(command):
        """Run an external command without a shell; return False if it cannot start."""
        try:
            subprocess.run(command, check=False)
        except OSError:  # e.g. the executable is not installed
            return False
        return True

    def _applescript_string(text):
        """Quote ``text`` as an AppleScript string literal."""
        escaped = str(text).replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    system = platform.system()
    shown = False

    if system == "Darwin":  # macOS
        script = (
            f"display notification {_applescript_string(message)} "
            f"with title {_applescript_string(title)}"
        )
        # Arguments are passed as a list, so quotes or shell metacharacters in
        # the title/message cannot break or inject into the command.
        shown = _run(["osascript", "-e", script])
        _run(["afplay", "/System/Library/Sounds/Glass.aiff"])  # Play notification sound
    elif system == "Linux":
        shown = _run(["notify-send", str(title), str(message)])
        _run(["paplay", "/usr/share/sounds/freedesktop/stereo/complete.oga"])  # Play notification sound
    elif system == "Windows":
        try:
            from plyer import notification
        except ImportError:
            pass  # fall through to the printed notification below
        else:
            notification.notify(
                title=title,
                message=message,
                timeout=10  # Notification duration in seconds
            )
            shown = True
        import winsound
        winsound.MessageBeep(winsound.MB_ICONASTERISK)  # Play notification sound

    if not shown:
        print(f"Notification: {title} - {message}")

# Announce that the baseline run for the configured model is complete.
send_notification(
    "Experiment Finished",
    f"Your {MODEL_NAME} baseline experiment has finished running.",
)


In [None]:
# Spot check: transmitter centred at 1400 MHz, 20 dBm peak power.
ask(
    'I am using a transmitter to be used for WiFi operation. '
    'This transmitter is going to transmit a peak power of 20 dBm with 20 MHz bandwidth. '
    'This transmitter would have a center frequency of 1400 MHz. '
    'I am going to use an antenna which has a peak gain of 0.5 dBi. '
    'The antenna operates on the 2450 MHz band with a 100 MHz bandwidth. '
    'Does this transmitter follow FCC regulations? '
)

In [None]:
# Spot check: frequency-hopping transmitter (2412-2484 MHz), 40 dBm peak power.
ask(
    'I am using a transmitter to be used for WiFi operation. '
    'This transmitter is going to transmit a peak power of 40 dBm on both 20 Mhz and 40 MHz bandwidth. '
    'This transmitter can hop on any frequency from 2412 MHz to 2484 MHz. '
    'I am going to use an antenna which has a peak gain of 0.5 dBi. '
    'The antenna operates on the 2450 MHz band with a 100 MHz bandwidth. '
    'Does this transmitter follow FCC regulations? '
)