# Generative AI

In [1]:
!pip install groq

Collecting groq
  Downloading groq-0.13.0-py3-none-any.whl.metadata (13 kB)
Downloading groq-0.13.0-py3-none-any.whl (108 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.8/108.8 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.13.0


In [2]:
from google.colab import userdata

API_KEY = userdata.get("GROQ_API_KEY")

In [3]:
from groq import Groq

client = Groq(
    api_key=API_KEY
)

def hit_groq(base_word, groq_model):
  chat_completion = client.chat.completions.create(
      messages=[
          {
              "role": "system",
              "content": "you are a linguist that try to make a full coherent sentence out of list of basic words"
          },
          {
              "role": "user",
              "content": f"""Please create a full coherent sentence out of this words, the order of the word that appear in the sentence have to be in sequence just like how the word appeared in the list.
                            please just output the word without any other text. you can add punctuation to the generated sentence to make the sentence more natural. here are the words: [{base_word}]""",
          }
      ],
      model=groq_model,
      temperature=0.5,
      max_tokens=1024,
      top_p=1,
      stop=None,
      stream=False,
  )
  return chat_completion.choices[0].message.content

In [4]:
# Function to compute Exact Match (EM) and F1 score
def exact_match(predicted, ground_truth):
    return 1 if predicted.lower() == ground_truth.lower() else 0

def compute_f1(predicted, ground_truth):
    predicted_tokens = set(predicted.lower().split())
    ground_truth_tokens = set(ground_truth.lower().split())

    common = predicted_tokens.intersection(ground_truth_tokens)
    if len(common) == 0:
        return 0, 0, 0

    precision = len(common) / len(predicted_tokens)
    recall = len(common) / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)
    return precision, recall, f1

In [9]:
import pandas as pd
from tqdm import tqdm
df = pd.read_csv('seq2seq_data.csv')

def evaluate_model(dataset, groq_model):
  input_base_word = dataset['base_word_text']
  ground_truth = dataset['original_text']
  predicted = []
  for base_word in tqdm(input_base_word):
    result = hit_groq(base_word, groq_model)
    predicted.append(str(result))

  exact_match_scores = [exact_match(pred, gt) for pred, gt in zip(predicted, ground_truth)]
  exact_match_result = sum(exact_match_scores) / len(exact_match_scores)

  f1_scores = [compute_f1(pred, gt) for pred, gt in zip(predicted, ground_truth)]
  precision_scores = [score[0] for score in f1_scores]
  recall_scores = [score[1] for score in f1_scores]
  f1_scores = [score[2] for score in f1_scores]

  precision_result = sum(precision_scores) / len(precision_scores)
  recall_result = sum(recall_scores) / len(recall_scores)
  f1_result = sum(f1_scores) / len(f1_scores)

  print(f"Exact Match: {exact_match_result}")
  print(f"Precision: {precision_result}")
  print(f"Recall: {recall_result}")
  print(f"F1 Score: {f1_result}")

In [10]:
# use 300 random sample from the data to test the model
data = df.sample(n=300)

## llama-3.2-90b-vision-preview

In [11]:
evaluate_model(data, "llama-3.2-90b-vision-preview")

100%|██████████| 300/300 [21:01<00:00,  4.20s/it]

Exact Match: 0.006666666666666667
Precision: 0.7677154171084541
Recall: 0.7207894314245613
F1 Score: 0.7394824268380005





## llama-3.1-8b-instant

In [12]:
evaluate_model(data, "llama-3.1-8b-instant")

100%|██████████| 300/300 [10:52<00:00,  2.18s/it]

Exact Match: 0.01
Precision: 0.7061126963695972
Recall: 0.6831324739330482
F1 Score: 0.6895524043326703





## llama3-70b-8192

In [13]:
evaluate_model(data, "llama3-70b-8192")

100%|██████████| 300/300 [11:00<00:00,  2.20s/it]

Exact Match: 0.006666666666666667
Precision: 0.7149985257463664
Recall: 0.6967950907127445
F1 Score: 0.6936466449067747





## gemma2-9b-it

In [14]:
evaluate_model(data, "gemma2-9b-it")

100%|██████████| 300/300 [10:46<00:00,  2.15s/it]

Exact Match: 0.0
Precision: 0.8015317688086716
Recall: 0.7354538584372563
F1 Score: 0.7634117749666258





## mixtral-8x7b-32768

In [15]:
evaluate_model(data, "mixtral-8x7b-32768")

100%|██████████| 300/300 [10:46<00:00,  2.15s/it]

Exact Match: 0.0
Precision: 0.5900965062335317
Recall: 0.6053485818969347
F1 Score: 0.5875230376110244





## llama-3.3-70b-versatile

In [16]:
evaluate_model(data, "llama-3.3-70b-versatile")

100%|██████████| 300/300 [11:06<00:00,  2.22s/it]

Exact Match: 0.0
Precision: 0.720728392595057
Recall: 0.6420264473127477
F1 Score: 0.6759102706409095





In [17]:
data

Unnamed: 0,topic_category,original_text,base_word_text
17745,5.0,how do i erase my book marks when i've double ...,how i erase my book mark when i ve double clic...
67137,9.0,she is sending me mixed signals.,she sending me mixed signal
76325,1.0,declarative: the concept of clontology was cr...,concept of clontology created by cganterrocco...
86111,9.0,declarative: being a pregnancy resulting from...,pregnancy resulting from rape nt define your ...
12058,3.0,what causes the pain associated with bruises?,what cause pain associated with bruise
...,...,...,...
97843,4.0,the itsy bitsy spider climbed up the water s...,itsy bitsy spider climbed up water spout
117563,4.0,imperative: share the reasons behind the high ...,share reason behind high rate of rape case in...
46642,3.0,what if i take vinegar every day what would th...,what if i take vinegar every day what that for...
66975,8.0,declarative: bob marley fans range from 5 to ...,bob marley fan range from 5 to 85 year old
