In [None]:
# install google-generativeai for calling Gemini API
!pip install google-generativeai



In [None]:
import google.generativeai as genai
from google.colab import userdata

# The API key is read from the Colab secret named 'api': add a secret with that
# exact name (holding your Gemini API key) in Google Colab's Secrets panel.
GOOGLE_API_KEY=userdata.get('api')
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
# Print the name of each model returned by genai.list_models(), one per line.
available_models = genai.list_models()
for model_info in available_models:
    print(model_info.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-exp-1206
models/gemini-exp-1121
models/gemini-exp-1114
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experimental
models/emb

In [None]:
# Create the Gemini model handle used by the prompting cells below.
# Each of the four listed harm categories is set to 'BLOCK_NONE'.
model = genai.GenerativeModel(
    "gemini-1.5-flash-latest",
    safety_settings={
        category: 'BLOCK_NONE'
        for category in ('HATE', 'HARASSMENT', 'SEXUAL', 'DANGEROUS')
    },
)

In [None]:
!unzip "/content/SemEval_2014_Task_4-20250204T160411Z-001.zip"

Archive:  /content/SemEval_2014_Task_4-20250204T160411Z-001.zip
  inflating: SemEval_2014_Task_4/Laptops_Test_Data_PhaseA.xml  
  inflating: SemEval_2014_Task_4/laptops-trial.xml  
  inflating: SemEval_2014_Task_4/Restaurants_Test_Data_phaseB.xml  
  inflating: SemEval_2014_Task_4/Laptops_Test_Data_phaseB.xml  
  inflating: SemEval_2014_Task_4/restaurants-trial.xml  
  inflating: SemEval_2014_Task_4/Restaurants_Test_Data_PhaseA.xml  
  inflating: SemEval_2014_Task_4/Laptops_Train.xml  
  inflating: SemEval_2014_Task_4/Restaurants_Train.xml  


In [None]:
import xml.etree.ElementTree as ET
import pandas as pd

def extract_sentences(root):
    """Collect (id, text, aspect terms) for every <sentence> under ``root``.

    Args:
        root: An ``xml.etree.ElementTree.Element`` whose descendants include
            ``<sentence>`` elements.

    Returns:
        A list of ``(sentence_id, sentence_text, aspect_terms)`` tuples.
        ``sentence_id`` is the sentence's ``id`` attribute, ``sentence_text``
        is its whitespace-stripped text, and ``aspect_terms`` is a (possibly
        empty) list holding the ``term`` attribute of each ``<aspectTerm>``
        inside the sentence's ``<aspectTerms>`` child.
    """
    records = []
    # Walk every sentence in the XML tree
    for sentence in root.findall('.//sentence'):
        # Prefer the dedicated <text> child; fall back to concatenating all
        # text inside the sentence when that child is absent.
        sentence_text = sentence.findtext('text')
        if sentence_text is None:
            sentence_text = ''.join(sentence.itertext())

        # A sentence without an <aspectTerms> element yields an empty list.
        aspect_terms = [
            aspect_term.get('term')
            for aspect_term in sentence.findall('./aspectTerms/aspectTerm')
        ]

        records.append((sentence.get('id'), sentence_text.strip(), aspect_terms))
    return records


# Read the XML data
tree = ET.parse('/content/SemEval_2014_Task_4/Laptops_Test_Data_phaseB.xml')
root = tree.getroot()

data = extract_sentences(root)

# Convert the records into a DataFrame
df = pd.DataFrame(data, columns=['ID', 'Text', 'AspectTerms'])

# Show the first 5 rows of the DataFrame
print(df.head())


       ID                                               Text  \
0   892:1  Boot time is super fast, around anywhere from ...   
1  1144:1  tech support would not fix the problem unless ...   
2   805:2                 but in resume this computer rocks!   
3   359:1                                   Set up was easy.   
4   562:1  Did not enjoy the new Windows 8 and touchscree...   

                          AspectTerms  
0                         [Boot time]  
1                      [tech support]  
2                                  []  
3                            [Set up]  
4  [Windows 8, touchscreen functions]  


In [None]:
# Number of sentences to keep. The prompting loops below iterate over `data`
# and call the model once per entry, so this bounds how many requests are sent.
SAMPLE_SIZE = 10
data = data[:SAMPLE_SIZE]

In [None]:
# Display the truncated list of (id, text, aspect terms) tuples for inspection.
data

[('892:1',
  'Boot time is super fast, around anywhere from 35 seconds to 1 minute.',
  ['Boot time']),
 ('1144:1',
  'tech support would not fix the problem unless I bought your plan for $150 plus.',
  ['tech support']),
 ('805:2', 'but in resume this computer rocks!', []),
 ('359:1', 'Set up was easy.', ['Set up']),
 ('562:1',
  'Did not enjoy the new Windows 8 and touchscreen functions.',
  ['Windows 8', 'touchscreen functions']),
 ('323:1',
  "I expected so as it's an Apple product, but I was glad to see my expectations exceeded, this is THE laptop to buy right now.",
  []),
 ('958:1',
  "Other than not being a fan of click pads (industry standard these days) and the lousy internal speakers, it's hard for me to find things about this notebook I don't like, especially considering the $350 price tag.",
  ['internal speakers', 'price tag', 'click pads']),
 ('684:1', 'excellent in every way.', []),
 ('282:9',
  'No installation disk (DVD) is included.',
  ['installation disk (DVD)']),


### 0-shot

In [None]:
from tqdm import tqdm
import time
import json
# Maps sentence ID -> raw text of the model's zero-shot answer.
predict = {}
# 1-based number of requests sent so far (stays 0 when `data` is empty).
count = 0

# Testing
for count, sample in enumerate(tqdm(data), start=1):
    sample_id, text, aspect = sample
    aspect_text = ', '.join(aspect)
    print(aspect_text)
    prompt = f"Bạn là một trợ thủ đắc lực! Hãy vui lòng phân tích câu sau bằng tiếng anh, xác định các thuật ngữ thuộc tính (aspects) và độ cảm xúc (polarity) tương ứng của chúng (tích cực, tiêu cực, trung lập, mâu thuẫn) dựa trên các {aspect_text} đã cho trước"
    response = model.generate_content([prompt, text])
    predict[sample_id] = response.text
    # Rewrite the results file after every sample so that answers collected so
    # far are already on disk if a later request fails.
    with open("0-shot.json", "w", encoding='utf-8') as outfile:
        json.dump(predict, outfile, indent=4, ensure_ascii=False)
    # Pause after every 10th request, but not after the final one
    # (presumably to respect the API rate limit -- confirm the quota).
    if count % 10 == 0 and count < len(data):
        time.sleep(90)


  0%|          | 0/10 [00:00<?, ?it/s]

Boot time


 10%|█         | 1/10 [00:05<00:49,  5.51s/it]

tech support


 20%|██        | 2/10 [00:10<00:39,  4.94s/it]




 30%|███       | 3/10 [00:13<00:30,  4.41s/it]

Set up


 40%|████      | 4/10 [00:16<00:23,  3.86s/it]

Windows 8, touchscreen functions


 50%|█████     | 5/10 [00:19<00:17,  3.58s/it]




 60%|██████    | 6/10 [00:24<00:15,  3.95s/it]

internal speakers, price tag, click pads


 70%|███████   | 7/10 [00:29<00:13,  4.39s/it]




 80%|████████  | 8/10 [00:32<00:07,  3.80s/it]

installation disk (DVD)


 90%|█████████ | 9/10 [00:36<00:03,  3.86s/it]

use


100%|██████████| 10/10 [00:39<00:00,  4.00s/it]


### 1-shot

In [None]:
# Instruction plus one worked example (one-shot); the query loop appends the
# sentence to analyse to this text.
# The example's aspect list is written as plain text joined with ", " -- the
# same way the query loop formats aspects -- and names the full term
# '"sales" team' that appears in both the example text and its answer.
prompt_base = '''
Bạn là một trợ lý thông minh! Hãy phân tích câu dưới đây bằng tiếng Anh, xác định các thuật ngữ (aspects) và độ cảm xúc (polarity) tương ứng với chúng (tích cực, tiêu cực, trung lập, mâu thuẫn).
Ví dụ:
Text: The tech guy then said the service center does not do 1-to-1 exchange and I have to direct my concern to the "sales" team, which is the retail shop where I bought my netbook from?
Aspect :service center, "sales" team, tech guy
Answer:
term="service center" polarity="negative"
term="&quot;sales&quot; team" polarity="negative"
term="tech guy" polarity="neutral"
'''

In [None]:
from tqdm import tqdm
import time
import json
# Maps sentence ID -> raw text of the model's one-shot answer.
predict = {}
# 1-based number of requests sent so far (stays 0 when `data` is empty).
count = 0

# Testing
for count, sample in enumerate(tqdm(data), start=1):
    sample_id, text, aspect = sample
    aspect_text = ', '.join(aspect)
    # Append the sentence and its aspect list to the shared one-shot prompt.
    prompt = prompt_base + f"\nBây giờ, hãy phân tích câu sau với phần trả lời ngắn gọn giống như câu trước : {text} với các Aspect là {aspect_text}"
    response = model.generate_content([prompt])
    print(response.text)
    predict[sample_id] = response.text
    # Rewrite the results file after every sample so that answers collected so
    # far are already on disk if a later request fails.
    with open("1-shot.json", "w", encoding='utf-8') as outfile:
        json.dump(predict, outfile, indent=4, ensure_ascii=False)
    # Pause after every 10th request, but not after the final one
    # (presumably to respect the API rate limit -- confirm the quota).
    if count % 10 == 0 and count < len(data):
        time.sleep(70)

 10%|█         | 1/10 [00:01<00:14,  1.62s/it]

term="Boot time" polarity="positive"



 20%|██        | 2/10 [00:03<00:15,  1.89s/it]

term="tech support" polarity="negative"



 30%|███       | 3/10 [00:05<00:12,  1.78s/it]

term="computer" polarity="positive"
term="resume" polarity="neutral"



 40%|████      | 4/10 [00:06<00:10,  1.72s/it]

term="Set up" polarity="positive"



 50%|█████     | 5/10 [00:09<00:09,  1.86s/it]

term="Windows 8" polarity="negative"
term="touchscreen functions" polarity="negative"



 60%|██████    | 6/10 [00:11<00:08,  2.13s/it]

Here's an analysis of the sentence, focusing on the provided aspects:

**Text:** I expected so as it's an Apple product, but I was glad to see my expectations exceeded, this is THE laptop to buy right now.

**Aspects:** Apple product, expectations, laptop


**Answer:**

term="Apple product" polarity="positive"
term="expectations" polarity="positive"
term="laptop" polarity="positive"



 70%|███████   | 7/10 [00:14<00:06,  2.24s/it]

term="internal speakers" polarity="negative"
term="price tag" polarity="positive"
term="click pads" polarity="negative"



 80%|████████  | 8/10 [00:16<00:04,  2.16s/it]

Text: excellent in every way

Aspect: overall quality

Answer: term="overall quality" polarity="positive"



 90%|█████████ | 9/10 [00:17<00:01,  1.97s/it]

term="installation disk (DVD)" polarity="negative"



100%|██████████| 10/10 [00:19<00:00,  1.93s/it]

term="use" polarity="positive"




