In [2]:
# Load libraries 
import os
import re
import time
import openai 
import pandas as pd 
from tqdm import tqdm
from typing import Tuple
from dotenv import load_dotenv


load_dotenv()
pd.set_option("display.max_columns", None)

In [3]:
# Setting credentials
OPENAI_KEY = os.getenv("OPENAI_API_KEY", default = None) 
openai.api_key = OPENAI_KEY

In [5]:
# Load dataset
data = pd.read_excel("../dataset/Full Dataset Kotor updated 2.0.xlsx")
data.head()

Unnamed: 0,created_at,id_str,full_text,quote_count,reply_count,retweet_count,favorite_count,lang,user_id_str,conversation_id_str,username,tweet_url
0,Fri Nov 17 23:58:21 +0000 2023,1.725665e+18,@gogo74070675957 @iina_surbakti @gibran_tweet ...,0.0,0.0,0.0,1.0,in,1.213867e+18,1.725499e+18,ArjunaOnthesky,https://twitter.com/ArjunaOnthesky/status/1725...
1,Fri Nov 17 23:58:19 +0000 2023,1.725665e+18,@vendie7 Btw sy msh ingat omelanmu ttg prof MD...,,,,,,,,,
2,Fri Nov 17 23:57:49 +0000 2023,1.725665e+18,ðŸ”´âšªï¸ PEMILU terutama PILPRES adalah SATU...,1.0,7.0,12.0,29.0,in,1.378303e+18,1.725665e+18,_BungHerwin,https://twitter.com/_BungHerwin/status/1725664...
3,Fri Nov 17 23:57:34 +0000 2023,1.725665e+18,@ekagumilars Indonesia aman &amp damai tanpa ...,0.0,0.0,0.0,0.0,in,2537213000.0,1.725384e+18,irfandjay,https://twitter.com/irfandjay/status/172566450...
4,Fri Nov 17 23:57:31 +0000 2023,1.725664e+18,Pilpres kali ini,,,,,,,,,


In [6]:
# Data Duplicate checking
data.duplicated(subset = ['full_text', 'id_str', 'retweet_count']).value_counts()

False    1564
True      172
dtype: int64

In [7]:
# Overview duplicated data
data[data.duplicated(subset = ['full_text', 'id_str', 'retweet_count'])].head(10)

Unnamed: 0,created_at,id_str,full_text,quote_count,reply_count,retweet_count,favorite_count,lang,user_id_str,conversation_id_str,username,tweet_url
151,Fri Nov 17 21:00:46 +0000 2023,1.72562e+18,@NenkMonica @gibran_tweet Salahnya dmn dateng ...,,,,,,,,,
152,Fri Nov 17 20:49:46 +0000 2023,1.725617e+18,Kreatif balon #GanjarMahfud2024 anti di Bongka...,0.0,3.0,7.0,24.0,in,7.945097e+17,1.725617e+18,Jayabay19479190,https://twitter.com/Jayabay19479190/status/172...
352,Fri Nov 17 16:24:41 +0000 2023,1.725551e+18,@DPP_PKB @ninikwafiroh Maju bersama AMIN memen...,0.0,0.0,0.0,0.0,in,1.618235e+18,1.725482e+18,IndahRahaleb,https://twitter.com/IndahRahaleb/status/172555...
353,Fri Nov 17 16:23:57 +0000 2023,1.72555e+18,@triwul82 Mahfud MD mengingatkan bahwa tanggun...,,,,,,,,,
529,Fri Nov 17 23:56:55 +0000 2023,1.725664e+18,@Mdy_Asmara1701 Para Kabinda di banyak daerah ...,,,,,,,,,
586,Fri Nov 17 23:58:21 +0000 2023,1.725665e+18,@gogo74070675957 @iina_surbakti @gibran_tweet ...,0.0,0.0,0.0,1.0,in,1.213867e+18,1.725499e+18,ArjunaOnthesky,https://twitter.com/ArjunaOnthesky/status/1725...
587,Fri Nov 17 23:56:33 +0000 2023,1.725664e+18,Ini Daftar Nama dan Struktur Lengkap TKN Prabo...,0.0,0.0,0.0,0.0,in,1.110933e+18,1.725664e+18,fathw25,https://twitter.com/fathw25/status/17256642523...
588,Fri Nov 17 23:53:50 +0000 2023,1.725664e+18,Kecuali Jokowi jadi ketum Golkar dan Gibran ja...,0.0,0.0,0.0,0.0,in,84867300.0,1.725664e+18,tualang,https://twitter.com/tualang/status/17256635676...
589,Fri Nov 17 23:35:18 +0000 2023,1.725659e+18,Temukan dan dapatkan Kaos Baju prabowo gemoy -...,0.0,0.0,0.0,0.0,in,2993486000.0,1.725659e+18,aris_jenang,https://twitter.com/aris_jenang/status/1725658...
590,Fri Nov 17 23:35:18 +0000 2023,1.725659e+18,Masyarakat Menilai Gibran Tidak Mempunyai Kapa...,0.0,0.0,0.0,1.0,in,1.618289e+18,1.725659e+18,Liza16144812,https://twitter.com/Liza16144812/status/172565...


In [8]:
# Duplicate data filtering
data = data.drop_duplicates(subset = ['full_text', 'id_str', 'retweet_count'])

In [9]:
# Data Duplicate checking - validation
data.duplicated(subset = ['full_text', 'id_str', 'retweet_count']).value_counts()

False    1564
dtype: int64

In [10]:
# Define prompt and ingestion script
def prompt_enrichment(tweet_comment: str) -> str:
    prompt = \
    f"""
    Ekstraksi informasi yang dibutuhkan berdasarkan komentar twitter dibawah, dengan response cukup sesuai yang di definisikan tanpa penjelasan tambahan.

    komentar_twitter: "{tweet_comment}"

    Untuk response cukup isi dengan format dibawah.
    named_entity_recognition: [Jawaban anda: cakupan NER sesuai label "PERSON" atau "ORGANIZATION" saja]
    aspect_sentiment: [Identifikasi verb / noun-phrase hasil dari part-of-speech di dalam komentar, disertai dengan nilai sentiment masing-masing aspect dengan format <aspect (sentiment)>]
    """
    return prompt

def ingest_openai(tweet_comment: str, model_base: str = "gpt-3.5-turbo") -> Tuple[str, int]: 
    token_usage = 0
    response_extraction = ""
    try:
        response = openai.ChatCompletion.create(
            model = model_base, 
            messages = [{"role" : "user", "content" : prompt_enrichment(tweet_comment)}], 
            temperature = 0.1, max_tokens = 512, top_p = 1.0, 
            frequency_penalty = 0.0, presence_penalty = 0.0
        )
        response_extraction = response["choices"][0]["message"]["content"]
        token_usage = response["usage"]["total_tokens"]
    except Exception as E:
        print(f"[ERROR] - {E}")
        print("Retry with Recursive Func")
        time.sleep(5)
        ingest_openai(tweet_comment = tweet_comment)
    return response_extraction, token_usage

In [13]:
# Check total rows
print(f"Total Rows: {data.shape[0]}")

Total Rows: 1564


In [11]:
# Test ingestion
comment = data['full_text'].sample(1).values[0]
extraction, token_usage = ingest_openai(tweet_comment = comment)
print(f"[COMMENT]\n{comment}\n[RESULT - Token Usage: {token_usage}]\n{extraction}")

[COMMENT]
Komisi Pemilihan Umum (KPU) RI telah menetapkan nomor urut terhadap tiga calon presiden dan wakil presiden di Pilpres 2024.  #BersamaIndonesiaMaju #PrabowoGibranIstimewa #PrabowoGemoy Prabowo Subianto
[RESULT - Token Usage: 279]
named_entity_recognition: ["Komisi Pemilihan Umum (KPU) RI", "Prabowo Subianto"]
aspect_sentiment: ["menetapkan nomor urut (positive)", "tiga calon presiden dan wakil presiden (neutral)"]


In [14]:
# Apply on entire dataset
final_result_extraction, final_token_usage = [], []

In [15]:
# Iter and push into array
for comment in tqdm(data["full_text"], desc = "Ingestion Start"):
    result, token = ingest_openai(tweet_comment = comment)
    final_result_extraction.append(result)
    final_token_usage.append(token)

Ingestion Start:   1%|▋                                                            | 19/1564 [07:40<6:43:15, 15.66s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:   2%|█▍                                                          | 36/1564 [29:06<18:26:39, 43.46s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:   4%|██▍                                                          | 63/1564 [44:38<6:37:56, 15.91s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:   5%|██▉                                                          | 76/1564 [57:51<6:08:41, 14.87s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:   7%|███▊                                                      | 102/1564 [1:19:19<3:43:36,  9.18s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func
[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:   8%|████▋                                                     | 126/1564 [1:51:21<4:05:45, 10.25s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  10%|█████▌                                                    | 149/1564 [2:08:17<4:53:27, 12.44s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  11%|██████▍                                                   | 174/1564 [2:28:49<6:07:45, 15.87s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  11%|██████▍                                                  | 177/1564 [2:31:18<12:27:23, 32.33s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  12%|███████▏                                                  | 195/1564 [2:46:45<8:09:27, 21.45s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  13%|███████▎                                                 | 199/1564 [2:57:21<27:32:33, 72.64s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  13%|███████▎                                                 | 202/1564 [2:58:22<14:36:16, 38.60s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  13%|███████▍                                                 | 205/1564 [3:00:05<13:06:18, 34.72s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func
[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func
[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func
[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  14%|████████                                                  | 218/1564 [3:35:01<6:53:09, 18.42s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  15%|████████▌                                                 | 230/1564 [3:43:20<5:50:06, 15.75s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  16%|█████████▎                                                | 252/1564 [3:53:26<6:15:50, 17.19s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  17%|█████████▋                                               | 265/1564 [4:09:32<28:04:03, 77.79s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  17%|█████████▊                                               | 269/1564 [4:17:01<24:25:14, 67.89s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func
[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func
[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  20%|███████████▎                                              | 305/1564 [4:38:32<4:02:15, 11.54s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  20%|███████████▌                                              | 311/1564 [4:41:13<4:17:58, 12.35s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  20%|███████████▍                                             | 313/1564 [4:47:41<30:43:09, 88.40s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  21%|████████████                                              | 326/1564 [4:59:41<4:37:20, 13.44s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  22%|████████████▊                                             | 345/1564 [5:13:19<3:59:37, 11.79s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  23%|█████████████▍                                            | 362/1564 [5:17:50<3:48:55, 11.43s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  23%|████████████▉                                           | 363/1564 [5:28:02<63:54:04, 191.54s/it]

[ERROR] - The server is overloaded or not ready yet.
Retry with Recursive Func


Ingestion Start:  23%|█████████████▏                                          | 367/1564 [5:40:14<67:54:21, 204.23s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  24%|█████████████▌                                           | 372/1564 [5:51:50<30:17:52, 91.50s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  25%|██████████████▋                                           | 395/1564 [6:11:23<5:00:03, 15.40s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  26%|██████████████▌                                          | 400/1564 [6:22:11<16:52:25, 52.19s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  26%|██████████████▎                                         | 401/1564 [6:32:22<71:00:11, 219.79s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  28%|████████████████▏                                         | 438/1564 [6:47:18<2:08:55,  6.87s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  29%|████████████████▌                                         | 447/1564 [6:58:12<4:59:52, 16.11s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  31%|█████████████████▉                                        | 484/1564 [7:12:20<2:08:16,  7.13s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  38%|██████████████████████▎                                   | 601/1564 [7:34:58<2:50:47, 10.64s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  39%|██████████████████████                                   | 606/1564 [7:45:28<13:17:59, 49.98s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  39%|█████████████████████▊                                  | 609/1564 [7:55:46<28:57:12, 109.14s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  40%|███████████████████████                                   | 623/1564 [8:15:31<3:25:04, 13.08s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  42%|████████████████████████▌                                 | 662/1564 [8:38:18<1:34:09,  6.26s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  45%|██████████████████████████▎                               | 710/1564 [9:00:09<3:27:47, 14.60s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  48%|███████████████████████████▋                              | 746/1564 [9:41:35<9:45:53, 42.97s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  48%|███████████████████████████▉                              | 753/1564 [9:52:47<8:15:51, 36.69s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  52%|█████████████████████████████▌                           | 811/1564 [10:20:21<2:18:22, 11.03s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  57%|████████████████████████████████▍                        | 889/1564 [10:55:13<1:22:39,  7.35s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  57%|████████████████████████████████▌                        | 895/1564 [11:06:15<7:19:04, 39.38s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  62%|███████████████████████████████████▏                     | 965/1564 [11:32:20<1:40:08, 10.03s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  62%|███████████████████████████████████▍                     | 972/1564 [11:42:57<4:19:43, 26.32s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  64%|███████████████████████████████████▋                    | 998/1564 [12:13:14<12:00:17, 76.36s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  64%|██████████████████████████████████▌                   | 1000/1564 [12:23:35<26:26:48, 168.81s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  65%|████████████████████████████████████▍                   | 1019/1564 [12:36:38<1:47:35, 11.84s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  65%|████████████████████████████████████▋                   | 1024/1564 [12:47:09<7:29:42, 49.97s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  68%|██████████████████████████████████████▎                 | 1071/1564 [13:28:24<2:33:12, 18.65s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  69%|██████████████████████████████████████▋                 | 1079/1564 [13:39:21<3:10:21, 23.55s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  71%|███████████████████████████████████████▌                | 1104/1564 [13:56:41<1:19:00, 10.31s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  72%|████████████████████████████████████████▍               | 1128/1564 [14:11:15<1:14:02, 10.19s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  73%|████████████████████████████████████████▊               | 1139/1564 [14:22:51<1:52:28, 15.88s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  74%|█████████████████████████████████████████▍              | 1158/1564 [14:36:00<1:24:59, 12.56s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  78%|█████████████████████████████████████████████▍            | 1226/1564 [14:55:12<40:19,  7.16s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  81%|██████████████████████████████████████████████▉           | 1265/1564 [15:09:59<38:10,  7.66s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  81%|█████████████████████████████████████████████▌          | 1274/1564 [15:21:10<1:29:41, 18.56s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  83%|███████████████████████████████████████████████▉          | 1291/1564 [15:33:34<34:34,  7.60s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  84%|████████████████████████████████████████████████▍         | 1306/1564 [15:45:26<24:32,  5.71s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  87%|██████████████████████████████████████████████████▏       | 1354/1564 [16:02:15<32:11,  9.20s/it]

[ERROR] - HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
)
Retry with Recursive Func


Ingestion Start:  87%|████████████████████████████████████████████████▌       | 1355/1564 [16:03:45<1:56:09, 33.35s/it]

[ERROR] - HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
)
Retry with Recursive Func


Ingestion Start:  87%|██████████████████████████████████████████████████▍       | 1360/1564 [16:04:37<43:57, 12.93s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  87%|██████████████████████████████████████████████▉       | 1361/1564 [16:14:45<10:46:56, 191.21s/it]

[ERROR] - HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
)
Retry with Recursive Func


Ingestion Start:  94%|██████████████████████████████████████████████████████▋   | 1473/1564 [16:30:36<10:48,  7.13s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  95%|████████████████████████████████████████████████████▉   | 1478/1564 [16:41:08<1:10:51, 49.44s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  95%|█████████████████████████████████████████████████████▏  | 1486/1564 [16:55:33<1:06:36, 51.23s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  98%|████████████████████████████████████████████████████████▋ | 1530/1564 [17:19:09<12:27, 21.99s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start:  99%|█████████████████████████████████████████████████████████▎| 1547/1564 [17:31:34<02:05,  7.38s/it]

[ERROR] - Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retry with Recursive Func


Ingestion Start: 100%|██████████████████████████████████████████████████████████| 1564/1564 [17:44:08<00:00, 40.82s/it]


In [16]:
len(final_result_extraction), len(final_token_usage)

(1564, 1564)

In [19]:
# Assign result into dataframe
data['result extraction'] = final_result_extraction
data['token usage'] = final_token_usage

In [20]:
# Save into dataframe
data.to_csv("../dataset/data_twitter_pemilu_2024_enrich [V2].csv", index = False)