In [10]:
import sys
import os
import logging
import json
from dotenv import load_dotenv

# Change cwd to the DataEnrichment root folder.
# The root is derived from the *current* working directory, so it is resolved
# only once per kernel: re-running this cell would otherwise climb two more
# levels on every run and leave cwd / sys.path pointing at the wrong folder.
if "data_enrichment_root_path" not in globals():
    data_enrichment_root_path = os.path.abspath(
        os.path.join(os.getcwd(), "..", "..")
    )
os.chdir(data_enrichment_root_path)

# Make the root importable without adding a duplicate entry on re-runs.
if data_enrichment_root_path not in sys.path:
    sys.path.append(data_enrichment_root_path)

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this must run after the chdir above (so the
# project's .env is the one found) and before the project imports below —
# confirm against the modules that read the environment.
load_dotenv()

import db
from worker import process
from utils import download
from utils.get_video_description import get_video_description
from AudioProcessor.transcriber import transcribe_audio
from AudioProcessor.sentiment import SentimentAnalyzer
import VideoProcessor
import AudioProcessor

In [2]:
# TikTok video to run through the enrichment steps in the cells below.
video_id = "7478689689231248662"

# Database handle; passed to get_video_description further down.
db_conn = db.get_connection()

INFO - [EnrichmentWorker] - Successfully Connected to Database


In [3]:
# Fetch the video for `video_id`; the call yields three values which are
# unpacked into the URL, the local video path and the local audio path.
download_result = download.tiktok_full(video_id)
video_url, video_path, audio_path = download_result


INFO - [EnrichmentWorker] - Downloading MP4 for 7478689689231248662 → /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/video/7478689689231248662.mp4
INFO - [EnrichmentWorker] - Successfully extracted audio to: /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/audio/7478689689231248662.wav


In [4]:
# Transcriber model file handed to transcribe_audio.
model_name = "ggml-large-v3.bin"

# transcribe_audio returns three values; only `transcript` is used by the
# later cells visible in this notebook, `lang` and `output` are kept for
# inspection.
transcript, lang, output = transcribe_audio(
    audio_path,
    model_name,
)

# Emit the full transcript through the logger so it appears in the cell output.
logging.info(transcript)

INFO - [EnrichmentWorker] - Starting transcriber model ggml-large-v3.bin for audio in /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/audio/7478689689231248662.wav
INFO - [EnrichmentWorker] - România cedează spațiul aerian aliaților?
 Greșit!
 Ce se întâmplă de fapt?
 Noua lege nu cedează controlul asupra spațiului aerian,
 ci stabilește măsuri clare pentru apărarea acestuia
 împotriva dronelor și altor amenințări.
 Spațiul aerian național este integrat în spațiul aerian al NATO.
 Încă de acum 20 de ani, odată cu aderarea României la alianță,
 integrarea spațiilor aeriene ale aliaților
 este în conformitate cu tratatele la care România este parte.
 Degeaba permite intervenția rapidă pentru distrugerea,
 neutralizarea și preluarea controlului asupra dronelor
 care încalcă spațiul aerian.
 Toate deciziile se iau în funcție de nivelul amenințării
 și protejarea vieții civile este prioritară.
 România nu pierde controlul asupra spațiului său aerian.
 Oferă aliaților cadr

In [5]:
# Language code handed to the sentiment analyzer.
# NOTE(review): this is hard-coded to "ro" even though the transcription cell
# also returns `lang`; consider passing `lang` once it is confirmed that it
# uses the same language codes the analyzer expects.
sentiment_language = "ro"
sentiment_results = SentimentAnalyzer.get_sentiment(transcript, sentiment_language)

INFO - [EnrichmentWorker] - Loading model DGurgurov/xlm-r_romanian_sentiment for language ro
INFO - [EnrichmentWorker] - Successfully loaded model DGurgurov/xlm-r_romanian_sentiment
  return forward_call(*args, **kwargs)
INFO - [EnrichmentWorker] - SENTIMENT MODEL OUTPUT SequenceClassifierOutput(loss=None, logits=tensor([[ 1.4409, -1.2958]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
INFO - [EnrichmentWorker] - SENTIMENT MODEL SCORES [0.93915755 0.06084239]


In [6]:
# Display the sentiment scores returned by the previous cell.
sentiment_results

{'positive': 0.06084238737821579,
 'neutral': 0.0,
 'negative': 0.9391575455665588}

In [7]:
# Look up the description for `video_id` using the open database connection.
# NOTE(review): return type and missing-row behaviour are not visible here — confirm.
description = get_video_description(video_id, db_conn)

In [11]:
# Run the video analysis, feeding it everything gathered so far: the local
# video file, the transcript, the sentiment scores and the stored description.
analysis_result_json = VideoProcessor.process(
    video_path,
    transcript,
    sentiment_results,
    description,
)

INFO - [EnrichmentWorker] - Starting video processing for /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/video/7478689689231248662.mp4
INFO - [EnrichmentWorker] - AFC is enabled with max remote calls: 10.
INFO - [EnrichmentWorker] - HTTP Request: POST https://aiplatform.googleapis.com/v1beta1/projects/zeruel-ney/locations/global/publishers/google/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"
INFO - [EnrichmentWorker] - AFC remote call 1 is done.
INFO - [EnrichmentWorker] - VIDEO Analysis result {'summary': "The video features a man in a military uniform discussing Romanian airspace regulations. It addresses the question of whether Romania is ceding its airspace to allies, stating that this is incorrect. The video explains that new legislation establishes clear measures for defending national airspace against drones and other threats. It clarifies that Romania's airspace is integrated into NATO's, a situation that has existed since Romania joined the

In [12]:
# Display the analysis result produced by VideoProcessor.process.
analysis_result_json

{'summary': "The video features a man in a military uniform discussing Romanian airspace regulations. It addresses the question of whether Romania is ceding its airspace to allies, stating that this is incorrect. The video explains that new legislation establishes clear measures for defending national airspace against drones and other threats. It clarifies that Romania's airspace is integrated into NATO's, a situation that has existed since Romania joined the alliance. The law allows for rapid intervention to destroy, neutralize, or take control of drones violating airspace, with decisions based on threat levels and prioritizing civilian safety. The video asserts that Romania retains control, can impose restrictions, and that this framework provides legal grounds for allies to assist upon request, emphasizing security over cession and encouraging viewers to read the law and avoid disinformation.",
 'identified_subjects': [{'subject': 'romania', 'stance': 1.0},
  {'subject': 'nato', 'st

In [9]:
# NOTE(review): this cell repeats the previous display cell, and its execution
# count is older than the VideoProcessor.process run, so the saved output
# below comes from an earlier run. Re-run it or remove it before sharing.
analysis_result_json

{'summary': "The video features a man in a military uniform discussing Romanian airspace regulations. It clarifies that a new law does not cede airspace control to allies but rather establishes measures for its defense against drones and other threats. The speaker emphasizes that Romania's integration into NATO airspace is in line with treaties and that the law allows for rapid intervention to neutralize drones, prioritizing civilian safety. The message aims to counter misinformation by stating that Romania retains control, can impose restrictions, and that the situation is about security, not a loss of sovereignty.",
 'identified_subjects': [{'subject': 'romania', 'stance': 1.0},
  {'subject': 'nato', 'stance': 1.0},
  {'subject': 'national sovereignty', 'stance': 1.0},
  {'subject': 'drones', 'stance': -0.5},
  {'subject': 'misinformation', 'stance': -1.0}],
 'overall_alignment': 1.0}