In [1]:
import sys
import os
import logging
import json
from dotenv import load_dotenv

# Change cwd to the DataEnrichment root folder (two levels above the directory
# the kernel started in).
#
# The starting directory is captured only the first time this cell runs in a
# kernel. Deriving the root from os.getcwd() on every run would climb two more
# levels each time the cell is re-executed, because the previous run already
# changed the cwd.
if "_notebook_start_dir" not in globals():
    _notebook_start_dir = os.getcwd()

data_enrichment_root_path = os.path.abspath(
    os.path.join(_notebook_start_dir, "..", "..")
)
os.chdir(data_enrichment_root_path)

# Put the root folder on the import path (once) so the project imports that
# follow in this cell can be resolved from it.
if data_enrichment_root_path not in sys.path:
    sys.path.append(data_enrichment_root_path)

# Load variables from a .env file into the process environment. This runs
# after the cwd change above and before the project imports below —
# presumably those modules read their configuration from the environment
# (TODO confirm).
load_dotenv()

import db
from worker import process
from utils import download
from utils.get_video_description import get_video_description
from AudioProcessor.transcriber import transcribe_audio
from AudioProcessor.sentiment import SentimentAnalyzer
import AlignmentCalculator
import VideoProcessor
import AudioProcessor

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Database connection; passed to get_video_description() further down.
db_conn = db.get_connection()

# Id of the TikTok video that is walked through the pipeline in this notebook.
video_id = "7527601571677441302"

# Default local locations of the downloaded artifacts, relative to the
# DataEnrichment root (the cwd set in the first cell). The download cell
# overwrites both values with the paths it actually produced; these defaults
# matter when that cell is skipped because the files already exist.
video_path = os.path.abspath(f"./tmp/video/{video_id}.mp4")
# The downloader writes the extracted audio under tmp/audio (see its log
# output), not next to the video, so the default must point there.
audio_path = os.path.abspath(f"./tmp/audio/{video_id}.wav")

INFO - [EnrichmentWorker] - Successfully Connected to Database


In [3]:
# Download the TikTok video for `video_id` and extract its audio track (both
# steps are reported in the log output below). The returned paths replace the
# default video_path / audio_path values assigned in the previous cell.
video_url, video_path, audio_path = download.tiktok_full(video_id)

INFO - [EnrichmentWorker] - Downloading MP4 for 7527601571677441302 → /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/video/7527601571677441302.mp4
INFO - [EnrichmentWorker] - Successfully extracted audio to: /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/audio/7527601571677441302.wav


In [4]:
# Model file handed to the transcriber.
# NOTE(review): the name looks like a whisper.cpp ggml checkpoint — confirm.
model_name = "ggml-large-v3.bin"

# Run speech-to-text on the extracted audio. The call yields three values;
# `transcript` is the text used by the later cells. `lang` and `output` are
# presumably the detected language and the raw transcriber output (TODO
# confirm against transcribe_audio).
(
    transcript,
    lang,
    output,
) = transcribe_audio(audio_path, model_name)

# Send the full transcript through the logger so it shows up in the cell output.
logging.info(transcript)

INFO - [EnrichmentWorker] - Starting transcriber model ggml-large-v3.bin for audio in /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/audio/7527601571677441302.wav
INFO - [EnrichmentWorker] - Dragii mei, ce se întâmplă astăzi în scandalul USAID este absolut șocant la nivel global, dar minunat în același timp.
 Trezirea în conștiința întregii lumi se produce mai profund ca niciodată.
 Ceea ce credeam că este democrație se dovedește că a fost doară cionzură globală.
 O mită împachetată într-o formă de ajutor social cu implicații teribile la adresa libertății civile.
 În întreaga lume, inclusiv în România, au fost demascate televiziuni și publicații care se dovedește că au avut o singură și unică misiune
 a ceea de a manipula percepția publică în interes globalist, fiind profund dăunătoare democrației interne și, mai grav,
 producând un atac direct.
 Acest lucru a fost destul de răbdăcut și a fost îndreptat la suveranitatea fiecărei țări.
 Felicit și mulțumesc US Doge p

In [5]:
# Score the transcript's sentiment. The language code is hard-coded to "ro";
# the log output shows a Romanian sentiment model being loaded for it.
# NOTE(review): the transcription cell also returns `lang` — confirm whether
# that value could be passed here instead of the literal.
sentiment_results = SentimentAnalyzer.get_sentiment(transcript, "ro")
# Bare expression: display the result (a dict with 'positive', 'neutral' and
# 'negative' scores in the output below).
sentiment_results

INFO - [EnrichmentWorker] - Loading model DGurgurov/xlm-r_romanian_sentiment for language ro
INFO - [EnrichmentWorker] - Successfully loaded model DGurgurov/xlm-r_romanian_sentiment
  return forward_call(*args, **kwargs)
INFO - [EnrichmentWorker] - SENTIMENT MODEL OUTPUT SequenceClassifierOutput(loss=None, logits=tensor([[ 1.1188, -0.8836]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
INFO - [EnrichmentWorker] - SENTIMENT MODEL SCORES [0.88104385 0.11895618]


{'positive': 0.11895617842674255,
 'neutral': 0.0,
 'negative': 0.8810438513755798}

In [6]:
# Fetch the description stored for this video, using the database connection
# opened earlier. For this sample it is a string of hashtags (see output).
description = get_video_description(video_id, db_conn)
# Bare expression: display the description as the cell output.
description

'#doamneajuta #🙏❤️🇷🇴 #suveranitate #romania #@calin.georgescu.real #fyp #OnoaresiRespect🙏❤️🇷🇴 '

In [7]:
# Run the video analysis on the downloaded file, giving it the transcript,
# the sentiment scores and the description as additional inputs. The log
# output below shows a generateContent request to a Gemini model being made
# during this call.
analysis_result_json = VideoProcessor.process(
    video_path,
    transcript,
    sentiment_results,
    description,
)

# Bare expression: display the result. It is a dict that includes 'summary'
# and 'identified_subjects'; the next cell also reads 'overall_alignment'.
analysis_result_json

INFO - [EnrichmentWorker] - Starting video processing for /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/video/7527601571677441302.mp4
INFO - [EnrichmentWorker] - AFC is enabled with max remote calls: 10.
INFO - [EnrichmentWorker] - HTTP Request: POST https://aiplatform.googleapis.com/v1beta1/projects/zeruel-ney/locations/global/publishers/google/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"
INFO - [EnrichmentWorker] - AFC remote call 1 is done.
INFO - [EnrichmentWorker] - VIDEO Analysis result {'summary': "The video alleges that the USAID scandal reveals a global manipulation of public perception by 'globalists' and corrupt institutions, which is detrimental to democracy and national sovereignty. It praises Călin Georgescu for exposing this system and criticizes multinational companies operating in Romania for allegedly exploiting the country and its people by reporting zero profit while engaging in artificial price increases. The speaker calls for

{'summary': "The video alleges that the USAID scandal reveals a global manipulation of public perception by 'globalists' and corrupt institutions, which is detrimental to democracy and national sovereignty. It praises Călin Georgescu for exposing this system and criticizes multinational companies operating in Romania for allegedly exploiting the country and its people by reporting zero profit while engaging in artificial price increases. The speaker calls for supporting Romanian producers and demanding ethical business practices, suggesting that the current economic situation is a result of these corrupt practices.",
 'identified_subjects': [{'subject': 'calin georgescu', 'stance': 0.8},
  {'subject': 'usaid', 'stance': -0.8},
  {'subject': 'globalists', 'stance': -0.9},
  {'subject': 'democracy', 'stance': -0.5},
  {'subject': 'institutions', 'stance': -0.7},
  {'subject': 'romania', 'stance': 0.6},
  {'subject': 'european union', 'stance': -0.7},
  {'subject': 'germany', 'stance': -0

In [8]:
# Pull the two inputs of the alignment calculation out of the analysis result.
identified_subjects = analysis_result_json["identified_subjects"]
llm_overall_alignment = analysis_result_json["overall_alignment"]

# Combine the per-subject stances with the LLM's overall alignment.
# NOTE(review): the values printed below are consistent with
#   final = alpha * deterministic + (1 - alpha) * llm
#   conflict = |llm - deterministic|
# — confirm against AlignmentCalculator.calculate.
(
    final_alignment,
    deterministic_alignment,
    alignment_conflict,
) = AlignmentCalculator.calculate(
    identified_subjects,
    llm_overall_alignment,
    alpha=0.25,
)

# Adjacent f-string literals are concatenated, so the printed text (including
# the space before each newline) is the same as with a single long literal.
print(
    f"llm_overall_alignment: {llm_overall_alignment} \n"
    f"final_alignment: {final_alignment} \n"
    f"deterministic_alignment: {deterministic_alignment} \n"
    f"alignment_conflict: {alignment_conflict}"
)

llm_overall_alignment: -0.95 
final_alignment: -0.8573091603053434 
deterministic_alignment: -0.5792366412213741 
alignment_conflict: 0.3707633587786259
