In [11]:
import sys
import os
import logging
import json
from dotenv import load_dotenv

# Change cwd to the DataEnrichment root folder (two levels above the kernel's
# initial working directory) and make that folder importable.
#
# The root is resolved only once per kernel session. Deriving it from
# os.getcwd() on every execution would climb two more directories each time
# this cell is re-run, because the cwd has already been moved by the first run.
if "data_enrichment_root_path" not in globals():
    data_enrichment_root_path = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
os.chdir(data_enrichment_root_path)

# Append only when missing so repeated runs do not add duplicate sys.path entries.
if data_enrichment_root_path not in sys.path:
    sys.path.append(data_enrichment_root_path)

# Load environment variables from a .env file. This runs after the chdir above
# and before the project imports below.
# NOTE(review): presumably the .env is looked up relative to the new cwd — confirm.
load_dotenv()

import db
from worker import process
from utils import download
from utils.get_video_description import get_video_description
from AudioProcessor.transcriber import transcribe_audio
from AudioProcessor.sentiment import SentimentAnalyzer
import VideoProcessor
import AudioProcessor

In [2]:
# Open the database connection; it is passed to get_video_description in a later cell.
db_conn = db.get_connection()
# Id of the video that the download and description cells operate on.
video_id = "7528389380721904918"

INFO - [EnrichmentWorker] - Successfully Connected to Database


In [3]:
# Download the video for video_id. The call returns three values, unpacked as
# video_url, video_path and audio_path; the two paths feed the transcription
# and video-processing cells below.
video_url, video_path, audio_path = download.tiktok_full(video_id)


INFO - [EnrichmentWorker] - Downloading MP4 for 7528389380721904918 → /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/video/7528389380721904918.mp4
INFO - [EnrichmentWorker] - Audio 7528389380721904918 is already downloaded in /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/audio/7528389380721904918.wav. Skipping


In [4]:
# File name of the transcription model handed to transcribe_audio.
model_name = "ggml-large-v3.bin" 
# Transcribe the downloaded audio. The result is unpacked into three values;
# only `transcript` is consumed by the cells visible below.
transcript, lang, output = transcribe_audio(audio_path, model_name)
# Emit the full transcript through the logger so it shows in the cell output.
logging.info(transcript)

INFO - [EnrichmentWorker] - Starting transcriber model ggml-large-v3.bin for audio in /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/audio/7528389380721904918.wav
INFO - [EnrichmentWorker] - De la 1 ianuarie, impozitul pe proprietate va fi mai mare.
 Asta pentru că va fi calculat în funcție de valoarea de piață a locuinței
 și nu de cea veche înregistrată la primărie și neactualizată de ani de zile.
 Schimbarea va aduce mai mulți bani la buget,
 mai ales că România este țara din Uniunea Europeană cu cei mai mulți proprietari.
 Efectele vor fi resimțite de cei care au case și apartamente în orașe mari,
 precum București, Cluj-Napoca sau Iași, unde impozitele ar putea să se dubleze.
 Modificarea este o condiție asumată de România în PNRR.
 De la 1 ianuarie vom plăti impozit pe proprietate în funcție de valoarea de piață reală a locuinței.
 La guvern se lucrează în aceste zile la dezvoltarea aplicației e-Proprietatea,
 care va centraliza datele despre prețurile reale d

In [5]:
# Score the transcript's sentiment with the language passed explicitly.
# NOTE(review): the language is hard-coded to "ro" although transcribe_audio
# also returned `lang`. Confirm whether `lang` should be passed here instead.
sentiment_results = SentimentAnalyzer.get_sentiment(transcript, "ro")

INFO - [EnrichmentWorker] - Loading model DGurgurov/xlm-r_romanian_sentiment for language ro
INFO - [EnrichmentWorker] - Successfully loaded model DGurgurov/xlm-r_romanian_sentiment
  return forward_call(*args, **kwargs)
INFO - [EnrichmentWorker] - SENTIMENT MODEL OUTPUT SequenceClassifierOutput(loss=None, logits=tensor([[ 0.5810, -0.1974]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
INFO - [EnrichmentWorker] - SENTIMENT MODEL SCORES [0.6853467  0.31465325]


In [6]:
# Display the sentiment scores returned by the previous cell.
sentiment_results

{'positive': 0.3146532475948334,
 'neutral': 0.0,
 'negative': 0.6853467226028442}

In [7]:
# Look up the description for video_id using the open database connection.
description = get_video_description(video_id, db_conn)

In [12]:
# Run the video processor on the downloaded file together with the transcript,
# the sentiment scores and the description gathered in the earlier cells.
analysis_result_json = VideoProcessor.process(video_path, transcript, sentiment_results, description)

INFO - [EnrichmentWorker] - Starting video processing for /Users/rolandteslaru/Desktop/ZeruelNet/packages/DataEnrichment/tmp/video/7528389380721904918.mp4
INFO - [EnrichmentWorker] - AFC is enabled with max remote calls: 10.
INFO - [EnrichmentWorker] - HTTP Request: POST https://aiplatform.googleapis.com/v1beta1/projects/zeruel-ney/locations/global/publishers/google/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"
INFO - [EnrichmentWorker] - AFC remote call 1 is done.
INFO - [EnrichmentWorker] - VIDEO Analysis result {'summary': "The video discusses the upcoming increase in property taxes in Romania starting January 1st. The new tax will be calculated based on the real market value of properties, replacing the old, outdated registered values. This change is expected to increase budget revenue and is a condition of Romania's National Recovery and Resilience Plan (PNRR). The report highlights that Romania has a high rate of homeownership within the EU and provides examples 

In [13]:
# Display the analysis result produced by VideoProcessor.process.
analysis_result_json

{'summary': "The video discusses the upcoming increase in property taxes in Romania starting January 1st. The new tax will be calculated based on the real market value of properties, replacing the old, outdated registered values. This change is expected to increase budget revenue and is a condition of Romania's National Recovery and Resilience Plan (PNRR). The report highlights that Romania has a high rate of homeownership within the EU and provides examples of how the tax might increase, potentially doubling or tripling in major cities. A new digital platform, e-Proprietatea, is being developed to manage this data, utilizing digital maps, GPS coordinates, satellite imagery, and construction permits to assign a unique ID to each property. The government anticipates collecting an additional 6 billion lei annually from this new tax.",
 'identified_subjects': [{'subject': 'romania', 'stance': 0.1},
  {'subject': 'property tax', 'stance': -0.7},
  {'subject': 'pnrr', 'stance': 0.8},
  {'su

In [9]:
# NOTE(review): this repeats the display cell directly above. Its execution count (9)
# is lower than that of the cell assigning analysis_result_json (12), so the stored
# output comes from an earlier run. Re-run top to bottom or remove this cell.
analysis_result_json

{'summary': "The video discusses an upcoming increase in property taxes in Romania starting January 1st. The new tax will be calculated based on the real market value of properties, replacing older, unupdated valuations. This change is expected to increase state revenue and is a condition of Romania's National Recovery and Resilience Plan (PNRR). The report highlights that Romania has a high rate of homeownership within the EU and provides examples of potential tax increases, showing a significant jump from current to future calculations. The government is developing a digital platform, e-Proprietatea, to manage this data, which will incorporate various data sources like satellite imagery and cadastral information. A pilot program is planned for major cities like Bucharest, Cluj-Napoca, and Iași.",
 'identified_subjects': [{'subject': 'romania', 'stance': 0.1},
  {'subject': 'pnrr', 'stance': 1.0},
  {'subject': 'property tax', 'stance': -0.7},
  {'subject': 'real estate market', 'stan