In [None]:
# !pip install ffmpeg-python
# !pip install faster-whisper
# !pip install nltk scikit-learn

In [None]:
# !pip install transformers
# !pip install sentence-transformers

In [None]:
# !pip install googletrans==4.0.0-rc1

In [None]:
import os
import ffmpeg
from IPython.display import Audio
from faster_whisper import WhisperModel
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from googletrans import Translator

# Download the VADER lexicon used by the NLTK SentimentIntensityAnalyzer imported above.
# nltk.download() reports on stdout and returns True when the package is available.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

In [None]:
# Shared VADER analyzer; analyze_sentiment() calls sid.polarity_scores() on each transcript.
sid = SentimentIntensityAnalyzer()

# Initialize the Whisper model
# `model` is the module-level speech-to-text model that transcribe_audio() relies on.
model_size = "medium"
model = WhisperModel(model_size)

In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

In [None]:

# Shared googletrans client: translate_text() uses it for translation and the
# processing loop uses it for language detection.
translator = Translator()


In [None]:
# Transformer-based sentiment classifier.
# The tokenizer and the classification head are loaded from the same checkpoint,
# so the checkpoint name is spelled out once and reused.
SENTIMENT_CHECKPOINT = "nlptown/bert-base-multilingual-uncased-sentiment"

tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)
model_sentiment = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)
sentiment_pipeline = pipeline(
    'sentiment-analysis',
    model=model_sentiment,
    tokenizer=tokenizer,
)

In [None]:
# Function to convert video to audio
def video_to_audio(video_path):
    """Extract the audio track of a video into an MP3 file in the working directory.

    The output is named after the video's base name with its extension replaced
    by ``.mp3``. ffmpeg is asked to overwrite an existing file of that name.

    Args:
        video_path: Path to the input video file.

    Returns:
        The name of the MP3 file (relative to the current working directory)
        that ffmpeg was asked to write.
    """
    # Swap the extension with os.path instead of str.replace(".mp4", ".mp3"):
    # replace() leaves names that do not contain ".mp4" (".MP4", ".mkv", no
    # extension) untouched, and it also rewrites a ".mp4" that happens to sit
    # in the middle of a file name.
    stem, _extension = os.path.splitext(os.path.basename(video_path))
    audiofilename = stem + ".mp3"

    # Select only the audio stream of the input and write it to the MP3 file.
    audio = ffmpeg.input(video_path).audio
    output_stream = ffmpeg.overwrite_output(ffmpeg.output(audio, audiofilename))
    ffmpeg.run(output_stream)
    return audiofilename

In [None]:
# Function to transcribe audio
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level Whisper ``model``.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        The transcript as one string, built by concatenating the text of every
        word of every segment, with surrounding whitespace removed.

    Raises:
        ValueError: If the transcription produced no words at all.
    """
    segments, _info = model.transcribe(audio_path, word_timestamps=True)

    # Consume the segments before testing for emptiness: `segments` may be a
    # lazy iterator, which is always truthy, so `if not segments` can never
    # detect an empty transcription.
    words = [
        word.word
        for segment in (segments or [])
        for word in (segment.words or [])
    ]
    if not words:
        raise ValueError("Transcription failed, segments are None or empty.")

    # Each word text already carries its own leading space (joining with " "
    # produced the doubled spaces visible in the notebook output), so the
    # pieces are concatenated as-is and only the outer whitespace is trimmed.
    return "".join(words).strip()

In [None]:
# Function to translate text
def translate_text(text, target_language='en'):
    """Translate ``text`` into ``target_language`` with the shared ``translator``.

    Args:
        text: The text to translate.
        target_language: Language code passed to the translator as ``dest``
            (defaults to ``'en'``).

    Returns:
        The translated text. Empty or whitespace-only input is returned
        unchanged without contacting the translation service.
    """
    # Nothing to translate: skip the remote call instead of sending blank text.
    if not text or not text.strip():
        return text
    return translator.translate(text, dest=target_language).text

In [None]:
# Function to perform topic modeling
def get_topics(text, num_topics=1, num_words=5):
    """Extract topic keyword strings from a single text with LDA.

    The text is vectorised as one document with English stop words removed,
    an LDA model with ``num_topics`` components is fitted on it, and for each
    component the ``num_words`` highest-weighted vocabulary terms are joined
    with spaces.

    Args:
        text: The text to analyse.
        num_topics: Number of LDA components to fit.
        num_words: Number of top terms to report per topic.

    Returns:
        A list with one space-separated keyword string per topic, or an empty
        list when the text contains nothing to build a vocabulary from.
    """
    # Blank input has no vocabulary; answer directly instead of failing below.
    if not text or not text.strip():
        return []

    vectorizer = CountVectorizer(stop_words='english')
    try:
        X = vectorizer.fit_transform([text])
    except ValueError:
        # Raised by CountVectorizer when no term survives tokenisation and
        # stop-word removal (empty vocabulary): there are no topics to report.
        return []

    lda = LatentDirichletAllocation(n_components=num_topics, random_state=0)
    lda.fit(X)
    words = vectorizer.get_feature_names_out()

    # argsort() is ascending, so the reversed tail slice yields the indices of
    # the num_words largest weights, highest first.
    return [
        " ".join(words[i] for i in topic.argsort()[:-num_words - 1:-1])
        for topic in lda.components_
    ]

In [None]:
# Function to analyze sentiment using VADER and Transformers
def analyze_sentiment(text):
    """Score the sentiment of ``text`` with both VADER and the transformer pipeline.

    Args:
        text: The text to analyse.

    Returns:
        A dict with two entries:
            ``'vader'``: the dict returned by ``sid.polarity_scores(text)``.
            ``'transformer'``: the lower-cased label of the first result of
            ``sentiment_pipeline`` (e.g. ``"4 stars"`` in the notebook output).
    """
    # VADER sentiment analysis
    vader_scores = sid.polarity_scores(text)

    # Transformer-based sentiment analysis.
    # Whole transcripts can be longer than the model's maximum input length;
    # truncation=True makes the tokenizer cut the input to that limit instead
    # of the pipeline failing on an over-long sequence.
    transformer_scores = sentiment_pipeline(text, truncation=True)
    transformer_sentiment = transformer_scores[0]['label'].lower()

    # Combining both sentiment scores
    return {
        'vader': vader_scores,
        'transformer': transformer_sentiment,
    }

In [None]:
# Main loop to process multiple videos
# Results accumulate here so later cells can summarise them:
all_transcriptions = []  # English transcript of each successfully processed video
all_sentiments = []      # analyze_sentiment() result for each of those transcripts

while True:
    # Strip surrounding whitespace so a padded "done " or a pasted path with a
    # trailing space/newline is still recognised.
    video_path = input("Enter the path to your news video (or 'done' to finish): ").strip()
    if video_path.lower() == 'done':
        break
    if not video_path:
        # Blank entry: ask again instead of handing an empty path to ffmpeg.
        continue

    try:
        audio_path = video_to_audio(video_path)
        transcription = transcribe_audio(audio_path)

        # Detect language
        detected_language = translator.detect(transcription).lang
        print(f"\nDetected language: {detected_language}")

        # Topic and sentiment analysis run on English text, so anything else
        # is translated first.
        if detected_language != 'en':
            transcription_translated = translate_text(transcription, target_language='en')
            print(f"\nTranscription (translated to English):\n{transcription_translated}")
        else:
            transcription_translated = transcription
            print(f"\nTranscription:\n{transcription_translated}")

        topic = get_topics(transcription_translated)
        sentiment = analyze_sentiment(transcription_translated)

        # Record results only after every step succeeded, keeping the two
        # lists aligned.
        all_transcriptions.append(transcription_translated)
        all_sentiments.append(sentiment)

        print("\n--- Analysis Results ---")
        print(f"Topics: {', '.join(topic)}")
        print(f"Sentiment (VADER): {sentiment['vader']}")
        print(f"Sentiment (Transformer): {sentiment['transformer']}")
        print("------------------------\n")

    except Exception as e:
        # Best effort: report the failure for this video and prompt for the next one.
        print(f"Error processing {video_path}: {e}")

Enter the path to your news video (or 'done' to finish): /content/Iran’s President and Foreign Minister feared dead in helicopter crash _ BBC News.mp4

Detected language: en

Transcription:
 A  major  search  operation  is  underway  after  a  helicopter  carrying  Iran's  president  and  its  foreign  minister  crashed  in  the  northwest  of  the  country  in  thick  fog.  President  Ebrahim  Raisi  had  been  returning  from  a  visit  to  the  border  with  neighboring

--- Analysis Results ---
Topics: president visit iran carrying country
Sentiment (VADER): {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Sentiment (Transformer): 4 stars
------------------------

Enter the path to your news video (or 'done' to finish): done


In [None]:
# Summarize overall sentiment using VADER
if not all_sentiments:
    # Nothing was processed (the user typed 'done' straight away, or every
    # video failed), so averaging would divide by zero.
    print("\nNo videos were processed successfully; no VADER sentiment to summarize.")
else:
    # Mean of each VADER score across all processed videos.
    overall_sentiment_vader = {
        key: sum(s['vader'][key] for s in all_sentiments) / len(all_sentiments)
        for key in ('neg', 'neu', 'pos', 'compound')
    }

    print("\nOverall sentiment for today's news (VADER):")
    print(f"Negative: {overall_sentiment_vader['neg']:.2f}")
    print(f"Neutral: {overall_sentiment_vader['neu']:.2f}")
    print(f"Positive: {overall_sentiment_vader['pos']:.2f}")
    print(f"Compound: {overall_sentiment_vader['compound']:.2f}")

    # Classify on the compound score rather than by comparing pos/neg with neu:
    # neg/neu/pos are proportions of the text, and neu dominates most prose, so
    # that comparison labels almost everything "Neutral". The thresholds below
    # are the conventional VADER cut-offs (>= 0.05 positive, <= -0.05 negative).
    mean_compound = overall_sentiment_vader['compound']
    if mean_compound >= 0.05:
        overall_sentiment = "Positive"
    elif mean_compound <= -0.05:
        overall_sentiment = "Negative"
    else:
        overall_sentiment = "Neutral"

    print(f"\nOverall sentiment: {overall_sentiment}")




Overall sentiment for today's news (VADER):
Negative: 0.00
Neutral: 1.00
Positive: 0.00
Compound: 0.00

Overall sentiment: Neutral


In [None]:
# Sample data (replace with your actual data)
# NOTE: this rebinds all_sentiments, replacing whatever the processing loop
# collected; remove the assignment to summarise the real results.
all_sentiments = [
    {'transformer': 'positive'},
    {'transformer': 'negative'},
    {'transformer': 'neutral'},
    {'transformer': 'positive'},
    {'transformer': 'neutral'}
]


def normalize_transformer_label(label):
    """Map a transformer sentiment label onto 'positive' / 'negative' / 'neutral'.

    Star-rating labels of the form "N star(s)" (the notebook's pipeline prints
    e.g. "4 stars") are bucketed as 1-2 -> negative, 3 -> neutral,
    4 and above -> positive. Any other label is returned stripped and
    lower-cased, so plain polarity labels pass through unchanged.
    """
    text = str(label).strip().lower()
    parts = text.split()
    if len(parts) == 2 and parts[0].isdigit() and parts[1] in ('star', 'stars'):
        stars = int(parts[0])
        if stars <= 2:
            return 'negative'
        if stars >= 4:
            return 'positive'
        return 'neutral'
    return text


# Summarize overall sentiment using Transformer
# Labels are normalised first: counting raw "N stars" labels against
# 'positive'/'negative'/'neutral' would always yield zero matches.
transformer_sentiments = [normalize_transformer_label(s['transformer']) for s in all_sentiments]
positive_count = transformer_sentiments.count('positive')
negative_count = transformer_sentiments.count('negative')
neutral_count = transformer_sentiments.count('neutral')
total = len(transformer_sentiments)

print("\nOverall sentiment for today's news (Transformer):")
if total == 0:
    # No results: percentages would divide by zero.
    print("No transformer sentiment results to summarize.")
else:
    print(f"Positive: {positive_count / total * 100:.2f}%")
    print(f"Negative: {negative_count / total * 100:.2f}%")
    print(f"Neutral: {neutral_count / total * 100:.2f}%")

    # Determine the overall sentiment based on the highest count;
    # ties fall through to "Neutral".
    if positive_count > negative_count and positive_count > neutral_count:
        overall_sentiment = "Positive"
    elif negative_count > positive_count and negative_count > neutral_count:
        overall_sentiment = "Negative"
    else:
        overall_sentiment = "Neutral"

    print(f"\nOverall sentiment: {overall_sentiment}")



Overall sentiment for today's news (Transformer):
Positive: 40.00%
Negative: 20.00%
Neutral: 40.00%

Overall sentiment: Neutral
