In [1]:
import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text("text") + "\n"
    return text

# 🔍 Example Usage

pdf_text = extract_text_from_pdf("Comprehensive Health Data Analysis for Early Dementia Diagnosis A Machine Learning Approach.pdf")
summary = "Key insights from the paper:\n" + "\n".join(pdf_text.split(".")[:5])  # Take first 5 sentences
print(summary)


Key insights from the paper:
Comprehensive Health Data Analysis for Early
Dementia Diagnosis: A Machine Learning Approach
Jaimin Salvi∗, Aagam Shah†
∗Department of Computer Science, Nirma University, India
†Department of Computer Science, Nirma University, India
Abstract—This paper investigates the application of machine
learning (ML) models to predict dementia diagnosis using a
comprehensive health dataset
 The dataset includes key health-
related features such as diabetic status, heart rate, blood oxygen
levels, body temperature, cognitive test scores, and lifestyle
factors
 We employed ML techniques to predict dementia onset,
leveraging algorithms such as support vector machines (SVM)
and logistic regression
 Our findings demonstrate that ML
models, particularly SVM and logistic regression, can effectively
identify key predictors and achieve substantial accuracy in
dementia prediction
 The primary aim of this study is to validate
the performance of ML models in detecting dementia at

In [2]:
from gtts import gTTS

tts = gTTS(summary, lang="en")
tts.save("audio.mp3")


In [2]:
import moviepy.config as mp_config
import speech_recognition as sr
from pydub import AudioSegment

# Function to convert MP3 to WAV
def convert_mp3_to_wav(mp3_file, wav_file):
    audio = AudioSegment.from_mp3(mp3_file)
    audio.export(wav_file, format="wav")

# Function to extract text and timing info from audio using SpeechRecognition
def extract_text_from_audio_with_timestamps(audio_file):
    recognizer = sr.Recognizer()
    audio = sr.AudioFile(audio_file)
    
    with audio as source:
        audio_data = recognizer.record(source)
        
    try:
        # Using Google Web Speech API with timestamps enabled
        result = recognizer.recognize_google(audio_data, show_all=True)
        
        # Extract the words and their timings
        words_with_timestamps = []
        if 'alternative' in result:
            for alternative in result['alternative']:
                if 'timestamps' in alternative:
                    for word_info in alternative['timestamps']:
                        word, start_time, end_time = word_info
                        words_with_timestamps.append((word, start_time, end_time))
        return words_with_timestamps
    except sr.UnknownValueError:
        return []
    except sr.RequestError:
        return []

# Convert MP3 to WAV
audio_file_mp3 = "audio.mp3"
audio_file_wav = "audio.wav"
convert_mp3_to_wav(audio_file_mp3, audio_file_wav)

# Extract text and timings from the audio file (now in WAV format)
words_with_timestamps = extract_text_from_audio_with_timestamps(audio_file_wav)

# Create text clips based on extracted words and timestamps
text_clips = []
for word, start_time, end_time in words_with_timestamps:
    text_clip = TextClip(
        txt=word,
        font="Arial-Bold",
        fontsize=24,
        color="white"
    ).set_duration(end_time - start_time).set_position("center").set_start(start_time).crossfadein(0.5)
    text_clips.append(text_clip)

# Add images as background or for context (you can adjust images' visibility time)
image1 = ImageClip("./Test_Image/research_image1.png").set_duration(60).resize(width=720).set_position("center")

# Load audio file (now in WAV format)
audio = AudioFileClip(audio_file_wav)

# Combine everything (video, images, and text clips)
final_video = CompositeVideoClip([image1] + text_clips)
final_video = final_video.set_audio(audio).set_duration(audio.duration)

# Export the final video
final_video.write_videofile("research_paper_reel.mp4", fps=24)


Moviepy - Building video research_paper_reel.mp4.
MoviePy - Writing audio in research_paper_reelTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
Moviepy - Writing video research_paper_reel.mp4



                                                                  

Moviepy - Done !
Moviepy - video ready research_paper_reel.mp4




In [4]:
import speech_recognition as sr
filename = "audio.wav"
r = sr.Recognizer()
with sr.AudioFile(filename) as source:
    # listen for the data (load audio to memory)
    audio_data = r.record(source)
    # recognize (convert from speech to text)
    text = r.recognize_google(audio_data)
    print(text)

key insights from the paper comprehensive health data analysis for early dementia diagnosis a machine learning approach jimin Salvi asterisk operator aagam Shah asteris Cooperative department of computer science Nirma University India department of computer science Nirma University India abstract this paper investigates the application of machine learning ml models to predict dementia diagnosis using a comprehensive health data set the data set includes ki health related features such as diabetic status heart rate blood oxygen levels body temperature cognitive test scores and Lifestyle factors we employed ml techniques to predict debenture one set algorithm support vector machines SVM and Logistic regression of finding demonstrate that ml models particularly SVM and Logistic regression prediction the primary aim of the study is to validate the performance of ml models in detecting dimension at an early stage in to identify the most influential health and cognitive factors contributing 

In [None]:
import pyttsx3
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip, AudioFileClip
import time

# Step 1: Create the audio (this part was done earlier)
extracted_text = """
Key insights from the paper. 
Comprehensive Health Data Analysis for Early 
Dementia Diagnosis: A Machine Learning Approach.
Jaimin Salvi, Aagam Shah
Department of Computer Science, Nirma University, India. 
Abstract—This paper investigates the application of machine 
learning (ML) models to predict dementia diagnosis using a 
comprehensive health dataset. 
The dataset includes key health-related features such as diabetic status, 
heart rate, blood oxygen levels, body temperature, cognitive test scores, 
and lifestyle factors. 
We employed ML techniques to predict dementia onset, leveraging 
algorithms such as support vector machines (SVM) and logistic regression. 
Our findings demonstrate that ML models, particularly SVM and logistic 
regression, can effectively identify key predictors and achieve substantial 
accuracy in dementia prediction. 
The primary aim of this study is to validate the performance of ML models 
in detecting dementia at an early stage and to identify the most influential 
health and cognitive factors contributing to dementia risk.
"""

# Initialize the TTS engine
engine = pyttsx3.init()

# Set properties (rate and volume)
engine.setProperty('rate', 160)  # Adjust rate
engine.setProperty('volume', 1)  # Full volume

# Save audio to file
audio_file = "output_audio_with_pauses.mp3"
engine.save_to_file(extracted_text, audio_file)
engine.runAndWait()

# Step 2: Create the text clips
text_chunks = [
    ("Key insights from the paper.", 0, 4),
    ("Comprehensive Health Data Analysis for Early Dementia Diagnosis: A Machine Learning Approach.", 4, 9),
    ("Jaimin Salvi, Aagam Shah", 9, 13),
    ("Department of Computer Science, Nirma University, India.", 13, 17),
    ("Abstract—This paper investigates the application of machine learning (ML) models to predict dementia diagnosis using a comprehensive health dataset.", 17, 23),
    ("The dataset includes key health-related features such as diabetic status, heart rate, blood oxygen levels, body temperature, cognitive test scores, and lifestyle factors.", 23, 30),
    ("We employed ML techniques to predict dementia onset, leveraging algorithms such as support vector machines (SVM) and logistic regression.", 30, 37),
    ("Our findings demonstrate that ML models, particularly SVM and logistic regression, can effectively identify key predictors and achieve substantial accuracy in dementia prediction.", 37, 45),
    ("The primary aim of this study is to validate the performance of ML models in detecting dementia at an early stage and to identify the most influential health and cognitive factors contributing to dementia risk.", 45, 55)
]

# Step 3: Create text clips and sync with audio
text_clips = []

for text, start_time, end_time in text_chunks:
    text_clip = TextClip(
        txt=text,
        font="Arial-Bold",
        fontsize=24,
        color="white",
        align="center",
        size=(720, 480)
    ).set_duration(end_time - start_time).set_start(start_time).set_position("center")
    text_clips.append(text_clip)

# Step 4: Add the audio
audio = AudioFileClip(audio_file)

# Step 5: Combine text clips with audio in a final video
final_video = CompositeVideoClip(text_clips)
final_video = final_video.set_audio(audio).set_duration(audio.duration)

# Step 6: Export the final video
final_video.write_videofile("final_video_with_text_and_audio.mp4", fps=24)


Moviepy - Building video final_video_with_text_and_audio.mp4.
MoviePy - Writing audio in final_video_with_text_and_audioTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
Moviepy - Writing video final_video_with_text_and_audio.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready final_video_with_text_and_audio.mp4




In [8]:
import whisper

# Load the Whisper model (small version, but you can use large for better accuracy)
model = whisper.load_model("base")

# Transcribe the audio and get subtitles with timestamps
def transcribe_audio(audio_file):
    result = model.transcribe(audio_file, word_timestamps=True)
    return result["segments"]

# Extracted subtitles with timestamps
audio_file = "audio.mp3"
subtitles = transcribe_audio(audio_file)

# Convert the result into .srt format
def save_srt(subtitles, output_file):
    with open(output_file, "w") as f:
        for i, segment in enumerate(subtitles):
            start_time = segment["start"]
            end_time = segment["end"]
            text = segment["text"]
            start_time_str = format_time(start_time)
            end_time_str = format_time(end_time)
            f.write(f"{i + 1}\n")
            f.write(f"{start_time_str} --> {end_time_str}\n")
            f.write(f"{text}\n\n")

# Format the time to the correct .srt format (HH:MM:SS,MS)
def format_time(seconds):
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    ms = int((s - int(s)) * 1000)
    return f"{int(h):02}:{int(m):02}:{int(s):02},{ms:03}"

# Save the subtitles to an SRT file
save_srt(subtitles, "output_subtitles.srt")


In [11]:
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import pysrt

# Load video
video = VideoFileClip("research_paper_reel.mp4")

# Load the .srt file
subtitles_file = "output_subtitles.srt"
subs = pysrt.open(subtitles_file)

# Function to create TextClip for each subtitle
def create_subtitle_clip(subtitle, font="Arial", fontsize=24, color="white"):
    # Create a TextClip for the subtitle
    return TextClip(subtitle.text, font=font, fontsize=fontsize, color=color, bg_color="black", size=video.size).set_position(('center', 'bottom')).set_duration(subtitle.duration.seconds + subtitle.duration.milliseconds / 1000).set_start(subtitle.start.seconds + subtitle.start.milliseconds / 1000)

# Create a list of subtitle clips
subtitle_clips = [create_subtitle_clip(sub) for sub in subs]

# Add subtitle clips to the video
final_video = CompositeVideoClip([video] + subtitle_clips)

# Export the final video with subtitles
final_video.write_videofile("final_video_with_text_and_audio.mp4", fps=24)


Moviepy - Building video final_video_with_text_and_audio.mp4.
MoviePy - Writing audio in final_video_with_text_and_audioTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
Moviepy - Writing video final_video_with_text_and_audio.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready final_video_with_text_and_audio.mp4




## Adding Subtitles

In [12]:
audiofilename="audio.mp3"
import whisper #might take some time (approx 3- 5min depending on audio length)
model = whisper.load_model("medium")
result = model.transcribe(audiofilename,word_timestamps=True)
print(result)

100%|█████████████████████████████████████| 1.42G/1.42G [22:13<00:00, 1.15MiB/s]


{'text': ' Key insights from the paper Comprehensive health data analysis for early Dementia diagnosis A machine learning approach Jamin Salvi asterisk operator Agam Shah asterisk operator Department of Computer Science Nirma University India Department of Computer Science Nirma University India abstract This paper investigates the application of machine learning milliliter Models to predict dementia diagnosis using a Comprehensive health data set The data set includes key health related features such as Diabetic status Heart rate Blood oxygen Levels Body temperature Cognitive test scores And lifestyle Factors We employed milliliter techniques to predict dementia onset Leveraging algorithms such as support vector machines SVM And logistic regression Our findings demonstrate that milliliter Models Particularly SVM and logistic regression Can effectively identify key predictors and achieve Substantial accuracy in Dementia prediction The primary aim of this study is to validate The perfor