# **Video Ripper**

In [10]:
import requests
from bs4 import BeautifulSoup
import re
import m3u8
import ffmpeg
from tqdm import tqdm
import os
import json

In [11]:
# Function to fetch the page for a given movie ID
def get_movie_page(movie_id, timeout=15):
    """Download the embed page HTML for a movie.

    Args:
        movie_id: Identifier interpolated into the embed URL.
        timeout: Seconds to wait on the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise None. The reason for a failure is printed instead of being
        silently discarded.
    """
    url = f'https://vidsrc.su/embed/movie/{movie_id}'  # Construct URL using the movie ID (find the IDs in https://pstream.org/)

    try:
        # Send GET request to the movie embed URL
        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure (DNS, connection, timeout, invalid URL, ...)
        print(f"Request to {url} failed: {e}")
        return None

    # Only a 200 response carries the page we want
    if response.status_code == 200:
        return response.text  # Return HTML content

    print(f"Unexpected status code {response.status_code} for {url}")
    return None

# Example run: fetch the embed page for a single movie ID
movie_id = "99861"  # Replace with any movie ID you want to test
page_html = get_movie_page(movie_id)

# Show the HTML when the fetch worked, otherwise report the failure
print(page_html if page_html else "Failed to fetch the page")

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Avengers: Age of Ultron</title>
    <link rel="stylesheet" href="/css/netflix.css">
    <script src="https://ssl.p.jwpcdn.com/player/v/8.33.2/jwplayer.js"></script>
    <script>jwplayer.key = "cLGMn8T20tGvW+0eXPhq4NNmLB57TrscPjd1IyJF84o="</script>
    <style>
      body, html {
          margin: 0;
          padding: 0;
          height: 100%;
          overflow: hidden;
      }
      #player {
          width: 100%;
          height: 100vh;
      }
      #server-select {
          position: absolute;
          top: 10px;
          right: 10px;
          z-index: 10;
          padding: 10px;
          border: none;
          background-color: #333;
          color: #fff;
          border-radius: 5px;
          font-size: 16px;
          cursor: pointer;
      }
      #server-select option {
          background-color: #444;
          c

In [12]:
# Function to extract fixed server URLs
def extract_fixed_servers(page_html):
    """Collect the .m3u8 URLs listed in the page's ``fixedServers`` script.

    Every ``<script>`` whose text mentions ``fixedServers`` is scanned line by
    line; lines containing ``label: 'Server`` contribute every http(s) URL
    ending in ``.m3u8`` that they hold.

    Args:
        page_html: HTML text of the embed page.

    Returns:
        A list of URL strings in document order (duplicates are kept).
    """
    # Quotes are excluded from the URL body so that a match cannot run past a
    # string's closing quote and swallow a second URL on the same line.
    url_pattern = re.compile(r"https?://[^\s'\"]+\.m3u8")

    fixed_servers_urls = []
    soup = BeautifulSoup(page_html, 'html.parser')

    for script in soup.find_all('script'):
        script_text = script.string
        # script.string is None for empty or multi-node tags, so skip those
        if not script_text or "fixedServers" not in script_text:
            continue

        for line in script_text.splitlines():
            if "label: 'Server" in line:
                # findall keeps every URL on the line, not just the first one
                fixed_servers_urls.extend(url_pattern.findall(line))

    return fixed_servers_urls

# Function to extract MultiLang URLs
def extract_multilang(page_html):
    """Read the ``const MultiLang = [...]`` array embedded in the page.

    The array literal is located with a regex, its single quotes are swapped
    for double quotes so it can be parsed as JSON, and every entry carrying
    both ``m3u8_url`` and ``language`` is kept.

    Args:
        page_html: HTML text of the embed page.

    Returns:
        A list of ``{'m3u8_url': ..., 'language': ...}`` dicts; empty when the
        script is absent, the array is not found, or it fails to parse.
    """
    soup = BeautifulSoup(page_html, 'html.parser')

    # Locate the <script> whose text declares the MultiLang constant
    script_tag = soup.find('script', string=re.compile('const MultiLang ='))
    if not script_tag:
        return []

    # Capture the array literal up to the first "];"
    found = re.search(r"const MultiLang = (\[.*?\]);", script_tag.string, re.DOTALL)
    if not found:
        return []

    # JavaScript-style single quotes become JSON double quotes
    json_text = found.group(1).replace("'", '"')
    try:
        entries = json.loads(json_text)
    except json.JSONDecodeError as e:
        print(f"Error parsing MultiLang JSON: {e}")
        return []

    return [
        {'m3u8_url': item['m3u8_url'], 'language': item['language']}
        for item in entries
        if 'm3u8_url' in item and 'language' in item
    ]

# Main function to display the results
def scrape_video_links(page_html):
    """Extract both kinds of stream links from the page and print them.

    Args:
        page_html: HTML text of the embed page.

    Returns:
        A ``(fixed_server_urls, multi_lang_entries)`` tuple: the list of URL
        strings from ``extract_fixed_servers`` and the list of dicts from
        ``extract_multilang``.
    """
    server_links = extract_fixed_servers(page_html)
    language_links = extract_multilang(page_html)

    # Plain URLs first, one per line
    print("Video URLs (from fixedServers):")
    for link in server_links:
        print(link)

    # Then the language-tagged entries
    print("\nVideo URLs with Languages (from MultiLang):")
    for entry in language_links:
        print(f"Language: {entry['language']}, URL: {entry['m3u8_url']}")

    return server_links, language_links

# Scrape the fetched page; both lists are reused by the cells below
fixed_servers_urls, multi_lang_urls = scrape_video_links(page_html)

Video URLs (from fixedServers):
https://tralvoxmoon.xyz/file1/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/cGxheWxpc3QubTN1OA==.m3u8
https://tralvoxmoon.xyz/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/cGxheWxpc3QubTN1OA==.m3u8
https://tralvoxmoon.xyz/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/cGxheWxpc3QubTN1OA==.m3u8
https://tralvoxmoon.xyz/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/MTA4MA==/aW5kZXgubTN1OA==.m3u8

Video URLs with Languages (from MultiLang):
La

In [13]:
# Function to fetch and parse the m3u8 file for available qualities
def get_best_quality_from_m3u8(url, timeout=15):
    """Fetch a playlist and return its highest-quality variant stream.

    Variants are ranked by vertical resolution first and bandwidth second;
    variants without a resolution rank as height 0.

    Args:
        url: URL of the playlist to inspect.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        A dict with ``resolution``, ``bandwidth`` and ``url`` for the best
        variant (``url`` is resolved against the playlist URL, so relative
        variant URIs become absolute). None when the request or parsing
        fails, or when the playlist lists no variants.
    """
    from urllib.parse import urljoin  # stdlib; only needed by this function

    try:
        # Fetch the m3u8 file from the URL
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Parse the m3u8 content
        playlist = m3u8.loads(response.text)

        # Extract the qualities (resolutions and bandwidth)
        qualities = [
            {
                'resolution': stream.stream_info.resolution,
                'bandwidth': stream.stream_info.bandwidth,
                # urljoin leaves absolute URIs untouched and resolves relative ones
                'url': urljoin(url, stream.uri),
            }
            for stream in playlist.playlists
        ]

        # Without this guard max() raises a cryptic "empty sequence" error
        if not qualities:
            print(f"No variant streams listed in {url} (it may already be a media playlist).")
            return None

        def rank(quality):
            height = quality['resolution'][1] if quality['resolution'] else 0
            # "or 0" keeps the comparison valid if a bandwidth is missing
            return (height, quality['bandwidth'] or 0)

        # Return the best quality (highest resolution, then bandwidth)
        return max(qualities, key=rank)

    except Exception as e:
        print(f"Error fetching or parsing m3u8 from {url}: {e}")
        return None

# Function to extract the best qualities available from fixed server URLs
def extract_best_qualities_from_fixed_servers(fixed_servers_urls):
    """Look up the best variant for each server playlist.

    Args:
        fixed_servers_urls: Iterable of playlist URLs to inspect, in order.

    Returns:
        A list with one entry per URL for which
        ``get_best_quality_from_m3u8`` returned a truthy result; URLs that
        yielded nothing are left out.
    """
    collected = []

    for playlist_url in fixed_servers_urls:
        print(f"Processing server: {playlist_url}")
        candidate = get_best_quality_from_m3u8(playlist_url)

        # Servers that produced no usable result are skipped
        if not candidate:
            continue
        collected.append(candidate)

    return collected

# Best variant per fixed server, gathered for the selection step below
best_qualities = extract_best_qualities_from_fixed_servers(fixed_servers_urls)

# Report what was found (or that nothing usable came back)
if not best_qualities:
    print("No valid qualities found for fixed servers.")
else:
    print("\nBest Qualities from Fixed Servers:")
    for quality in best_qualities:
        print(f"Resolution: {quality['resolution']}, Bandwidth: {quality['bandwidth']}, URL: {quality['url']}")

# Pick the first MultiLang entry whose language matches (case-insensitive)
target_language = 'english'
target_url = None
for entry in multi_lang_urls:
    if entry['language'].lower() != target_language:
        continue
    target_url = entry['m3u8_url']
    break

if not target_url:
    print("English URL not found")
else:
    print(f"Selected English URL: {target_url}")

Processing server: https://tralvoxmoon.xyz/file1/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/cGxheWxpc3QubTN1OA==.m3u8
Processing server: https://tralvoxmoon.xyz/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/cGxheWxpc3QubTN1OA==.m3u8
Processing server: https://tralvoxmoon.xyz/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/cGxheWxpc3QubTN1OA==.m3u8
Processing server: https://tralvoxmoon.xyz/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/MTA4MA==/aW5kZXgubTN1OA==.m3u8
Err

In [14]:
# Simply pick the first available quality link from the list (or any specific logic)
def select_one_best_quality(best_qualities):
    """Return the URL of the first entry in ``best_qualities``.

    Args:
        best_qualities: Sequence of dicts with ``resolution``, ``bandwidth``
            and ``url`` keys.

    Returns:
        The ``url`` of the first entry, or None when the sequence is empty
        or falsy. The choice (or its absence) is printed.
    """
    # Guard clause: nothing to choose from
    if not best_qualities:
        print("No valid best qualities found.")
        return None

    selected_quality = best_qualities[0]
    print(f"Selected URL for best quality: Resolution: {selected_quality['resolution']}, Bandwidth: {selected_quality['bandwidth']}, URL: {selected_quality['url']}")
    return selected_quality['url']

# Choose the playlist URL to download; this is None when best_qualities is empty
best_quality_url = select_one_best_quality(best_qualities)

Selected URL for best quality: Resolution: (1920, 1080), Bandwidth: 4500000, URL: https://tralvoxmoon.xyz/file1/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/MTA4MA==/aW5kZXgubTN1OA==.m3u8


In [15]:
def fetch_playlist(m3u8_url, timeout=15):
    """Download a playlist and return its text.

    Args:
        m3u8_url: URL of the playlist. A falsy value (for example None when
            no quality was selected) is reported and short-circuits.
        timeout: Seconds to wait on the server before giving up, so a stalled
            connection cannot hang the notebook.

    Returns:
        The playlist text on HTTP 200, otherwise None. A 300-character
        preview is printed on success; failures print their reason.
    """
    if not m3u8_url:
        print("No playlist URL provided.")
        return None

    try:
        # Fetch the playlist content
        response = requests.get(m3u8_url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the playlist: {e}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch the playlist. Status Code: {response.status_code}")
        return None

    # The playlist is a plain text file, so we treat it as text
    playlist_content = response.text
    print("Fetched Playlist Content:")
    print(playlist_content[:300])  # Print the first 300 characters as a preview
    return playlist_content

# Download the selected playlist; the result is None if the fetch failed
playlist_content = fetch_playlist(best_quality_url)

Fetched Playlist Content:
#EXTM3U
#EXT-X-TARGETDURATION:13
#EXT-X-ALLOW-CACHE:YES
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-VERSION:3
#EXT-X-MEDIA-SEQUENCE:1
#EXTINF:10.385,
https://velloxfire.pro/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq


In [16]:
def extract_segments_from_playlist(playlist_content, base_url=None):
    """Parse a media playlist into an ordered list of segments.

    The playlist is read line by line: each ``#EXTINF:<duration>[,<title>]``
    tag supplies the duration for the next URI line. Integer durations,
    titles after the comma and other tags between ``#EXTINF`` and the URI
    are all accepted.

    Args:
        playlist_content: Playlist text. None or an empty string yields an
            empty list.
        base_url: Optional URL of the playlist itself, used to resolve
            relative segment URIs. Without it only absolute http(s) URIs
            are returned.

    Returns:
        A list of ``{'duration': float, 'url': str}`` dicts in playlist order.
    """
    from urllib.parse import urljoin  # stdlib; only needed by this function

    segments = []
    if not playlist_content:
        return segments

    pending_duration = None  # duration from the most recent #EXTINF tag
    for raw_line in playlist_content.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith("#EXTINF:"):
            duration_text = line[len("#EXTINF:"):].split(",", 1)[0].strip()
            try:
                pending_duration = float(duration_text)
            except ValueError:
                # Malformed duration: ignore the URI that follows it
                pending_duration = None
            continue

        if line.startswith("#"):
            # Any other tag or comment; keep waiting for the URI line
            continue

        if pending_duration is None:
            # A URI with no preceding #EXTINF is not a segment entry
            continue

        url = urljoin(base_url, line) if base_url else line
        if url.startswith(("http://", "https://")):
            segments.append({
                'duration': pending_duration,
                'url': url
            })
        pending_duration = None

    return segments

# Parse the downloaded playlist into a list of {'duration', 'url'} dicts
segments = extract_segments_from_playlist(playlist_content)
# Print every extracted segment with its duration (one line per segment)
for segment in segments:
    print(f"Duration: {segment['duration']} seconds, URL: {segment['url']}")

Duration: 10.385 seconds, URL: https://velloxfire.pro/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/MTA4MA==/c2VnLTEtdjEtYTEuanBn
Duration: 2.044 seconds, URL: https://velloxfire.pro/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/MTA4MA==/c2VnLTQtdjEtYTEuaHRtbA==
Duration: 4.505 seconds, URL: https://velloxfire.pro/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe00x+lqe0RDCnCof7~JJl4E=/MTA4MA==/c2VnLTUtdjEtYTEuanM=
Duration: 2.002 seconds, URL: https://velloxfire.pro/file2/LXXf1bCCk6uega6YkWzMFdGE4VxJmWmKCRXiHl+pTEjjgmf8fsO~jmToyS+x+QoFDK1HGS2ILUpPMgIAmEHYT8dlYY6VopcPv7sV4TS~pjmJxvMQGMqfgGXApLZuJpGFrOfzq8eiPxWGmKr8mBQIe0

In [17]:
# Function to create a folder if it doesn't exist
def create_folder(folder_name):
    """Ensure that ``folder_name`` exists as a directory.

    Missing parent directories are created as well. Calling this on an
    existing directory is a no-op.

    Args:
        folder_name: Path of the directory to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok avoids the race between checking for the folder and creating it
    os.makedirs(folder_name, exist_ok=True)

def download_segment(segment, index, folder_name, timeout=30):
    """Download one segment into ``folder_name`` unless it is already there.

    The data is streamed into a ``.part`` file that is renamed to its final
    name only after the transfer completes. An interrupted download therefore
    never leaves a truncated ``.ts`` file that a later run would skip as
    already downloaded.

    Args:
        segment: Dict with ``url`` and ``duration`` keys.
        index: Position of the segment, embedded in the filename so the
            files can be ordered later.
        folder_name: Directory that receives the file.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        None. A non-200 response is reported with a printed message; errors
        raised while requesting or writing propagate to the caller after the
        partial file has been removed.
    """
    url = segment['url']
    duration = segment['duration']
    filename = f"segment_{index}_{duration}s.ts"  # Index added to filename for order tracking

    # Full path to save the segment in the folder
    file_path = os.path.join(folder_name, filename)

    # Check if the segment already exists
    if os.path.exists(file_path):
        return

    partial_path = file_path + ".part"
    try:
        # The context manager releases the streamed connection when done
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download {filename}. Status Code: {response.status_code}")
                return

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    if chunk:
                        f.write(chunk)

        # Publish the file under its final name only once it is complete
        os.replace(partial_path, file_path)
    finally:
        # Still present only if the transfer failed part-way through
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Folder where the segments will be downloaded
folder_name = "downloaded_segments"
create_folder(folder_name)  # Create folder if it doesn't exist

# 1-based index of the first segment to fetch. Raise it to resume an
# interrupted run from a later segment.
start_index = 1

# One progress bar for the whole playlist. `initial` counts the segments
# skipped via start_index, so the bar still reaches 100% when resuming.
with tqdm(total=len(segments), initial=start_index - 1, desc="Downloading segments") as bar:
    for idx, segment in enumerate(segments[start_index - 1:], start=start_index):
        download_segment(segment, idx, folder_name)
        bar.update(1)  # Update progress bar for each segment


Downloading segments: 100%|██████████| 1850/1850 [00:00<00:00, 265271.70it/s]


In [24]:
# Function to create a list of .ts files in the folder
def create_file_list(folder_name):
    """List the downloaded segment files in playback order.

    Only names ending in ``.ts`` whose second underscore-separated field is
    an integer (``segment_<index>_<duration>s.ts``) are considered. Any other
    directory entry is ignored instead of crashing the sort.

    Args:
        folder_name: Directory holding the segment files.

    Returns:
        A list of paths (``folder_name`` joined with each filename) sorted by
        segment index; empty when no matching file exists.
    """
    indexed_names = []
    for filename in os.listdir(folder_name):
        # Filter before parsing so unrelated files never reach int()
        if not filename.endswith('.ts'):
            continue
        try:
            index = int(filename.split('_')[1])
        except (IndexError, ValueError):
            continue
        indexed_names.append((index, filename))

    # Numeric sort, so segment_10 comes after segment_2
    indexed_names.sort()
    segment_files = [os.path.join(folder_name, name) for _, name in indexed_names]

    if not segment_files:
        print("No .ts files found in the folder.")
    return segment_files

# Function to merge the .ts segments using ffmpeg
def merge_segments(folder_name, output_filename="output_video.mp4"):
    """Concatenate the downloaded segments into one file with ffmpeg.

    The segment list is written to a temporary concat list file using
    absolute, quote-escaped paths, handed to ffmpeg with stream copy, and
    removed afterwards whether or not the merge succeeded.

    Args:
        folder_name: Directory holding the segment files.
        output_filename: Path of the merged output file.

    Returns:
        None. Success or an ffmpeg error is reported with a printed message.
    """
    import tempfile  # stdlib; only needed by this function

    # Create the list of segment files
    segment_files = create_file_list(folder_name)

    # If no files were found, skip the merge
    if not segment_files:
        print("No segments to merge.")
        return

    # delete=False so ffmpeg can open the file after it is closed here
    list_file = tempfile.NamedTemporaryFile(
        'w', suffix='.txt', delete=False, encoding='utf-8'
    )
    try:
        with list_file:
            for file in segment_files:
                # Absolute paths avoid depending on where the list file lives;
                # a single quote inside a quoted entry is written as '\''
                escaped = os.path.abspath(file).replace("'", "'\\''")
                list_file.write(f"file '{escaped}'\n")

        try:
            # Run ffmpeg to concatenate the segments using the text file
            ffmpeg.input(list_file.name, format='concat', safe=0).output(output_filename, c='copy').run()
            print(f"Video successfully merged into {output_filename}")
        except ffmpeg.Error as e:
            print(f"Error occurred during merging: {e}")
    finally:
        # Do not leave the temporary list behind
        os.remove(list_file.name)

# Merge every segment found in the download folder into a single MP4
folder_name = "downloaded_segments"
merge_segments(folder_name, output_filename="output_video.mp4")


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

Video successfully merged into output_video.mp4


[mpegts @ 0x6403193729c0] start time for stream 2 is not set in estimate_timings_from_pts
[mp4 @ 0x640318ea7500] pts has no value
[NULL @ 0x640319a83780] missing picture in access unit with size 10
[mpegts @ 0x6403193729c0] start time for stream 2 is not set in estimate_timings_from_pts
[mp4 @ 0x640318ea7500] pts has no value
[NULL @ 0x6403193969c0] missing picture in access unit with size 10
[mpegts @ 0x6403193729c0] start time for stream 2 is not set in estimate_timings_from_pts
[mp4 @ 0x640318ea7500] pts has no value
[NULL @ 0x640319a83780] missing picture in access unit with size 10
[mpegts @ 0x6403193729c0] start time for stream 2 is not set in estimate_timings_from_pts
[mp4 @ 0x640318ea7500] pts has no value
[NULL @ 0x6403193969c0] missing picture in access unit with size 10
[mpegts @ 0x6403193729c0] start time for stream 2 is not set in estimate_timings_from_pts
[mp4 @ 0x640318ea7500] pts has no value
[NULL @ 0x640319a83780] missing picture in access unit with size 10
[mpegts @ 