# Migrating Videos from StreamingVideoProvider to Mux

In [None]:
import requests
import xmltodict
import json
import time
import urllib3
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from urllib.request import urlretrieve
from urllib.parse import urlparse, parse_qs, unquote
import csv
from requests.auth import HTTPBasicAuth

Generate the Access Key and Secret Token in the Mux API settings using the following [guide](https://www.mux.com/docs/core/make-api-requests#http-basic-auth). Also, replace the SVP username and password placeholders with your own login credentials.

In [None]:
# Mux API credentials. They are later passed to HTTPBasicAuth as the
# username (access key) and password (secret token) of each Mux request.
# Replace every <...> placeholder with a quoted string before running.
mux_access_keys = <Access Keys>
mux_secret_token = <Secret Token>
# Email and password typed into the SVP sign-in form by login_to_svp.
svp_username = <SVP LOGIN EMAIL>
svp_password = <SVP LOGIN PASSWORD>

Next, we need to set up a web driver, because SVP doesn't have a publicly available API that lets us easily export the videos. Instead, we simulate the steps a human would take:
1. Log into the platform.
2. Access the media files.
3. Download them.

We do not run the browser in headless mode, because it's preferable to watch the website and detect issues during crawling. 
One possible issue is that the algorithm for scrolling down the list fails at some point, so it's recommended to crawl in intervals so that issues are easy to detect. Experimentally, I found that crawling 300 items at a time works best.

In [None]:
# Chrome launch options for the crawler. Headless mode is deliberately left
# switched off so the browser window can be watched while crawling:
# options.add_argument("--headless")  # Run browser in headless mode
options = Options()
for chrome_flag in ("--disable-gpu", "--no-sandbox"):
    options.add_argument(chrome_flag)

In [None]:
# Start the Chrome session that the crawling and upload cells below reuse
# through the global `driver`.
driver = webdriver.Chrome(options=options)  # Adjust for your browser/driver setup

#### Algorithm for logging into streaming video provider

A valid email and password need to be used.

In [None]:
def login_to_svp(driver, email, password):
    """Log into StreamingVideoProvider and return the session cookies.

    Opens the member panel, waits for the page to settle and, if the browser
    landed on the sign-in view (URL contains ``#signIn``), fills in the
    credentials and submits the form. When the sign-in view is not shown the
    form step is skipped and the current cookies are returned as they are.

    Args:
        driver: Selenium WebDriver used to drive the browser.
        email: SVP login email typed into the ``email`` field.
        password: SVP login password typed into the ``password`` field.

    Returns:
        str: All browser cookies formatted as ``name=value`` pairs joined
        with ``;``, suitable for use as a ``Cookie`` request header.
    """

    def format_cookies(cookies):
        # Collapse Selenium's list of cookie dicts into one header value.
        return ";".join([f"{cookie['name']}={cookie['value']}" for cookie in cookies])

    login_url = "https://member.streamingvideoprovider.com"  # Update if necessary
    driver.get(login_url)

    # Fixed wait so the page (and any redirect to the sign-in view) can finish
    # loading before the URL is inspected.
    time.sleep(30)

    if "#signIn" in driver.current_url:
        email_element = driver.find_element(By.CSS_SELECTOR, '[name="email"]')
        email_element.send_keys(email)

        pwd_element = driver.find_element(By.CSS_SELECTOR, '[name="password"]')
        pwd_element.send_keys(password)

        button_element = driver.find_element(By.ID, 'sign_in')
        button_element.click()
        # Give the panel time to complete the login before cookies are read.
        time.sleep(20)

    return format_cookies(driver.get_cookies())


#### Algorithm for scrolling down the list

In [None]:
def load_more_items(driver):
    """Click the "Load more items" button to extend the media list.

    Args:
        driver: Selenium WebDriver showing the SVP media list.

    Returns:
        bool: True when the button was found and clicked, False when locating
        or clicking it failed (for example because the list is exhausted).
        Callers that ignore the return value behave exactly as before.
    """
    try:
        load_more_button = driver.find_element(By.XPATH, '//svp-button[@label="Load more items"]')
        load_more_button.click()
        time.sleep(10)  # Allow time for the new items to load
    except Exception as exc:
        print("No more items to load or button not found.")
        # Surface the failure type so a real crawling problem is not mistaken
        # for simply reaching the end of the list.
        print(f"Reason: {type(exc).__name__}")
        return False
    return True

#### Algorithm for fetching the clip id

SVP has two different IDs for each video: the data_id is used to mark a video for download, while the clip_id is used externally to identify the video, e.g. it's what's returned for storage in your application's database.

In [None]:
def fetch_ids(driver, start_count, batch_size=30):
    """Collect (data_id, clip_id) pairs for one batch of rows in the media list.

    Each row with ``data-index`` from ``start_count + 1`` through
    ``start_count + batch_size`` is clicked, then its ``data-id`` attribute
    and the text of the ``item_key`` element are read. The pair is appended
    to the module-level ``all_data_ids`` list.

    Args:
        driver: Selenium WebDriver showing the SVP media list.
        start_count: Number of rows already processed; the batch starts at
            the next ``data-index``.
        batch_size: Number of rows to read in this call (defaults to 30, the
            value that was previously hard-coded).

    Raises:
        Whatever Selenium raises when a row cannot be located or clicked;
        only the reading of the two IDs is treated as best-effort.
    """
    for index in range(start_count + 1, start_count + batch_size + 1):
        row = driver.find_element(By.XPATH, f"//tr[@data-index='{index}']")
        row.click()
        time.sleep(1)
        # Reset both IDs for every row so a failed lookup can neither raise a
        # NameError nor silently reuse the previous row's data_id.
        data_id = "N/A"
        clip_id = "N/A"
        try:
            data_id = row.get_attribute("data-id")
            clip_id = driver.find_element(By.CSS_SELECTOR, '[name="item_key"]').text
        except Exception as ex:
            print(ex)
        all_data_ids.append((data_id, clip_id))

As mentioned earlier, scrolling often runs into issues, so it makes sense to scroll through the list a little at a time. I scroll through the list 300 videos at a time; if an error arises, I make sure that the list is scrolled to the last entry and change the start_count passed to `process` to the next unprocessed item.

In [None]:
# Accumulates the (data_id, clip_id) tuples appended by fetch_ids.
# Re-running this cell discards everything collected so far.
all_data_ids = []

In [None]:
def process(start_count, item_count):
    """Log in, then crawl the media list in batches of 30 rows.

    Batches begin at ``start_count`` and advance by 30 while the batch start
    does not exceed ``item_count``. After each batch the "Load more items"
    button is pressed so the next rows are available.
    """
    login_to_svp(driver, svp_username, svp_password)
    for batch_start in range(start_count, item_count + 1, 30):
        fetch_ids(driver, batch_start)
        load_more_items(driver)
        print("section done")
        

In [None]:
# Batches start at offsets 0, 30, ..., 270, i.e. rows with data-index 1-300.
process(0, 270)

Afterwards, we need to persist the data ids and clip ids to a file. This is also useful when we are crawling in intervals.

In [None]:
def save_data_ids_to_csv(data_ids, file_name="data_ids_with_clip_ids.csv"):
    """Write (data_id, clip_id) pairs to a CSV file, numbering rows from 1.

    Args:
        data_ids: Iterable of ``(data_id, clip_id)`` pairs.
        file_name: Destination path; an existing file is overwritten.
    """
    header = ["Number", "Data ID", "Clip ID"]
    with open(file_name, mode="w", newline="", encoding="utf-8") as out:
        csv_writer = csv.writer(out)
        csv_writer.writerow(header)
        # One output row per pair, prefixed with its serial number.
        csv_writer.writerows(
            [serial, data_id, clip_id]
            for serial, (data_id, clip_id) in enumerate(data_ids, start=1)
        )
# Persist the IDs collected so far; the upload step further down reads this
# same file name.
save_data_ids_to_csv(all_data_ids, file_name="data_ids_with_clip_ids_900-1000.csv")

Next, we define the function for generating the download link when given the data_id.

In [None]:
def downloadFile(data_id, cookies):
    """Ask SVP for a download link for the video identified by ``data_id``.

    Args:
        data_id: SVP data ID of the video (sent as both ``clipId`` and ``id``).
        cookies: Session cookies as a ``Cookie`` header string, as returned
            by ``login_to_svp``.

    Returns:
        str: The ``dlLink`` value from the JSON response.

    Raises:
        requests.exceptions.RequestException: On timeout, connection failure
            or an HTTP error status.
        KeyError: If the JSON response does not contain ``dlLink``.
    """
    # Only the explicit Cookie header is sent. The previous extra `cookies=`
    # argument read the global `driver`, and requests does not regenerate the
    # Cookie header when one is already set, so it had no effect.
    response = requests.get(
        f"https://member.streamingvideoprovider.com/panel/server/deliveryClip?a=generateDownloadLink&clipId={data_id}&id={data_id}",
        headers={"Cookie": cookies},
        timeout=30,  # never hang forever on a stalled connection
    )
    # Fail loudly on an expired session or server error instead of trying to
    # parse an error page as JSON.
    response.raise_for_status()
    return response.json()['dlLink']

Afterwards, we need to find the file names; these can be extracted directly from the download link.

In [None]:
def extract_filename(url):
    """Return the file name embedded in a download link, or None.

    The name is taken from the ``response-content-disposition`` query
    parameter: everything after the last ``filename=`` is URL-decoded and
    stripped of surrounding double quotes. None is returned when the
    parameter or the ``filename=`` marker is absent.
    """
    query = parse_qs(urlparse(url).query)
    dispositions = query.get('response-content-disposition')
    if not dispositions:
        return None

    # Only the first occurrence of the parameter is considered.
    _, marker, raw_name = dispositions[0].rpartition('filename=')
    if not marker:
        return None

    # parse_qs has already decoded once; this second decode is kept on purpose
    # to match the existing behaviour.
    return unquote(raw_name).strip('"')

Following this, we need to upload the videos to Mux using the Mux API. For this, we can pass the download link generated by SVP directly to Mux, i.e. we do not need to download the videos first before uploading them to Mux.

In [None]:
def create_mux_upload(file_name, mux_token_id, mux_token_secret):
    """Create a Mux asset from a remote video URL.

    Args:
        file_name: Despite the name, this is the URL Mux should ingest from
            (the SVP download link). The name is kept for compatibility.
        mux_token_id: Mux access token ID, used as the basic-auth username.
        mux_token_secret: Mux secret token, used as the basic-auth password.

    Returns:
        tuple: ``(asset_id, playback_id)`` taken from the ``data`` object of
        the Mux response; the playback ID is the first entry of
        ``playback_ids``.

    Raises:
        requests.exceptions.RequestException: On timeout, connection failure
            or an HTTP error status from Mux.
        KeyError, IndexError: If the response lacks the expected fields.
    """
    url = "https://api.mux.com/video/v1/assets"
    headers = {
        "Content-Type": "application/json",
    }
    payload = {
        "cors_origin": "*",
        "playback_policy": ["public"],
        "video_quality": "basic",
        "input": [
            {
                "url": file_name
            }
        ],
    }
    # Store the original file name on the asset so it can be matched up
    # later. Leave the field out when no name could be extracted rather than
    # sending an explicit null.
    original_name = extract_filename(file_name)
    if original_name is not None:
        payload["passthrough"] = original_name

    # Make the POST request
    response = requests.post(
        url,
        json=payload,
        headers=headers,
        auth=HTTPBasicAuth(mux_token_id, mux_token_secret),
        timeout=30,  # never hang forever on a stalled connection
    )
    # Surface authentication or validation errors as an HTTP error instead of
    # an opaque KeyError on the missing "data" field.
    response.raise_for_status()
    asset = response.json()["data"]
    return asset["id"], asset["playback_ids"][0]["id"]


Now, we can begin to piece everything together:
1. We read the rows from the CSV file.
2. Generate the download link for each row.
3. Upload the video to Mux.
4. Get the asset ID and playback ID from Mux.
5. Write everything to a CSV file.

In [None]:
def extract_data_ids(csv_file, upload_csv_file):
    """Upload every video listed in ``csv_file`` to Mux and record the new IDs.

    For each row with a ``Data ID`` column, an SVP download link is generated
    and handed to Mux; the resulting asset and playback IDs are stored on the
    row. All rows are then written to ``upload_csv_file``.

    Args:
        csv_file: Input CSV with ``Number``, ``Data ID`` and ``Clip ID``.
        upload_csv_file: Output CSV; gains ``Mux Asset ID`` and
            ``Mux Playback ID`` columns. An existing file is overwritten.

    Raises:
        Any exception from the download-link or Mux calls is re-raised, but
        only after the rows processed so far have been written out.
    """
    cookies = login_to_svp(driver, svp_username, svp_password)
    with open(csv_file, mode='r', newline='', encoding='utf-8') as file:
        rows = list(csv.DictReader(file))

    try:
        for row in rows:
            if 'Data ID' not in row:
                continue
            download_link = downloadFile(row['Data ID'], cookies)
            assetId, playbackId = create_mux_upload(download_link, mux_access_keys, mux_secret_token)
            row['Mux Asset ID'] = assetId
            row['Mux Playback ID'] = playbackId
            print(f"Data Id: {row['Data ID']} done")
    finally:
        # Always persist progress: Mux assets already created must not be
        # lost just because a later row failed. Rows not processed yet get
        # empty Mux columns.
        with open(upload_csv_file, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=["Number", "Data ID", "Clip ID", 'Mux Asset ID', 'Mux Playback ID'])

            # Write the header
            writer.writeheader()

            # Write the updated rows
            writer.writerows(rows)

In [None]:
# Read the crawled IDs, upload each video to Mux, and write the combined
# result to mux_upload.csv.
extract_data_ids("data_ids_with_clip_ids_900-1000.csv", "mux_upload.csv")

#### Additional method of getting file names.

Apart from the method earlier stated for extracting the file names, SVP also provides an endpoint for getting the title of the file as part of the video properties. 

We first need to generate the auth token using the SVP API key and API code, which can be generated using the following [guide](https://help.streamingvideoprovider.com/en/articles/1356140-core-platform-api).

In [None]:
def get_auth_token(api_key, api_code):
    """Request an auth token from the SVP platform API.

    Args:
        api_key: SVP API key.
        api_code: SVP API code.

    Returns:
        str: The raw response body on success.
        dict: ``{"error": <message>}`` if the request fails, times out or
        returns an HTTP error status.
    """
    url = "https://www.streamingvideoprovider.com/"
    params = {
        "l": "api",
        "a": "svp_auth_get_token",
        "api_key": api_key,
        "api_code": api_code,
    }

    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        # A timeout is a RequestException, so it is reported like any other
        # request failure.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()  # Raise an error for HTTP response codes >= 400
        # NOTE(review): the body is returned unparsed. If the endpoint wraps
        # the token in XML, it must be extracted before use - confirm.
        return response.text
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}

In [None]:
# SVP platform API credentials passed to get_auth_token.
# Replace each <...> placeholder with a quoted string before running.
svp_api_key = <API KEY>
svp_secret_code = <API CODE>

In [None]:
# Module-level token read by extract_file_names_from_svp.
# get_auth_token returns a dict with an "error" key on failure, so check the
# value before continuing.
token = get_auth_token(svp_api_key,svp_secret_code)

Next, we define the function for extracting the video titles when given a video id.

In [None]:
def get_video_properties(video_ref, token):
    """Look up the title of one video through the SVP ``svp_list_videos`` API.

    Args:
        video_ref: Reference of the video to look up.
        token: Auth token for the SVP API.

    Returns:
        The ``Title`` of the ``video`` entry in the XML response, or
        ``{"error": <message>}`` if the request fails, times out or returns
        an HTTP error status.

    Raises:
        KeyError, TypeError: If the XML lacks the expected structure.
        NOTE(review): presumably this also happens when the response holds
        several ``video`` entries, which xmltodict parses as a list - confirm.
    """
    url = "https://www.streamingvideoprovider.com/"
    params = {
        "l": "api",
        "a": "svp_list_videos",
        "token": token,
        "video_ref": video_ref,
    }

    try:
        # The timeout keeps a stalled connection from hanging the run. A
        # timeout is a RequestException, so it is reported like any other
        # request failure.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        parsed = xmltodict.parse(response.text)
        return parsed["response"]["video_list"]["video"]['Title']
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}

And finally, we write the titles to a new csv file with other information.

In [None]:
def extract_file_names_from_svp(csv_file, upload_csv_file):
    """Add a ``Title`` column to the upload CSV using the SVP API.

    Args:
        csv_file: Input CSV with ``Number``, ``Data ID``, ``Clip ID``,
            ``Mux Asset ID`` and ``Mux Playback ID`` columns.
        upload_csv_file: Output CSV with the same columns plus ``Title``.
            An existing file is overwritten.
    """
    # Read everything first and close the input before the output is opened.
    with open(csv_file, mode='r', newline='', encoding='utf-8') as file:
        rows = list(csv.DictReader(file))

    for row in rows:
        # Guard on the column that is actually read. The old check tested
        # 'Clip ID' while the lookup used 'Data ID'.
        if 'Data ID' in row:
            row['Title'] = get_video_properties(row['Data ID'], token)

    with open(upload_csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=["Number", "Data ID", "Clip ID", 'Mux Asset ID', 'Mux Playback ID', 'Title'])

        # Write the header
        writer.writeheader()

        # Write the updated rows
        writer.writerows(rows)

In [None]:
# Add titles to the Mux upload results and write them to a new CSV.
extract_file_names_from_svp("mux_upload.csv", "mux_upload_with_names.csv")

# Extracting subtitles from SVP and uploading them to Mux.

Firstly, we define the function for fetching the subtitles given the clip_id. This returns the list of subtitles attached to the video.

In [None]:
def fetch_subtitles(clip_id):
    """Fetch the subtitle entries attached to an SVP clip.

    Args:
        clip_id: SVP clip ID of the video.

    Returns:
        list | None: The ``sub`` entries found under ``links/info/subs`` in
        the XML response, always as a list, or None when the clip has none.

    Raises:
        requests.exceptions.RequestException: On timeout or connection failure.
        KeyError: If the response has no ``links``/``info`` element.
    """
    url = f"https://service.webvideocore.net/?l=info&a=xmlClipPath&page_url=https%3A%2F%2Fplay.webvideocore.net%2Fpopplayer.php%3Fit%3D{clip_id}&clip_id={clip_id}"
    response = requests.get(url, timeout=30)  # never hang on a stalled connection
    info = xmltodict.parse(response.text)['links']['info']

    subs = info['subs'] if 'subs' in info else None
    # An empty <subs/> element parses to None; treat it as "no subtitles".
    if not isinstance(subs, dict) or 'sub' not in subs:
        return None

    sub = subs['sub']
    if sub is None:
        return None
    # xmltodict yields a dict for a single <sub> and a list for several.
    # Normalise so callers can always iterate over subtitle entries.
    return sub if isinstance(sub, list) else [sub]

Next, we define our language map, which maps the language label on SVP to the language code recognized by Mux. The language map can be updated to include any language or label used in either SVP or Mux.

In [None]:
# Maps a subtitle label to the language code sent to Mux.
# add_track_to_mux_asset falls back to "en-US" for labels not listed here.
language_map = {
    "English": "en-US",
    "Español": "es-ES",
    "Suomi": "fi-FI",
    "Afar": "aa-AA",
    "Deitsch": "de-DE",
}

Afterwards, we need to define the function for updating the video with the appropriate list of subtitle tracks on mux.

In [None]:
def add_track_to_mux_asset(asset_id, url, mux_token_id, mux_token_secret, language):
    """Attach a subtitle track to an existing Mux asset.

    Args:
        asset_id: ID of the Mux asset that receives the track.
        url: URL of the subtitle file Mux should fetch.
        mux_token_id: Mux access token ID, used as the basic-auth username.
        mux_token_secret: Mux secret token, used as the basic-auth password.
        language: Subtitle label. It is used as the track name and
            passthrough, and looked up in ``language_map`` for the language
            code ("en-US" when the label is unknown).

    Returns:
        dict: The parsed JSON response from Mux.

    Raises:
        requests.exceptions.HTTPError: If Mux answers with an error status.
            The message includes the response body.
        requests.exceptions.RequestException: On timeout or connection failure.
    """
    language_code = language_map.get(language, "en-US")

    api_url = f"https://api.mux.com/video/v1/assets/{asset_id}/tracks"
    headers = {
        "Content-Type": "application/json"
    }
    data = {
        "url": url,
        "type": "text",
        "text_type": "subtitles",
        "closed_captions": True,
        "language_code": language_code,
        "name": language,
        "passthrough": language
    }

    response = requests.post(
        api_url,
        headers=headers,
        json=data,
        auth=HTTPBasicAuth(mux_token_id, mux_token_secret),
        timeout=30,  # never hang forever on a stalled connection
    )

    # Raise on rejection so the caller's try/except can actually report it.
    # Previously an error response was returned like a success and ignored.
    if not response.ok:
        raise requests.exceptions.HTTPError(
            f"Mux rejected track for asset {asset_id} "
            f"({response.status_code}): {response.text}",
            response=response,
        )

    return response.json()

Now, we put everything together: we parse the CSV file, check whether each video has subtitles, and then upload each subtitle track to Mux.

In [None]:
def upload_subtitle(csv_file):
    """Copy every SVP subtitle track of the videos in ``csv_file`` to Mux.

    For each row with a ``Clip ID``, the clip's subtitles are fetched and
    every entry whose label is not "Off" is added as a track to the row's
    ``Mux Asset ID``. A failure to add one track is printed and does not
    stop the remaining tracks or rows.

    Args:
        csv_file: CSV containing at least ``Clip ID`` and ``Mux Asset ID``.
    """
    with open(csv_file, mode='r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            if 'Clip ID' not in row:
                continue

            subtitles = fetch_subtitles(row['Clip ID'])
            if subtitles is None:
                continue
            # A single subtitle may arrive as one dict instead of a list.
            # Wrap it so the loop sees subtitle entries rather than dict keys.
            if isinstance(subtitles, dict):
                subtitles = [subtitles]

            for subtitle in subtitles:
                if subtitle['label'] == "Off":
                    continue
                try:
                    add_track_to_mux_asset(
                        row['Mux Asset ID'],
                        subtitle['link'],
                        mux_access_keys,
                        mux_secret_token,
                        subtitle['label'],
                    )
                except Exception as ex:
                    # Best effort: report the failure and carry on.
                    print(ex)
                
# Upload subtitles for every video listed in the final CSV.
upload_subtitle('mux_upload_with_names.csv')
