In [None]:
!pip install openai==0.28.0





In [None]:
import openai
import json
import re
import os
import time
from tqdm import tqdm

# ------------------------------------------------------------------
# Read the API key from the OPENAI_API_KEY environment variable so the
# secret does not have to be stored in the notebook itself. The literal
# placeholder below is only a fallback for when the variable is unset.
# ------------------------------------------------------------------
openai.api_key = os.environ.get("OPENAI_API_KEY", "API KEY")
# ------------------------------------------------------------------

def extract_json(json_string: str) -> str:
    """
    Return the JSON payload of a model reply, without Markdown code fences.

    If the text contains a fenced block (``` ... ```, optionally tagged
    "json" in any letter case), the stripped content of the first such
    block is returned. Otherwise the whole input is returned stripped.

    Args:
        json_string: Raw text returned by the model.

    Returns:
        The candidate JSON text, with surrounding whitespace removed.
    """
    # IGNORECASE so that a "```JSON" tag is dropped just like "```json";
    # DOTALL so the lazy group can span multiple lines.
    fenced = re.search(
        r"```(?:json)?(.*?)```",
        json_string,
        re.DOTALL | re.IGNORECASE,
    )
    if fenced:
        return fenced.group(1).strip()
    return json_string.strip()

def augment_queries_for_video(video_id, queries, max_retries=3, backoff=5):
    """
    Augment all queries of one video with a single chat-completion request.

    Args:
        video_id: Identifier of the video; only used in error messages.
        queries: Iterable of sentences to lengthen.
        max_retries: Maximum number of attempts (API calls) before giving up.
        backoff: Seconds to sleep between two consecutive attempts.

    Returns:
        A list with exactly one entry per input query, as returned by the
        model; `[]` when `queries` is empty (no request is sent); `None`
        when the last attempt failed with an OpenAI API error, when an
        unexpected exception occurred, or when `max_retries` is 0.

    Raises:
        ValueError: if the final attempt produced output that is not a JSON
            list with one entry per query.
    """
    queries = list(queries)
    if not queries:
        # Nothing to augment: skip the API round-trip entirely.
        return []

    # Build a single user message that includes all queries, numbered from 1.
    user_content = (
        "You will be given a list of sentences. "
        "For each sentence, generate a significantly longer version, "
        "while keeping the original meaning.\n\n"
        "IMPORTANT: Return only valid JSON (no triple backticks) with the augmented sentences in a JSON array.\n\n"
        "Here is the list of sentences:\n"
    )
    user_content += "".join(f"{i}. {q}\n" for i, q in enumerate(queries, start=1))

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        # Reset per attempt so the error message below never refers to a
        # reply from an earlier attempt (or to an unbound name).
        completion_text = None
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are a helpful assistant that processes multiple queries at once. "
                            "You will receive a list of sentences and must return a JSON array of augmented sentences. "
                            "Do not include triple backticks or any extra text—only return valid JSON."
                        )
                    },
                    {
                        "role": "user",
                        "content": user_content
                    }
                ],
                temperature=0.7
            )

            completion_text = response["choices"][0]["message"]["content"]
            augmented_list = json.loads(extract_json(completion_text))
            if not isinstance(augmented_list, list):
                raise ValueError("Response was not a JSON list.")
            # The caller stores the result positionally against the input
            # queries, so a different length would silently misalign them.
            if len(augmented_list) != len(queries):
                raise ValueError(
                    f"Expected {len(queries)} augmented sentences, got {len(augmented_list)}."
                )

            return augmented_list

        except openai.error.RateLimitError as e:
            failure = f"[RateLimitError] {e}."
        except openai.error.OpenAIError as e:
            failure = f"[OpenAIError] {e}."
        except (ValueError, json.JSONDecodeError) as e:
            # Sampling is non-deterministic (temperature 0.7), so a malformed
            # reply is worth another attempt; only the last one is fatal.
            if is_last_attempt:
                raise ValueError(
                    f"[JSONDecodeError] Could not parse model output for video '{video_id}'. Output was:\n{completion_text}"
                ) from e
            failure = f"[InvalidOutput] {e}."
        except Exception as e:
            print(f"[Unexpected Error] {e}")
            return None

        if is_last_attempt:
            # No point in sleeping when there is no attempt left.
            print(f"{failure} Giving up after {max_retries} attempts.")
        else:
            print(f"{failure} Retrying in {backoff} seconds...")
            time.sleep(backoff)

    # All attempts ended in API errors (or max_retries was 0).
    return None

def _write_json_atomically(path, data):
    """Dump `data` as JSON to a sibling temp file, then rename it over `path`."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    # os.replace swaps the file in one step, so an interruption during the
    # dump above cannot leave a truncated checkpoint behind.
    os.replace(tmp_path, path)


def augment_json_file(input_json_path, output_json_path):
    """
    Augment the queries of every video listed in an input JSON file.

    The input JSON must have the format:
        {
            "video_file_1": ["query1", "query2", ...],
            "video_file_2": ["query1", "query2", ...],
            ...
        }
    For each video, all queries are sent at once via
    augment_queries_for_video, which is expected to return a list.

    Results are written to output_json_path after every video. If that file
    already exists, the videos it contains are skipped, so an interrupted run
    resumes where it stopped. Processing stops at the first video that fails.

    A tqdm progress bar shows how many videos remain.

    Args:
        input_json_path: Path of the JSON file with the original queries.
        output_json_path: Path of the JSON checkpoint/result file.

    Returns:
        True if every video of the input is present in the output data,
        False if processing stopped early because a video failed.
    """
    # Load the original data
    with open(input_json_path, "r", encoding="utf-8") as f:
        original_data = json.load(f)

    # If an output file already exists, load partial data
    if os.path.exists(output_json_path):
        with open(output_json_path, "r", encoding="utf-8") as f:
            augmented_data = json.load(f)
    else:
        augmented_data = {}

    all_videos = list(original_data.keys())
    total_videos = len(all_videos)

    print(f"Total number of videos to process: {total_videos}")

    completed = True
    with tqdm(total=total_videos, desc="Processing videos") as pbar:
        for video_id in all_videos:
            # Already augmented in a previous run: just advance the bar.
            if video_id in augmented_data:
                pbar.update(1)
                continue

            print(f"\nNow augmenting queries for '{video_id}'")
            queries = original_data[video_id]

            try:
                augmented_queries = augment_queries_for_video(video_id, queries)
            except ValueError as e:
                # Unparseable / invalid model response for this video.
                print(e)
                print(f"Failed to augment queries for '{video_id}'. Saving partial data and stopping.")
                completed = False
                break

            if augmented_queries is None:
                # None means we tried max times and still got an error
                print(f"Failed to augment queries for '{video_id}' after multiple retries. Saving partial data and stopping.")
                completed = False
                break

            augmented_data[video_id] = augmented_queries

            # Checkpoint after each video so progress survives a crash.
            _write_json_atomically(output_json_path, augmented_data)

            pbar.update(1)

    if completed:
        print("\nAugmentation process complete.")
        print(f"Final augmented data saved to: {output_json_path}")
    else:
        # Do not claim completion when the loop was abandoned part-way.
        done = sum(1 for video_id in all_videos if video_id in augmented_data)
        print(f"\nAugmentation stopped early: {done} of {total_videos} videos done.")
        print(f"Results so far (if any) are stored in: {output_json_path}")

    return completed


# ----------------------------------------------------------------------------
# Example usage: if an error escapes augment_json_file, the augmentation is
# re-run, but only a bounded number of times
# ----------------------------------------------------------------------------
if __name__ == "__main__":
    input_file_path = "/content/MSRVTT_captions_test.json"
    output_file_path = "/content/output_MSRVTT.json"

    # Upper bound on runs: a persistent error (for example a missing input
    # file) would otherwise be retried forever.
    max_reruns = 5
    rerun_delay = 2  # seconds to wait before the next run

    for run in range(1, max_reruns + 1):
        try:
            augment_json_file(input_file_path, output_file_path)
            # augment_json_file returned without raising: stop re-running
            break
        except Exception as e:
            # Any error not caught inside augment_json_file ends up here
            print(f"\n[RE-RUN TRIGGER] An error occurred: {e}")
            if run == max_reruns:
                print(f"Giving up after {max_reruns} runs.")
                # Surface the last error instead of hiding it.
                raise
            print("Re-running the entire script...\n")
            time.sleep(rerun_delay)


Total number of videos to process: 1000


Processing videos:   0%|          | 0/1000 [00:00<?, ?it/s]


Now augmenting queries for 'video9032'


Processing videos:  79%|███████▉  | 788/1000 [00:12<00:03, 65.48it/s]


Now augmenting queries for 'video7169'

Now augmenting queries for 'video9818'

Now augmenting queries for 'video8089'


Processing videos:  79%|███████▉  | 791/1000 [00:29<00:09, 21.31it/s]


Now augmenting queries for 'video7501'


Processing videos:  79%|███████▉  | 792/1000 [00:32<00:11, 18.14it/s]


Now augmenting queries for 'video8828'


Processing videos:  79%|███████▉  | 793/1000 [00:38<00:15, 12.99it/s]


Now augmenting queries for 'video9871'


Processing videos:  79%|███████▉  | 794/1000 [00:43<00:20,  9.82it/s]


Now augmenting queries for 'video8811'


Processing videos:  80%|███████▉  | 795/1000 [00:48<00:28,  7.23it/s]


Now augmenting queries for 'video8311'


Processing videos:  80%|███████▉  | 796/1000 [00:53<00:37,  5.51it/s]


Now augmenting queries for 'video9574'


Processing videos:  80%|███████▉  | 797/1000 [00:58<00:53,  3.81it/s]


Now augmenting queries for 'video9512'


Processing videos:  80%|███████▉  | 798/1000 [01:05<01:21,  2.48it/s]


Now augmenting queries for 'video7613'


Processing videos:  80%|███████▉  | 799/1000 [01:10<01:44,  1.93it/s]


Now augmenting queries for 'video7822'


Processing videos:  80%|████████  | 800/1000 [01:14<02:09,  1.54it/s]


Now augmenting queries for 'video7219'


Processing videos:  80%|████████  | 801/1000 [01:20<03:11,  1.04it/s]


Now augmenting queries for 'video7967'


Processing videos:  80%|████████  | 802/1000 [01:25<04:06,  1.25s/it]


Now augmenting queries for 'video7064'


Processing videos:  80%|████████  | 803/1000 [01:33<06:04,  1.85s/it]


Now augmenting queries for 'video7699'


Processing videos:  80%|████████  | 804/1000 [01:41<08:13,  2.52s/it]


Now augmenting queries for 'video7649'


Processing videos:  80%|████████  | 805/1000 [01:46<09:12,  2.83s/it]


Now augmenting queries for 'video7614'


Processing videos:  81%|████████  | 806/1000 [01:49<09:27,  2.92s/it]


Now augmenting queries for 'video8690'


Processing videos:  81%|████████  | 807/1000 [01:54<10:59,  3.42s/it]


Now augmenting queries for 'video9821'


Processing videos:  81%|████████  | 808/1000 [01:59<11:35,  3.62s/it]


Now augmenting queries for 'video7233'


Processing videos:  81%|████████  | 809/1000 [02:03<11:45,  3.70s/it]


Now augmenting queries for 'video7590'


Processing videos:  81%|████████  | 810/1000 [02:07<11:54,  3.76s/it]


Now augmenting queries for 'video8721'


Processing videos:  81%|████████  | 811/1000 [02:10<11:37,  3.69s/it]


Now augmenting queries for 'video8804'


Processing videos:  81%|████████  | 812/1000 [02:15<12:27,  3.98s/it]


Now augmenting queries for 'video9022'


Processing videos:  81%|████████▏ | 813/1000 [02:20<13:25,  4.31s/it]


Now augmenting queries for 'video8423'


Processing videos:  81%|████████▏ | 814/1000 [02:27<15:50,  5.11s/it]


Now augmenting queries for 'video8835'


Processing videos:  82%|████████▏ | 815/1000 [02:31<14:18,  4.64s/it]


Now augmenting queries for 'video7418'


Processing videos:  82%|████████▏ | 816/1000 [02:34<13:20,  4.35s/it]


Now augmenting queries for 'video8327'


Processing videos:  82%|████████▏ | 817/1000 [02:39<13:24,  4.39s/it]


Now augmenting queries for 'video9731'


Processing videos:  82%|████████▏ | 818/1000 [02:43<13:06,  4.32s/it]


Now augmenting queries for 'video7573'


Processing videos:  82%|████████▏ | 819/1000 [02:48<13:34,  4.50s/it]


Now augmenting queries for 'video9253'


Processing videos:  82%|████████▏ | 820/1000 [02:53<14:18,  4.77s/it]


Now augmenting queries for 'video9693'


Processing videos:  82%|████████▏ | 821/1000 [02:57<13:06,  4.39s/it]


Now augmenting queries for 'video8837'


Processing videos:  82%|████████▏ | 822/1000 [03:03<14:47,  4.99s/it]


Now augmenting queries for 'video9489'


Processing videos:  82%|████████▏ | 823/1000 [03:07<13:36,  4.61s/it]


Now augmenting queries for 'video8444'


Processing videos:  82%|████████▏ | 824/1000 [03:13<14:30,  4.94s/it]


Now augmenting queries for 'video9677'


Processing videos:  82%|████████▎ | 825/1000 [03:17<14:18,  4.90s/it]


Now augmenting queries for 'video8863'


Processing videos:  83%|████████▎ | 826/1000 [03:22<14:14,  4.91s/it]


Now augmenting queries for 'video9347'


Processing videos:  83%|████████▎ | 827/1000 [03:27<14:02,  4.87s/it]


Now augmenting queries for 'video7549'


Processing videos:  83%|████████▎ | 828/1000 [03:32<14:07,  4.93s/it]


Now augmenting queries for 'video9353'


Processing videos:  83%|████████▎ | 829/1000 [03:36<12:41,  4.45s/it]


Now augmenting queries for 'video7849'


Processing videos:  83%|████████▎ | 830/1000 [03:40<12:56,  4.57s/it]


Now augmenting queries for 'video9368'


Processing videos:  83%|████████▎ | 831/1000 [03:45<13:04,  4.64s/it]


Now augmenting queries for 'video7828'


Processing videos:  83%|████████▎ | 832/1000 [03:51<13:38,  4.87s/it]


Now augmenting queries for 'video8450'


Processing videos:  83%|████████▎ | 833/1000 [03:56<14:08,  5.08s/it]


Now augmenting queries for 'video7342'


Processing videos:  83%|████████▎ | 834/1000 [04:03<15:17,  5.53s/it]


Now augmenting queries for 'video7135'


Processing videos:  84%|████████▎ | 835/1000 [04:07<14:18,  5.20s/it]


Now augmenting queries for 'video9806'


Processing videos:  84%|████████▎ | 836/1000 [04:14<15:10,  5.55s/it]


Now augmenting queries for 'video8261'


Processing videos:  84%|████████▎ | 837/1000 [04:18<14:25,  5.31s/it]


Now augmenting queries for 'video7844'


Processing videos:  84%|████████▍ | 838/1000 [04:26<16:18,  6.04s/it]


Now augmenting queries for 'video8022'


Processing videos:  84%|████████▍ | 839/1000 [04:29<14:05,  5.25s/it]


Now augmenting queries for 'video8664'


Processing videos:  84%|████████▍ | 840/1000 [04:34<13:28,  5.05s/it]


Now augmenting queries for 'video7174'


Processing videos:  84%|████████▍ | 841/1000 [04:40<13:51,  5.23s/it]


Now augmenting queries for 'video9689'


Processing videos:  84%|████████▍ | 842/1000 [04:43<12:17,  4.67s/it]


Now augmenting queries for 'video7413'


Processing videos:  84%|████████▍ | 843/1000 [04:46<11:05,  4.24s/it]


Now augmenting queries for 'video7412'


Processing videos:  84%|████████▍ | 844/1000 [04:50<10:59,  4.23s/it]


Now augmenting queries for 'video7035'


Processing videos:  84%|████████▍ | 845/1000 [04:56<12:10,  4.71s/it]


Now augmenting queries for 'video7939'


Processing videos:  85%|████████▍ | 846/1000 [05:00<11:26,  4.46s/it]


Now augmenting queries for 'video8783'


Processing videos:  85%|████████▍ | 847/1000 [05:09<14:51,  5.83s/it]


Now augmenting queries for 'video9403'


Processing videos:  85%|████████▍ | 848/1000 [05:13<13:19,  5.26s/it]


Now augmenting queries for 'video9839'


Processing videos:  85%|████████▍ | 849/1000 [05:19<13:44,  5.46s/it]


Now augmenting queries for 'video9131'


Processing videos:  85%|████████▌ | 850/1000 [05:23<12:20,  4.94s/it]


Now augmenting queries for 'video9207'


Processing videos:  85%|████████▌ | 851/1000 [05:27<11:59,  4.83s/it]


Now augmenting queries for 'video8240'


Processing videos:  85%|████████▌ | 852/1000 [05:33<12:24,  5.03s/it]


Now augmenting queries for 'video8175'


Processing videos:  85%|████████▌ | 853/1000 [05:38<12:36,  5.15s/it]


Now augmenting queries for 'video7360'


Processing videos:  85%|████████▌ | 854/1000 [05:42<11:37,  4.78s/it]


Now augmenting queries for 'video8322'


Processing videos:  86%|████████▌ | 855/1000 [05:47<11:26,  4.73s/it]


Now augmenting queries for 'video9015'


Processing videos:  86%|████████▌ | 856/1000 [05:51<10:39,  4.44s/it]


Now augmenting queries for 'video8948'


Processing videos:  86%|████████▌ | 857/1000 [05:54<09:54,  4.16s/it]


Now augmenting queries for 'video8073'


Processing videos:  86%|████████▌ | 858/1000 [05:59<10:25,  4.40s/it]


Now augmenting queries for 'video7214'


Processing videos:  86%|████████▌ | 859/1000 [06:05<11:18,  4.81s/it]


Now augmenting queries for 'video8498'


Processing videos:  86%|████████▌ | 860/1000 [06:11<12:19,  5.28s/it]


Now augmenting queries for 'video7793'


Processing videos:  86%|████████▌ | 861/1000 [06:16<11:34,  5.00s/it]


Now augmenting queries for 'video9882'


Processing videos:  86%|████████▌ | 862/1000 [06:19<10:26,  4.54s/it]


Now augmenting queries for 'video9830'


Processing videos:  86%|████████▋ | 863/1000 [06:25<11:04,  4.85s/it]


Now augmenting queries for 'video8851'


Processing videos:  86%|████████▋ | 864/1000 [06:31<11:48,  5.21s/it]


Now augmenting queries for 'video8348'


Processing videos:  86%|████████▋ | 865/1000 [06:35<11:07,  4.94s/it]


Now augmenting queries for 'video8489'


Processing videos:  87%|████████▋ | 866/1000 [06:39<10:07,  4.54s/it]


Now augmenting queries for 'video9834'


Processing videos:  87%|████████▋ | 867/1000 [06:43<10:06,  4.56s/it]


Now augmenting queries for 'video8320'


Processing videos:  87%|████████▋ | 868/1000 [06:48<10:16,  4.67s/it]


Now augmenting queries for 'video8670'


Processing videos:  87%|████████▋ | 869/1000 [06:52<09:47,  4.48s/it]


Now augmenting queries for 'video9010'


Processing videos:  87%|████████▋ | 870/1000 [06:58<10:54,  5.03s/it]


Now augmenting queries for 'video7898'


Processing videos:  87%|████████▋ | 871/1000 [07:03<10:34,  4.91s/it]


Now augmenting queries for 'video7027'


Processing videos:  87%|████████▋ | 872/1000 [07:06<09:30,  4.46s/it]


Now augmenting queries for 'video7237'


Processing videos:  87%|████████▋ | 873/1000 [07:10<09:04,  4.29s/it]


Now augmenting queries for 'video9509'


Processing videos:  87%|████████▋ | 874/1000 [07:14<08:25,  4.01s/it]


Now augmenting queries for 'video8931'


Processing videos:  88%|████████▊ | 875/1000 [07:19<08:54,  4.27s/it]


Now augmenting queries for 'video7568'


Processing videos:  88%|████████▊ | 876/1000 [07:24<09:13,  4.46s/it]


Now augmenting queries for 'video8831'


Processing videos:  88%|████████▊ | 877/1000 [07:27<08:38,  4.21s/it]


Now augmenting queries for 'video8254'


Processing videos:  88%|████████▊ | 878/1000 [07:31<08:06,  3.99s/it]


Now augmenting queries for 'video8789'


Processing videos:  88%|████████▊ | 879/1000 [07:35<08:04,  4.00s/it]


Now augmenting queries for 'video9223'


Processing videos:  88%|████████▊ | 880/1000 [07:43<10:54,  5.45s/it]


Now augmenting queries for 'video9520'


Processing videos:  88%|████████▊ | 881/1000 [07:49<11:03,  5.57s/it]


Now augmenting queries for 'video9205'


Processing videos:  88%|████████▊ | 882/1000 [07:55<11:00,  5.60s/it]


Now augmenting queries for 'video8335'


Processing videos:  88%|████████▊ | 883/1000 [08:00<10:28,  5.37s/it]


Now augmenting queries for 'video7577'


Processing videos:  88%|████████▊ | 884/1000 [08:06<10:33,  5.46s/it]


Now augmenting queries for 'video7142'


Processing videos:  88%|████████▊ | 885/1000 [08:10<10:00,  5.22s/it]


Now augmenting queries for 'video9337'


Processing videos:  89%|████████▊ | 886/1000 [08:15<09:57,  5.24s/it]


Now augmenting queries for 'video7061'


Processing videos:  89%|████████▊ | 887/1000 [08:25<12:27,  6.62s/it]


Now augmenting queries for 'video7152'


Processing videos:  89%|████████▉ | 888/1000 [08:32<12:36,  6.76s/it]


Now augmenting queries for 'video8801'


Processing videos:  89%|████████▉ | 889/1000 [08:38<11:38,  6.29s/it]


Now augmenting queries for 'video9735'


Processing videos:  89%|████████▉ | 890/1000 [08:41<10:02,  5.48s/it]


Now augmenting queries for 'video8572'


Processing videos:  89%|████████▉ | 891/1000 [08:45<09:02,  4.98s/it]


Now augmenting queries for 'video7034'


Processing videos:  89%|████████▉ | 892/1000 [08:48<08:04,  4.48s/it]


Now augmenting queries for 'video7579'


Processing videos:  89%|████████▉ | 893/1000 [08:53<07:54,  4.43s/it]


Now augmenting queries for 'video7562'


Processing videos:  89%|████████▉ | 894/1000 [08:57<07:42,  4.36s/it]


Now augmenting queries for 'video9825'


Processing videos:  90%|████████▉ | 895/1000 [09:03<08:21,  4.78s/it]


Now augmenting queries for 'video9835'


Processing videos:  90%|████████▉ | 896/1000 [09:05<07:18,  4.22s/it]


Now augmenting queries for 'video8342'


Processing videos:  90%|████████▉ | 897/1000 [09:10<07:27,  4.34s/it]


Now augmenting queries for 'video8618'


Processing videos:  90%|████████▉ | 898/1000 [09:20<10:03,  5.91s/it]


Now augmenting queries for 'video7131'


Processing videos:  90%|████████▉ | 899/1000 [09:24<09:13,  5.48s/it]


Now augmenting queries for 'video7024'


Processing videos:  90%|█████████ | 900/1000 [09:28<08:28,  5.08s/it]


Now augmenting queries for 'video8677'


Processing videos:  90%|█████████ | 901/1000 [09:33<08:00,  4.85s/it]


Now augmenting queries for 'video7542'


Processing videos:  90%|█████████ | 902/1000 [09:38<08:18,  5.08s/it]


Now augmenting queries for 'video7206'


Processing videos:  90%|█████████ | 903/1000 [09:45<08:52,  5.49s/it]


Now augmenting queries for 'video7154'


Processing videos:  90%|█████████ | 904/1000 [09:50<08:38,  5.40s/it]


Now augmenting queries for 'video7119'


Processing videos:  90%|█████████ | 905/1000 [09:55<08:16,  5.23s/it]


Now augmenting queries for 'video8935'


Processing videos:  91%|█████████ | 906/1000 [09:59<07:49,  4.99s/it]


Now augmenting queries for 'video9576'


Processing videos:  91%|█████████ | 907/1000 [10:07<09:15,  5.97s/it]


Now augmenting queries for 'video9240'


Processing videos:  91%|█████████ | 908/1000 [10:12<08:31,  5.56s/it]


Now augmenting queries for 'video8906'


Processing videos:  91%|█████████ | 909/1000 [10:15<07:27,  4.92s/it]


Now augmenting queries for 'video8477'


Processing videos:  91%|█████████ | 910/1000 [10:19<06:48,  4.54s/it]


Now augmenting queries for 'video7150'


Processing videos:  91%|█████████ | 911/1000 [10:24<07:04,  4.76s/it]


Now augmenting queries for 'video8257'


Processing videos:  91%|█████████ | 912/1000 [10:30<07:28,  5.10s/it]


Now augmenting queries for 'video8751'


Processing videos:  91%|█████████▏| 913/1000 [10:34<06:56,  4.79s/it]


Now augmenting queries for 'video9307'


Processing videos:  91%|█████████▏| 914/1000 [10:39<06:49,  4.76s/it]


Now augmenting queries for 'video9230'


Processing videos:  92%|█████████▏| 915/1000 [10:42<06:10,  4.36s/it]


Now augmenting queries for 'video7724'


Processing videos:  92%|█████████▏| 916/1000 [10:48<06:43,  4.81s/it]


Now augmenting queries for 'video7723'


Processing videos:  92%|█████████▏| 917/1000 [10:52<06:22,  4.61s/it]


Now augmenting queries for 'video9017'


Processing videos:  92%|█████████▏| 918/1000 [10:56<05:55,  4.34s/it]


Now augmenting queries for 'video9348'


Processing videos:  92%|█████████▏| 919/1000 [11:05<07:52,  5.84s/it]


Now augmenting queries for 'video7978'


Processing videos:  92%|█████████▏| 920/1000 [11:11<07:35,  5.69s/it]


Now augmenting queries for 'video8665'


Processing videos:  92%|█████████▏| 921/1000 [11:19<08:25,  6.40s/it]


Now augmenting queries for 'video9203'


Processing videos:  92%|█████████▏| 922/1000 [11:25<08:01,  6.18s/it]


Now augmenting queries for 'video9024'


Processing videos:  92%|█████████▏| 923/1000 [11:31<08:10,  6.37s/it]


Now augmenting queries for 'video7234'


Processing videos:  92%|█████████▏| 924/1000 [11:36<07:34,  5.98s/it]


Now augmenting queries for 'video8025'


Processing videos:  92%|█████████▎| 925/1000 [11:43<07:38,  6.11s/it]


Now augmenting queries for 'video8118'


Processing videos:  93%|█████████▎| 926/1000 [11:48<07:14,  5.87s/it]


Now augmenting queries for 'video7491'


Processing videos:  93%|█████████▎| 927/1000 [11:56<07:48,  6.42s/it]


Now augmenting queries for 'video7352'


Processing videos:  93%|█████████▎| 928/1000 [12:01<07:06,  5.92s/it]


Now augmenting queries for 'video9639'


Processing videos:  93%|█████████▎| 929/1000 [12:06<06:48,  5.75s/it]


Now augmenting queries for 'video8129'


Processing videos:  93%|█████████▎| 930/1000 [12:10<06:09,  5.28s/it]


Now augmenting queries for 'video7415'


Processing videos:  93%|█████████▎| 931/1000 [12:15<05:46,  5.02s/it]


Now augmenting queries for 'video9503'


Processing videos:  93%|█████████▎| 932/1000 [12:19<05:34,  4.91s/it]


Now augmenting queries for 'video7669'


Processing videos:  93%|█████████▎| 933/1000 [12:24<05:29,  4.91s/it]


Now augmenting queries for 'video9026'


Processing videos:  93%|█████████▎| 934/1000 [12:29<05:26,  4.94s/it]


Now augmenting queries for 'video9752'


Processing videos:  94%|█████████▎| 935/1000 [12:34<05:22,  4.97s/it]


Now augmenting queries for 'video8514'


Processing videos:  94%|█████████▎| 936/1000 [12:39<05:21,  5.03s/it]


Now augmenting queries for 'video9975'


Processing videos:  94%|█████████▎| 937/1000 [12:44<05:03,  4.81s/it]


Now augmenting queries for 'video7790'


Processing videos:  94%|█████████▍| 938/1000 [12:49<05:14,  5.08s/it]


Now augmenting queries for 'video8868'


Processing videos:  94%|█████████▍| 939/1000 [12:54<05:09,  5.07s/it]


Now augmenting queries for 'video8241'


Processing videos:  94%|█████████▍| 940/1000 [13:00<05:09,  5.17s/it]


Now augmenting queries for 'video8248'


Processing videos:  94%|█████████▍| 941/1000 [13:03<04:34,  4.64s/it]


Now augmenting queries for 'video8908'


Processing videos:  94%|█████████▍| 942/1000 [13:08<04:28,  4.63s/it]


Now augmenting queries for 'video8426'


Processing videos:  94%|█████████▍| 943/1000 [13:12<04:17,  4.52s/it]


Now augmenting queries for 'video8300'


Processing videos:  94%|█████████▍| 944/1000 [13:16<03:59,  4.27s/it]


Now augmenting queries for 'video9810'


Processing videos:  94%|█████████▍| 945/1000 [13:21<04:03,  4.43s/it]


Now augmenting queries for 'video7725'


Processing videos:  95%|█████████▍| 946/1000 [13:24<03:45,  4.18s/it]


Now augmenting queries for 'video7028'


Processing videos:  95%|█████████▍| 947/1000 [13:30<04:03,  4.59s/it]


Now augmenting queries for 'video8312'


Processing videos:  95%|█████████▍| 948/1000 [13:35<04:06,  4.74s/it]


Now augmenting queries for 'video9117'


Processing videos:  95%|█████████▍| 949/1000 [13:38<03:38,  4.27s/it]


Now augmenting queries for 'video7854'


Processing videos:  95%|█████████▌| 950/1000 [13:45<04:19,  5.19s/it]


Now augmenting queries for 'video7411'


Processing videos:  95%|█████████▌| 951/1000 [13:50<04:11,  5.13s/it]


Now augmenting queries for 'video9774'


Processing videos:  95%|█████████▌| 952/1000 [13:54<03:47,  4.73s/it]


Now augmenting queries for 'video7834'


Processing videos:  95%|█████████▌| 953/1000 [13:58<03:24,  4.36s/it]


Now augmenting queries for 'video7890'


Processing videos:  95%|█████████▌| 954/1000 [14:01<03:06,  4.05s/it]


Now augmenting queries for 'video9037'


Processing videos:  96%|█████████▌| 955/1000 [14:05<02:56,  3.92s/it]


Now augmenting queries for 'video9309'


Processing videos:  96%|█████████▌| 956/1000 [14:12<03:44,  5.11s/it]


Now augmenting queries for 'video9508'


Processing videos:  96%|█████████▌| 957/1000 [14:17<03:33,  4.95s/it]


Now augmenting queries for 'video9232'


Processing videos:  96%|█████████▌| 958/1000 [14:22<03:23,  4.84s/it]


Now augmenting queries for 'video7371'


Processing videos:  96%|█████████▌| 959/1000 [14:32<04:26,  6.50s/it]


Now augmenting queries for 'video7686'


Processing videos:  96%|█████████▌| 960/1000 [14:37<03:57,  5.94s/it]


Now augmenting queries for 'video9339'


Processing videos:  96%|█████████▌| 961/1000 [14:41<03:31,  5.43s/it]


Now augmenting queries for 'video8682'


Processing videos:  96%|█████████▌| 962/1000 [14:45<03:08,  4.97s/it]


Now augmenting queries for 'video9698'


Processing videos:  96%|█████████▋| 963/1000 [14:48<02:46,  4.49s/it]


Now augmenting queries for 'video9409'


Processing videos:  96%|█████████▋| 964/1000 [14:51<02:27,  4.11s/it]


Now augmenting queries for 'video9335'


Processing videos:  96%|█████████▋| 965/1000 [14:55<02:21,  4.04s/it]


Now augmenting queries for 'video7155'


Processing videos:  97%|█████████▋| 966/1000 [14:58<02:06,  3.72s/it]


Now augmenting queries for 'video9620'


Processing videos:  97%|█████████▋| 967/1000 [15:01<01:55,  3.51s/it]


Now augmenting queries for 'video9228'


Processing videos:  97%|█████████▋| 968/1000 [15:05<01:51,  3.49s/it]


Now augmenting queries for 'video9625'


Processing videos:  97%|█████████▋| 969/1000 [15:09<01:57,  3.79s/it]


Now augmenting queries for 'video9824'


Processing videos:  97%|█████████▋| 970/1000 [15:13<01:54,  3.80s/it]


Now augmenting queries for 'video7632'


Processing videos:  97%|█████████▋| 971/1000 [15:17<01:52,  3.87s/it]


Now augmenting queries for 'video7565'


Processing videos:  97%|█████████▋| 972/1000 [15:21<01:50,  3.95s/it]


Now augmenting queries for 'video7744'


Processing videos:  97%|█████████▋| 973/1000 [15:25<01:50,  4.09s/it]


Now augmenting queries for 'video9885'


Processing videos:  97%|█████████▋| 974/1000 [15:28<01:37,  3.76s/it]


Now augmenting queries for 'video9405'


Processing videos:  98%|█████████▊| 975/1000 [15:32<01:28,  3.55s/it]


Now augmenting queries for 'video9016'


Processing videos:  98%|█████████▊| 976/1000 [15:36<01:28,  3.68s/it]


Now augmenting queries for 'video9320'


Processing videos:  98%|█████████▊| 977/1000 [15:40<01:31,  3.98s/it]


Now augmenting queries for 'video9119'


Processing videos:  98%|█████████▊| 978/1000 [15:44<01:28,  4.04s/it]


Now augmenting queries for 'video7799'


Processing videos:  98%|█████████▊| 979/1000 [15:54<01:59,  5.68s/it]


Now augmenting queries for 'video7205'


Processing videos:  98%|█████████▊| 980/1000 [15:58<01:42,  5.10s/it]


Now augmenting queries for 'video8260'


Processing videos:  98%|█████████▊| 981/1000 [16:02<01:31,  4.82s/it]


Now augmenting queries for 'video9452'


Processing videos:  98%|█████████▊| 982/1000 [16:07<01:27,  4.88s/it]


Now augmenting queries for 'video9609'


Processing videos:  98%|█████████▊| 983/1000 [16:13<01:28,  5.23s/it]


Now augmenting queries for 'video8865'


Processing videos:  98%|█████████▊| 984/1000 [16:17<01:20,  5.02s/it]


Now augmenting queries for 'video9428'


Processing videos:  98%|█████████▊| 985/1000 [16:23<01:15,  5.04s/it]


Now augmenting queries for 'video7431'


Processing videos:  99%|█████████▊| 986/1000 [16:28<01:10,  5.06s/it]


Now augmenting queries for 'video7963'


Processing videos:  99%|█████████▊| 987/1000 [16:31<01:00,  4.66s/it]


Now augmenting queries for 'video8243'


Processing videos:  99%|█████████▉| 988/1000 [16:38<01:03,  5.32s/it]


Now augmenting queries for 'video7571'


Processing videos:  99%|█████████▉| 989/1000 [16:43<00:55,  5.09s/it]


Now augmenting queries for 'video7333'


Processing videos:  99%|█████████▉| 990/1000 [16:46<00:45,  4.52s/it]


Now augmenting queries for 'video7592'


Processing videos:  99%|█████████▉| 991/1000 [16:50<00:40,  4.47s/it]


Now augmenting queries for 'video7204'


Processing videos:  99%|█████████▉| 992/1000 [16:55<00:35,  4.42s/it]


Now augmenting queries for 'video7171'


Processing videos:  99%|█████████▉| 993/1000 [17:00<00:32,  4.69s/it]


Now augmenting queries for 'video7528'


Processing videos:  99%|█████████▉| 994/1000 [17:06<00:29,  4.98s/it]


Now augmenting queries for 'video9246'


Processing videos: 100%|█████████▉| 995/1000 [17:11<00:25,  5.07s/it]


Now augmenting queries for 'video9581'


Processing videos: 100%|█████████▉| 996/1000 [17:16<00:20,  5.21s/it]


Now augmenting queries for 'video7946'


Processing videos: 100%|█████████▉| 997/1000 [17:21<00:14,  4.96s/it]


Now augmenting queries for 'video7795'


Processing videos: 100%|█████████▉| 998/1000 [17:25<00:09,  4.60s/it]


Now augmenting queries for 'video7112'


Processing videos: 100%|█████████▉| 999/1000 [17:28<00:04,  4.32s/it]


Now augmenting queries for 'video8978'


Processing videos: 100%|██████████| 1000/1000 [17:33<00:00,  1.05s/it]


Augmentation process complete.
Final augmented data saved to: /content/output_MSRVTT.json





In [None]:
# Step 1: Mount Google Drive
from google.colab import drive
import shutil
import os

# Attach Google Drive at /content/drive
drive.mount('/content/drive')

# Step 2: Where the file currently is (Colab) and where it should go (Drive)
colab_json_path = "/content/output_MSRVTT.json"
drive_json_path = "/content/drive/My Drive/output_MSRVTT.json"

# Step 3: Report a missing source file; otherwise copy it over to Drive
if not os.path.exists(colab_json_path):
    print(f"Error: File not found at {colab_json_path}")
else:
    shutil.copy(colab_json_path, drive_json_path)
    print(f"File successfully saved to Google Drive at: {drive_json_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
File successfully saved to Google Drive at: /content/drive/My Drive/output_MSRVTT.json
