In [4]:
from typing import List, Optional
from google.colab import files
import io
import pandas as pd
import re

def uploadFile() -> dict:
    """
    Prompt for a file upload through Colab and return the result.

    This is a direct pass-through to ``files.upload()``; nothing is
    filtered or transformed here.

    Returns:
        dict: A dictionary containing the uploaded files.
    """
    return files.upload()

def extractVideoIDs(data: pd.DataFrame, column: str) -> List[Optional[str]]:
    """
    Extracts video IDs from a DataFrame column.

    The ID is the text that follows ``watch?v=`` in the URL, up to (but not
    including) the next ``&``. One result is produced per row, in row order.

    Args:
        data (pd.DataFrame): The DataFrame containing the data.
        column (str): The name of the column containing video URLs.

    Returns:
        List[Optional[str]]: A list of video IDs extracted from the URLs.
            An entry is None when the cell has no ``watch?v=`` segment or
            is not a string (e.g. NaN from an empty CSV cell).

    Raises:
        KeyError: If ``column`` is not a column of ``data``.
    """
    # Compile once so the loop does not re-resolve the pattern per row.
    pattern = re.compile(r'(?<=watch\?v=)[^&]+')

    videoIDs: List[Optional[str]] = []
    for url in data[column]:
        # Empty cells arrive as NaN/None rather than str; passing them to
        # the regex would raise TypeError, so they are treated as "no ID".
        match = pattern.search(url) if isinstance(url, str) else None
        videoIDs.append(match.group(0) if match else None)
    return videoIDs

def saveVideoIds(videoIDs: List[Optional[str]], file_name: str) -> None:
    """
    Writes video IDs to a text file, one ID per line.

    The target file is opened in write mode, so existing content is
    replaced. Falsy entries (None or empty strings) are skipped.

    Args:
        videoIDs (List[Optional[str]]): A list of video IDs to be saved.
        file_name (str): The name of the text file to save the video IDs.
    """
    with open(file_name, 'w') as out:
        # The generator is built inside the `with` so the file is opened
        # (and truncated) before the input is iterated.
        out.writelines(entry + '\n' for entry in videoIDs if entry)

# First search query: upload AirPollutionVideos.csv through the Colab prompt.
searchQuery1 = uploadFile()
# The upload result is indexed by the uploaded file's name; the value is
# wrapped in BytesIO so pandas can read it like a file.
dfSQ1 = pd.read_csv(io.BytesIO(searchQuery1['AirPollutionVideos.csv'])) # saving file to dataframe

# Extract video IDs from the 'Video' column and save them to VideoIDs.txt
videoIds = extractVideoIDs(dfSQ1, 'Video')
saveVideoIds(videoIds, 'VideoIDs.txt')

# Second search query: upload AirPollutionNEWS.CSV.
# NOTE: the lookup key below is spelled exactly 'AirPollutionNEWS.CSV'
# (upper-case NEWS and extension), matching the name shown in the cell
# output; a differently-cased upload name would raise KeyError.
searchQuery2 = uploadFile()
dfSQ2 = pd.read_csv(io.BytesIO(searchQuery2['AirPollutionNEWS.CSV'])) # saving file to dataframe

# Extract video IDs from the 'VideoURL' column (this file uses a different
# column name than the first one) and save them to VideoIDs1.txt
videoIds1 = extractVideoIDs(dfSQ2, 'VideoURL')
saveVideoIds(videoIds1, 'VideoIDs1.txt')


Saving AirPollutionVideos.csv to AirPollutionVideos.csv


Saving AirPollutionNEWS.CSV to AirPollutionNEWS.CSV
