## Fetching YouTube Video IDs for Movies using the YouTube Data API

### Step 1: Importing Libraries

In [1]:
import pandas as pd
import configparser
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


### Step 2: Files Loading

In [2]:
# Load u.data file
ratings_data = pd.read_csv('files/u.data', sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])

# Load u.item file
columns = ['movie_id', 'movie_title', 'release_date', 'video_release_date', 'IMDb_URL', 'unknown', 'Action',
           'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
           'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
movies_data = pd.read_csv('files/u.item', sep='|', names=columns, encoding='latin-1')

# Extract relevant columns from movies_data
movies_data = movies_data[['movie_id', 'movie_title', 'unknown', 'Action', 'Adventure', 'Animation', 'Children',
                           'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
                           'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']]


## Youtube ID's Fetching

In [3]:
# Function to read the YouTube API keys from the config.ini file
def get_youtube_api_keys():
    config = configparser.ConfigParser()
    config.read('files/config.ini')
    api_keys = []
    for key in config['YouTubeAPI']:
        api_keys.append(config['YouTubeAPI'][key])
    return api_keys

In [4]:
# Function to fetch YouTube video IDs for movies using relevant search queries with multiple API keys
def fetch_youtube_video_ids(api_keys, movie_titles, starting_index):
    """
    Fetch YouTube video IDs for movies using relevant search queries with multiple API keys.
    
    Parameters:
    - api_keys: List of API keys for the YouTube Data API.
    - movie_titles: List of movie titles to fetch video IDs for.
    - starting_index: Index to start processing API keys.
    
    Returns:
    - video_id_mapping: Dictionary containing movie titles as keys and their respective YouTube video IDs as values.
    - data_fetched: Total number of data entries fetched.
    """
    video_id_mapping = {}
    data_fetched = 0
    current_key_index = starting_index

    for index, movie_title in enumerate(movie_titles, start=starting_index):
        # Skip the movie if the YouTube ID is already present in the DataFrame
        if pd.notnull(movies_with_youtube_ids[movies_with_youtube_ids['movie_title'] == movie_title]['youtube_video_id'].iloc[0]):
            continue

        search_query = f'{movie_title} movie trailer'
        api_key = api_keys[current_key_index]
        youtube = build('youtube', 'v3', developerKey=api_key)
        try:
            response = youtube.search().list(
                part='id',
                q=search_query,
                type='video',
                maxResults=1
            ).execute()

            if 'items' in response and response['items']:
                video_id = response['items'][0]['id']['videoId']
                video_id_mapping[movie_title] = video_id
                data_fetched += 1
                print(f"Fetched YouTube ID for \"{movie_title}\" (Total fetched: {data_fetched})")
            else:
                print(f"No videos found for \"{movie_title}\"")

        except HttpError as error:
            if error.resp.status == 403 and 'quotaExceeded' in str(error):
                print(f"Quota exceeded for API Key {current_key_index + 1}. Trying next key.")
                current_key_index = (current_key_index + 1) % len(api_keys)
                if current_key_index == starting_index:
                    print('\n\n-----------------------------------------------------------------')
                    print("All API keys' quotas have been exceeded. Stopping the program.")
                    print('-----------------------------------------------------------------\n\n')
                    break
            else:
                print(f"Error fetching video ID for \"{movie_title}\": {error}")
                break  # Stop the program if any other error occurs

    return video_id_mapping, data_fetched

# Fetch YouTube API keys from the config.ini file
api_keys = get_youtube_api_keys()

# Read the movies_with_youtube_ids DataFrame from the CSV file
movies_with_youtube_ids = pd.read_csv('files/movies_with_youtube_ids.csv')

# Extract movie_titles from the movies_data DataFrame
movie_titles = movies_data['movie_title'].tolist()

# Filter out movies for which YouTube IDs are already fetched
movie_titles_to_fetch = [
    title for title in movie_titles if pd.isnull(
        movies_with_youtube_ids[movies_with_youtube_ids['movie_title'] == title]['youtube_video_id'].iloc[0]
    )
]

# Start fetching YouTube video IDs from the last processed index
starting_index = 0
video_id_mapping, data_fetched = fetch_youtube_video_ids(api_keys, movie_titles_to_fetch, starting_index)

# Print the total data fetched
print('================================================')
print(f"Data fetched until quota limit was reached: {data_fetched}")
print('================================================\n\n')

# Add the fetched video_id_mapping to the movies_with_youtube_ids DataFrame
for title, video_id in video_id_mapping.items():
    movies_with_youtube_ids.loc[movies_with_youtube_ids['movie_title'] == title, 'youtube_video_id'] = video_id

# Save the updated movies_with_youtube_ids DataFrame to a CSV file
movies_with_youtube_ids.to_csv('files/movies_with_youtube_ids.csv', index=False)

# Print the updated movies_with_youtube_ids DataFrame
print("Updated movies_with_youtube_ids DataFrame:")
print(movies_with_youtube_ids)


Quota exceeded for API Key 1. Trying next key.
Quota exceeded for API Key 2. Trying next key.
Quota exceeded for API Key 3. Trying next key.
Quota exceeded for API Key 4. Trying next key.
Quota exceeded for API Key 5. Trying next key.
Quota exceeded for API Key 6. Trying next key.
Quota exceeded for API Key 7. Trying next key.
Quota exceeded for API Key 8. Trying next key.
Quota exceeded for API Key 9. Trying next key.
Quota exceeded for API Key 10. Trying next key.
Quota exceeded for API Key 11. Trying next key.
Data fetched until quota limit was reached: 0


Updated movies_with_youtube_ids DataFrame:
      movie_id                                movie_title  unknown  Action  \
0            1                           Toy Story (1995)        0       0   
1            2                           GoldenEye (1995)        0       1   
2            3                          Four Rooms (1995)        0       0   
3            4                          Get Shorty (1995)        0       1   


In [5]:
# Read the movies_with_youtube_ids DataFrame from the CSV file
movies_with_youtube_ids = pd.read_csv('files/movies_with_youtube_ids.csv')

# Count the non-null values in the 'youtube_video_id' column
total_youtube_ids = movies_with_youtube_ids['youtube_video_id'].count()

# Print the total number of YouTube IDs
print(f"Total YouTube IDs fetched: {total_youtube_ids}")


Total YouTube IDs fetched: 1671


In [7]:
# Function to fetch YouTube comments for a video using multiple API keys
def fetch_youtube_comments(api_keys, video_ids, starting_index):
    """
    Fetch YouTube comments for videos using multiple API keys.
    
    Parameters:
    - api_keys: List of API keys for the YouTube Data API.
    - video_ids: List of YouTube video IDs to fetch comments for.
    - starting_index: Index to start processing API keys.
    
    Returns:
    - comments: List of fetched comments.
    - current_key_index: Index of the API key used.
    """
    comments = []
    current_key_index = starting_index

    for video_id in video_ids:
        api_key = api_keys[current_key_index]
        youtube = build('youtube', 'v3', developerKey=api_key)
        try:
            response = youtube.commentThreads().list(
                part='snippet',
                videoId=video_id,
                textFormat='plainText',
                maxResults=100
            ).execute()

            if 'items' in response and response['items']:
                for item in response['items']:
                    comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
                    comments.append(comment)

                print(f"Fetched {len(response['items'])} comments for video ID: {video_id}")
            else:
                print(f"No comments found for video ID: {video_id}")

        except HttpError as error:
            if error.resp.status == 403 and 'quotaExceeded' in str(error):
                print('\n-----------------------------------------------------------------')
                print(f"Quota exceeded for API Key {current_key_index + 1}. Trying next key.")
                print('-----------------------------------------------------------------\n')

                current_key_index = (current_key_index + 1) % len(api_keys)
                if current_key_index == starting_index:
                    print('\n\n-----------------------------------------------------------------')
                    print("All API keys' quotas have been exceeded. Stopping the program.")
                    print('-----------------------------------------------------------------\n\n')
                    break
            else:
                print(f"Error fetching comments for video ID {video_id}")
                continue

    return comments, current_key_index

# Read the fetched YouTube video IDs from the CSV file
movies_with_youtube_ids = pd.read_csv('files/movies_with_youtube_ids.csv')

# Extract video IDs from the DataFrame
video_ids_to_fetch = movies_with_youtube_ids['youtube_video_id'].tolist()

# Fetch YouTube API keys from the config.ini file
api_keys = get_youtube_api_keys()

# Read the comments_data DataFrame from the CSV file
try:
    comments_data = pd.read_csv('files/youtube_comments.csv')
except FileNotFoundError:
    comments_data = pd.DataFrame(columns=['youtube_video_id', 'comment_text'])

# Filter out video IDs for which comments are already fetched
video_ids_to_fetch = [
    video_id for video_id in video_ids_to_fetch if video_id not in comments_data['youtube_video_id'].tolist()
]

# Start fetching YouTube comments from the last processed index
starting_index = 0
comments, current_key_index = fetch_youtube_comments(api_keys, video_ids_to_fetch, starting_index)

print(f"Number of video IDs to fetch comments for: {len(video_ids_to_fetch)}")
print(f"Number of fetched comments: {len(comments)}")

# Loop through comments and video IDs to save every 100 comments
comments_to_save = []
for i, (video_id, comment) in enumerate(zip(video_ids_to_fetch, comments)):
    comments_to_save.append({'youtube_video_id': video_id, 'comment_text': comment})
    if (i + 1) % 100 == 0 or i == len(video_ids_to_fetch) - 1:
        new_comments_data = pd.DataFrame(comments_to_save)
        comments_data = comments_data.append(new_comments_data, ignore_index=True)
        comments_to_save = []
        print(f"Saved comments for video IDs: {i - 100 + 1} to {i + 1}")

# Save the comments_data DataFrame to a CSV file
comments_data.to_csv('files/youtube_comments.csv', index=False)

# Print the updated comments_data DataFrame
print("Updated comments_data DataFrame:")
print(comments_data)



-----------------------------------------------------------------
Quota exceeded for API Key 1. Trying next key.
-----------------------------------------------------------------


-----------------------------------------------------------------
Quota exceeded for API Key 2. Trying next key.
-----------------------------------------------------------------


-----------------------------------------------------------------
Quota exceeded for API Key 3. Trying next key.
-----------------------------------------------------------------


-----------------------------------------------------------------
Quota exceeded for API Key 4. Trying next key.
-----------------------------------------------------------------


-----------------------------------------------------------------
Quota exceeded for API Key 5. Trying next key.
-----------------------------------------------------------------


-----------------------------------------------------------------
Quota exceeded for API Key 6

Fetched 100 comments for video ID: oDuma1M09B0
Fetched 100 comments for video ID: jGsiY-2CkPU
Fetched 6 comments for video ID: tVpgRZURgsA
Fetched 45 comments for video ID: nWCct8XbQD0
Error fetching comments for video ID 2cBja3AbahY
Error fetching comments for video ID VDC1SUlXuGI
Fetched 34 comments for video ID: 0WTwVpEEMps
Fetched 72 comments for video ID: TKPmGjVFbrY
Fetched 100 comments for video ID: eIcmQNy9FsM
Fetched 100 comments for video ID: vayksn4Y93A
Fetched 68 comments for video ID: R8weLPF4qBQ
Fetched 100 comments for video ID: YCTgcZ6ImsQ
Fetched 55 comments for video ID: EhRhuJOJkGg
Fetched 12 comments for video ID: g2ofJqqsL5g
Fetched 100 comments for video ID: xa_z57UatDY
Fetched 41 comments for video ID: Y85sfYVOS84
Fetched 15 comments for video ID: a1AMga5GaEQ
Fetched 100 comments for video ID: 4zbpL3LeW7k
Fetched 23 comments for video ID: rDJXPiyvQfg
Fetched 13 comments for video ID: EfKFPktklmA
Fetched 42 comments for video ID: KTuip4gVJWg
Fetched 100 comments f

Fetched 100 comments for video ID: 1nKjO9QCSic
Fetched 52 comments for video ID: UN9lpOEqg04
Fetched 100 comments for video ID: 5nSlJSfnkWc
Fetched 73 comments for video ID: DWLFW9iAr-E
Fetched 100 comments for video ID: adYGUai7PNc
Fetched 35 comments for video ID: i63VGK1ElN0
Fetched 27 comments for video ID: x3W4bBk63MU
Fetched 3 comments for video ID: hkSJGkgql3M
Error fetching comments for video ID duTNQE0NnlI
Fetched 100 comments for video ID: nsKdR05ZsGE
Fetched 100 comments for video ID: uUK79LRP-Nc
Fetched 12 comments for video ID: V4nDMM5Jjws
Fetched 8 comments for video ID: -WaJA7mVk28
Fetched 10 comments for video ID: 48-M7MfrYMc
Fetched 47 comments for video ID: OxLQZVmKWEo
Fetched 33 comments for video ID: r4fIOWINnHk
Fetched 20 comments for video ID: -7-JVbUtyoI
Fetched 9 comments for video ID: NnqNzGt9BiQ
Fetched 20 comments for video ID: -YAhpfcdx0U
Fetched 82 comments for video ID: 15g12dGxWX4
Fetched 62 comments for video ID: fQe1Kehx110
Fetched 15 comments for video

Fetched 14 comments for video ID: 8tO1xhgYvUM
Fetched 16 comments for video ID: 9RTIQWXzexU
Fetched 96 comments for video ID: hZpm1zj9510
Fetched 83 comments for video ID: JWP_hrNHSN4
Fetched 20 comments for video ID: HWHH5TwEwtI
Fetched 53 comments for video ID: u9YgJjSCT08
Fetched 64 comments for video ID: fR6l4D47fW8
Fetched 10 comments for video ID: xWPS3-mjkZc
Fetched 30 comments for video ID: nlXBaP7l5n4
Fetched 54 comments for video ID: i0IzV1q9g9Q
Fetched 8 comments for video ID: qRCtqfd9IWQ
Fetched 100 comments for video ID: r9yyDEDGlr0
Fetched 99 comments for video ID: OqVgCfZX-yE
Fetched 11 comments for video ID: KhNnU4h1Q50
Fetched 44 comments for video ID: VCFhg6Q6gXU
Fetched 100 comments for video ID: JEoEGW4Hb9w
Fetched 100 comments for video ID: hkJIcFMN_pc
Fetched 53 comments for video ID: XZ8Q9mOYJgE
Fetched 67 comments for video ID: r9Q_WESQUVw
Fetched 33 comments for video ID: g7q1SjVdsNk
Fetched 11 comments for video ID: 7OHg4Ayewao
Fetched 58 comments for video ID

Fetched 13 comments for video ID: QDfs0SIvXqo
Fetched 6 comments for video ID: Fr2WXx129PY
Fetched 80 comments for video ID: rEnCEM48QaY
Fetched 72 comments for video ID: CJWolzZRQx4
Fetched 38 comments for video ID: FuZdS9da4S4
Fetched 65 comments for video ID: hY4EfSHyIyA
Fetched 77 comments for video ID: dGDIOhZMmzo
Fetched 21 comments for video ID: K_MLWXe3PCc
Fetched 40 comments for video ID: mZXVBHwFJy8
Fetched 100 comments for video ID: nszMBbCZKo4
Fetched 100 comments for video ID: m9a_7WFcPlI
Fetched 100 comments for video ID: gt9HkO-cGGo
Fetched 73 comments for video ID: dyGibtCfMLM
Fetched 100 comments for video ID: EJXDMwGWhoA
Fetched 34 comments for video ID: lYSHAyODiGs
Fetched 29 comments for video ID: hMJuES9UYGY
Fetched 3 comments for video ID: Vf1cyURBziw
Fetched 80 comments for video ID: -Zc7T0vUpj0
No comments found for video ID: yhKQH-ZLtrM
Fetched 27 comments for video ID: sET83nlk1NU
No comments found for video ID: H-FofBXlWKA
Fetched 9 comments for video ID: 916

Fetched 61 comments for video ID: EgeBYZSSIIc
Fetched 60 comments for video ID: 6-NmMuukdYE
Fetched 8 comments for video ID: 2hZKHf4OvrU
Fetched 100 comments for video ID: jhJYMEzQA-Q
Fetched 6 comments for video ID: eQ4Zg9tv0NI
Fetched 38 comments for video ID: _57GKDXmXKk
Fetched 53 comments for video ID: bNRJsanqOCE
Fetched 77 comments for video ID: Tw1jwFTDrSU
Fetched 16 comments for video ID: bHvOeMRgPuw
Fetched 22 comments for video ID: QcE6CdR60NY
Error fetching comments for video ID hLKa24D1KUk
Fetched 10 comments for video ID: JMHkz97bddc
Fetched 46 comments for video ID: U5jQU0gsuaE
Fetched 71 comments for video ID: xNMoQ_Cqt4E
Fetched 23 comments for video ID: A2YjI0Fre2o
Fetched 1 comments for video ID: H3ZEYe-qYrI
Fetched 3 comments for video ID: fDRO_SlFnZ4
Fetched 42 comments for video ID: juTBjT-hzlc
Error fetching comments for video ID GhX7-V-ShiQ
Error fetching comments for video ID KcoXN28yL9Q
Fetched 27 comments for video ID: UURCLhzk5P8
Fetched 40 comments for vide

Fetched 15 comments for video ID: wwzxZvnx9Aw
Fetched 17 comments for video ID: TUN3_yAOUNo
Fetched 9 comments for video ID: cN2omT-8oRs
Fetched 26 comments for video ID: G8ZHyJoKYnM
Fetched 54 comments for video ID: oiFqWwJsbAk
Fetched 10 comments for video ID: 00-WPchnLTE
Fetched 20 comments for video ID: 4NeHWLZbs0g
Fetched 100 comments for video ID: Tm5jBa4LzxQ
Fetched 58 comments for video ID: OzpqLRb2lfs
Fetched 5 comments for video ID: I19IuJYbuyg
Fetched 22 comments for video ID: _3_TVzGHxnk
Error fetching comments for video ID SlZX2o4n3Pk
Fetched 53 comments for video ID: n-X00-BoG9o
Fetched 3 comments for video ID: BBAkNCO7R78
Fetched 28 comments for video ID: ctsxTvTQUZI
Fetched 100 comments for video ID: RQLVzTtt2Ws
Error fetching comments for video ID nan
Fetched 13 comments for video ID: Mt2__27L358
Fetched 14 comments for video ID: 22UrI0YdzEQ
Error fetching comments for video ID 8e9fp9TAJCY
Fetched 81 comments for video ID: w7aE0a8keiA
Fetched 78 comments for video ID: 

Fetched 25 comments for video ID: XCuD8Q_Y10Q
Fetched 100 comments for video ID: umvFBoLOOgo
Error fetching comments for video ID aFYCQoIpGuE
Fetched 62 comments for video ID: R9CZErJ9idU
Fetched 9 comments for video ID: fk-qz0zdYwU
Fetched 100 comments for video ID: G5p7wq2PcJM
Error fetching comments for video ID fHamua2ORRY
Fetched 6 comments for video ID: bcwqBH0QqVY
Fetched 21 comments for video ID: x1pyVPFQ7lw
Error fetching comments for video ID IV4ILkVxycA
Fetched 47 comments for video ID: HPBkA3GuvcY
Fetched 18 comments for video ID: LorQZ4TfbTQ
Fetched 41 comments for video ID: LBSR7PAHow0
Fetched 21 comments for video ID: ENrpGFroUbw
Fetched 92 comments for video ID: 9KvW9Q9875Q
Fetched 35 comments for video ID: CtYp9YKhUZ8
No comments found for video ID: tQK6omIiKTg
Fetched 100 comments for video ID: c4dcjfaO0qk
Fetched 98 comments for video ID: pyRUijyrXr4
Fetched 2 comments for video ID: 0bBhEciISaY
Fetched 100 comments for video ID: OgG2jfBfLzI
Fetched 1 comments for vid

Fetched 100 comments for video ID: 8I3UfGMYr6I
Fetched 1 comments for video ID: j2VTZmegLnI
Fetched 2 comments for video ID: mIYgklAHuzI
Fetched 17 comments for video ID: YRGocXhqGPc
Fetched 59 comments for video ID: fgANS15AN4I
Error fetching comments for video ID uWYPozEgFmw
Fetched 6 comments for video ID: C_l75RaVadc
Fetched 94 comments for video ID: d9Z0DV33gAY
Fetched 100 comments for video ID: P28qmkCrNSM
Fetched 51 comments for video ID: J5mszOh_Qis
Fetched 93 comments for video ID: mqTrjDsEg4g
Error fetching comments for video ID RSTZl1bENrc
Fetched 30 comments for video ID: RgUCPG2G50s
Fetched 100 comments for video ID: M6WwZLvxV9Q
Fetched 33 comments for video ID: Ks5eE-lTFDw
Fetched 9 comments for video ID: QJhtT0KGvdg
Fetched 56 comments for video ID: HsTKoGcFS78
Fetched 2 comments for video ID: noSrT4BgwB0
Fetched 32 comments for video ID: 5LNE7jk2QWY
Fetched 11 comments for video ID: wHSwdSTETXA
Fetched 3 comments for video ID: Vxn8LscM2aE
Fetched 1 comments for video ID

Fetched 1 comments for video ID: 1HkkMSTEKms
Fetched 9 comments for video ID: HMcPo-S1v-k
Fetched 1 comments for video ID: Lj2odhzbISw
Fetched 17 comments for video ID: mgNUNTVmZwo
No comments found for video ID: YATfZRHl6-k
Fetched 3 comments for video ID: RVcgvVYI6W0
Fetched 13 comments for video ID: _nRtxtvqZu8
Fetched 6 comments for video ID: wPkVD4Ga35s
Fetched 4 comments for video ID: 7eZMfQqcjmM
Fetched 32 comments for video ID: SqMPzcGQJzA
Fetched 2 comments for video ID: 4dh5-rTIVzE
No comments found for video ID: YnVzRiZ2EeU
Fetched 41 comments for video ID: f2E7ArARdaE
Fetched 50 comments for video ID: gSz13CIyX7M
No comments found for video ID: 4iulZqioWZM
Fetched 42 comments for video ID: czuP9LDime8
Fetched 6 comments for video ID: E9JXrJ-y5tA
Error fetching comments for video ID 1khEBbx1xbU
Fetched 19 comments for video ID: hjYtwGdK_48
No comments found for video ID: L-DNN2CaRO8
Fetched 33 comments for video ID: kizEd4wD3Rw
Fetched 22 comments for video ID: 1MKr_bETSWA
F

  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)
  comments_data = comments_data.append(new_comments_data, ignore_index=True)