In [12]:
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
import concurrent.futures
from google.colab import drive  # Import for Google Colab

# Step 1: Mount Google Drive at /content/drive (Google Colab only).
# The dataset TSV loaded later in this notebook is read from a path under this mount point.
# NOTE(review): force_remount=True presumably forces a fresh mount whenever this cell is re-run — confirm against the Colab docs.
drive.mount('/content/drive', force_remount=True)

# Step 2: Define a function to get the frame count for each GIF with error handling
def get_frame_count(gif_url):
    """Download the image at ``gif_url`` and return the number of frames it contains.

    Args:
        gif_url: URL of the GIF to inspect.

    Returns:
        int: the frame count when the download and the parse both succeed.
        'Unreachable': when the HTTP request fails (connection problem, timeout,
            or an error status such as 404 or 500).
        'Error': when any other exception occurs, e.g. the payload is not a
            readable image.

    Failures are printed and reported through the sentinel strings above rather
    than raised, so a single bad link does not propagate an exception to the caller.
    """
    try:
        # Timeout keeps a stalled server from blocking the worker for long.
        response = requests.get(gif_url, timeout=10)
        response.raise_for_status()  # Turn 4xx/5xx responses into RequestException

        # The context manager releases the image's resources once the count is read.
        with Image.open(BytesIO(response.content)) as gif:
            # Pillow exposes the frame count of multi-frame images as `n_frames`;
            # images without that attribute are treated as single-frame.
            # NOTE(review): unlike a manual seek() loop this should not need to
            # decode every frame, which is expected to avoid per-frame decode
            # failures such as "image has no palette" — confirm on the affected GIFs.
            return getattr(gif, "n_frames", 1)

    except requests.exceptions.RequestException as e:  # Network-related exceptions
        print(f"Network error for {gif_url}: {e}")
        return 'Unreachable'  # Return custom value for unreachable links

    except Exception as e:  # Handle other exceptions, e.g., malformed GIF
        print(f"Error processing {gif_url}: {e}")
        return 'Error'  # Return custom value for processing errors


# Step 3: Define the main function to process GIFs from a given starting ID
def process_gifs_from_id(
    starting_index,
    batch_size=10000,
    file_path='/content/drive/My Drive/Colab_Notebooks/tgif-v2.0.tsv',
    max_workers=10,
):
    """Compute frame counts for one batch of GIFs and save them to a TSV file.

    The rows whose ``ID`` lies in ``[starting_index, starting_index + batch_size - 1]``
    are selected from the TSV at ``file_path``; each row's ``links`` URL is passed
    to ``get_frame_count`` on a thread pool, and the result is written to
    ``updated_csv_<last_id>.tsv`` in the current working directory.

    Args:
        starting_index: First ID (inclusive) of the batch.
        batch_size: Number of consecutive IDs in the batch (default 10,000).
        file_path: Tab-separated input file; must contain the columns
            ``ID``, ``links`` and ``description``.
        max_workers: Number of threads used to fetch GIFs concurrently.

    Returns:
        None. The results DataFrame is printed and saved to disk.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.

    Note:
        ``<last_id>`` in the output filename is the nominal upper bound of the
        requested range, even when the input file holds fewer IDs than that.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Step 4: Inclusive upper bound of the ID range for this batch
    last_id = starting_index + batch_size - 1

    # Step 5: Load the main TSV file containing the GIF URLs into a DataFrame
    df = pd.read_csv(file_path, sep='\t')

    # Step 6: Keep only the rows whose ID falls inside [starting_index, last_id]
    df_filtered = df[(df['ID'] >= starting_index) & (df['ID'] <= last_id)]

    # Step 7: Extract URLs corresponding to the filtered DataFrame
    urls = df_filtered['links'].tolist()

    # Step 8: Fetch frame counts in parallel. executor.map yields results in the
    # same order as `urls`, so frame_counts lines up with df_filtered row by row.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        frame_counts = list(executor.map(get_frame_count, urls))

    # Step 9: Create a new DataFrame for the results with IDs, links, descriptions, and Frame Counts
    results_df = pd.DataFrame({
        'ID': df_filtered['ID'],
        'links': df_filtered['links'],
        'description': df_filtered['description'],
        'Frame Count': frame_counts
    })

    # Step 10: Build the output filename from the batch's upper bound (e.g., `updated_csv_10000.tsv`)
    output_file_name = f'updated_csv_{last_id}.tsv'

    # Step 11: Save the results DataFrame to the dynamically generated TSV file
    results_df.to_csv(output_file_name, sep='\t', index=False)

    # Print the results DataFrame to verify and confirm where it's saved
    print(results_df)  # Show only the results DataFrame
    print(f"Data saved to {output_file_name}")


Mounted at /content/drive


Kheshini Budhna (C0909662)

In [6]:
# Example call: process the first batch, which covers IDs 1 through 10,000
starting_id = 1  # Starting from ID 1
process_gifs_from_id(starting_id)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Frame Count'] = frame_counts


         ID                                              links  \
0         1  https://38.media.tumblr.com/9f6c25cc350f12aa74...   
1         2  https://38.media.tumblr.com/9ead028ef62004ef6a...   
2         3  https://38.media.tumblr.com/9f43dc410be85b1159...   
3         4  https://38.media.tumblr.com/9f659499c8754e40cf...   
4         5  https://38.media.tumblr.com/9ed1c99afa7d714118...   
...     ...                                                ...   
9995   9996  https://38.media.tumblr.com/23a7ac98bd14b8acb2...   
9996   9997  https://38.media.tumblr.com/tumblr_lxt32ho3Nv1...   
9997   9998  https://33.media.tumblr.com/fe80a7ea4ed469b2fc...   
9998   9999  https://38.media.tumblr.com/459647e0ba2030d7cb...   
9999  10000  https://38.media.tumblr.com/de81f2bb86b7f336bb...   

                                            description  Frame Count  
0     a man is glaring, and someone with sunglasses ...           18  
1              a cat tries to catch a mouse on a tablet          

In [13]:
# Process the second batch: IDs 10,001 through 20,000
process_gifs_from_id(10001)

          ID                                              links  \
10000  10001  https://38.media.tumblr.com/3764bb37b840729250...   
10001  10002  https://33.media.tumblr.com/d668444afb13a19dc8...   
10002  10003  https://33.media.tumblr.com/22d840ef1e7a39fc81...   
10003  10004  https://38.media.tumblr.com/b83f90c529d1889148...   
10004  10005  https://33.media.tumblr.com/027e2ed1ffa4e45d14...   
...      ...                                                ...   
19995  19996  https://38.media.tumblr.com/0bc2fa11f3769b41fd...   
19996  19997  https://38.media.tumblr.com/0ee04a29d221935179...   
19997  19998  https://38.media.tumblr.com/b1367133d4f7b7daf5...   
19998  19999  https://38.media.tumblr.com/0c04c65cf9bdad8e35...   
19999  20000  https://38.media.tumblr.com/a80203f94abbc08409...   

                                             description  Frame Count  
10000             a woman is walking down a dark hallway           58  
10001  a man is in a pen with various animals while

In [14]:
# Process the next batch: IDs 20,001 through 30,000
process_gifs_from_id(20001)

Network error for https://33.media.tumblr.com/cf3d907da94bc417fc1234d71d3c9260/tumblr_np5v6oemH61sbwmk4o1_400.gif: 502 Server Error: Bad Gateway for url: https://64.media.tumblr.com/cf3d907da94bc417fc1234d71d3c9260/tumblr_np5v6oemH61sbwmk4o1_400.gif
          ID                                              links  \
20000  20001  https://38.media.tumblr.com/25ed9eeeefdb778511...   
20001  20002  https://38.media.tumblr.com/55357fd5a336c67dd8...   
20002  20003  https://38.media.tumblr.com/8b6e6b111f39205471...   
20003  20004  https://38.media.tumblr.com/a52904b90b8741b9d8...   
20004  20005  https://33.media.tumblr.com/tumblr_mc9ud7zJZp1...   
...      ...                                                ...   
29995  29996  https://38.media.tumblr.com/41dd93ef71f6e50272...   
29996  29997  https://33.media.tumblr.com/6318a5ee2f6c474c6d...   
29997  29998  https://38.media.tumblr.com/a54dabf5505c3232ca...   
29998  29999  https://33.media.tumblr.com/f16551af0ef9580fb5...   
29999  30000 

In [15]:
# Process the next batch: IDs 30,001 through 40,000
process_gifs_from_id(30001)

Network error for https://33.media.tumblr.com/34f3484910558595cd64b072fc1ae128/tumblr_nex5arpyUG1u2473po1_250.gif: 500 Server Error: Internal Server Error for url: https://64.media.tumblr.com/34f3484910558595cd64b072fc1ae128/tumblr_nex5arpyUG1u2473po1_250.gif
          ID                                              links  \
30000  30001  https://38.media.tumblr.com/7da8f8cf155f910797...   
30001  30002  https://38.media.tumblr.com/196c58d479e32cd845...   
30002  30003  https://38.media.tumblr.com/fedb011aa632b9559f...   
30003  30004  https://38.media.tumblr.com/0d83a8b8d34bcee39f...   
30004  30005  https://33.media.tumblr.com/a4b1422b8b014b8c1c...   
...      ...                                                ...   
39995  39996  https://38.media.tumblr.com/3846f15265f6f3423a...   
39996  39997  https://38.media.tumblr.com/556ec0de5e04c3b598...   
39997  39998  https://38.media.tumblr.com/49998397e0c72c5ff0...   
39998  39999  https://38.media.tumblr.com/bebf7e03b2b586f3f2...   
399

In [16]:
# Process the next batch: IDs 40,001 through 50,000
process_gifs_from_id(40001)

          ID                                              links  \
40000  40001  https://37.media.tumblr.com/6548f98d405d031732...   
40001  40002  https://33.media.tumblr.com/731741f3712650e78c...   
40002  40003  https://38.media.tumblr.com/2025279d45fe89209d...   
40003  40004  https://33.media.tumblr.com/61406fb0e62e9b82f4...   
40004  40005  https://33.media.tumblr.com/4cf042d3dd77c9f0fe...   
...      ...                                                ...   
49995  49996  https://33.media.tumblr.com/6b616537e19696cabf...   
49996  49997  https://31.media.tumblr.com/625bb406ee01c285b8...   
49997  49998  https://31.media.tumblr.com/75c7676c81ad45ba8b...   
49998  49999  https://31.media.tumblr.com/984bd3c51ae95cf65a...   
49999  50000  https://31.media.tumblr.com/c83723f73113f06a09...   

                                             description  Frame Count  
40000             a woman gives a man and a child a kiss           17  
40001      this is a woman closing her eyes and smi

In [17]:
# Process the next batch: IDs 50,001 through 60,000
process_gifs_from_id(50001)

Error processing https://33.media.tumblr.com/55b6b551c2326f4969cc200e8b2992e2/tumblr_nd6ypj326X1tk7w4eo1_500.gif: image has no palette
          ID                                              links  \
50000  50001  https://33.media.tumblr.com/0ad9c307b0d60255ab...   
50001  50002  https://33.media.tumblr.com/08ae973525b7adeeeb...   
50002  50003  https://33.media.tumblr.com/0a3fa3435d0aacd9a9...   
50003  50004  https://31.media.tumblr.com/aef378fcd7b1950cc5...   
50004  50005  https://33.media.tumblr.com/2b1a535a85a62d7346...   
...      ...                                                ...   
59995  59996  https://33.media.tumblr.com/3dcb43494b710b09d6...   
59996  59997  https://31.media.tumblr.com/73e3ec7683eaab3f79...   
59997  59998  https://38.media.tumblr.com/b697d43240e913f4e0...   
59998  59999  https://31.media.tumblr.com/bd87825dd7180e6cb4...   
59999  60000  https://33.media.tumblr.com/1fd14a23357223ddf1...   

                                             description Fra

In [18]:
# Process the next batch: IDs 60,001 through 70,000
process_gifs_from_id(60001)

Network error for https://38.media.tumblr.com/b169f4e3b96603031d808beda4be86a6/tumblr_nkuohvy4ff1qadfw9o1_400.gif: 502 Server Error: Bad Gateway for url: https://64.media.tumblr.com/b169f4e3b96603031d808beda4be86a6/tumblr_nkuohvy4ff1qadfw9o1_400.gif
Network error for https://33.media.tumblr.com/4add60f0e4d5123b5b490f61592ea27a/tumblr_ndwjaaRuOU1s8r7lxo1_400.gif: 502 Server Error: Bad Gateway for url: https://64.media.tumblr.com/4add60f0e4d5123b5b490f61592ea27a/tumblr_ndwjaaRuOU1s8r7lxo1_400.gif
          ID                                              links  \
60000  60001  https://33.media.tumblr.com/50b7e47d47ab0ba9e1...   
60001  60002  https://33.media.tumblr.com/63df0f12cd4a921785...   
60002  60003  https://38.media.tumblr.com/9fc04b54a0c1c34695...   
60003  60004  https://38.media.tumblr.com/8ddb70cccdf949fcfb...   
60004  60005  https://33.media.tumblr.com/2194f80ed464739bf0...   
...      ...                                                ...   
69995  69996  https://33.media.

In [19]:
# Process the next batch: IDs 70,001 through 80,000
process_gifs_from_id(70001)

          ID                                              links  \
70000  70001  https://33.media.tumblr.com/c0987ba964de2b567f...   
70001  70002  https://38.media.tumblr.com/75782a991fe7f37d39...   
70002  70003  https://38.media.tumblr.com/96d3cc45c2b4213500...   
70003  70004  https://33.media.tumblr.com/a963ead503037a67f8...   
70004  70005  https://31.media.tumblr.com/259b1117c7a0e07b64...   
...      ...                                                ...   
79995  79996  https://38.media.tumblr.com/712403c0fcb2fa55bd...   
79996  79997  https://38.media.tumblr.com/d901e76a8ae61ea443...   
79997  79998  https://38.media.tumblr.com/d0e9a3907d8acc261f...   
79998  79999  https://38.media.tumblr.com/01a11627847ffc664d...   
79999  80000  https://33.media.tumblr.com/9f9c7fcaa5b256d94c...   

                                             description  Frame Count  
70000             a boy rubs his neck with headphone on.           24  
70001            a young woman is looking at a snow

In [20]:
# Process the final batch: requested range is IDs 80,001 through 90,000
# (the recorded output below stops at ID 89,735, so this batch holds fewer rows)
process_gifs_from_id(80001)

Network error for https://38.media.tumblr.com/b82ad15ed8404591aecd16d2ff0c51cf/tumblr_nbkigaAxRs1qej93ko1_500.gif: 502 Server Error: Bad Gateway for url: https://64.media.tumblr.com/b82ad15ed8404591aecd16d2ff0c51cf/tumblr_nbkigaAxRs1qej93ko1_500.gif
          ID                                              links  \
80000  80001  https://38.media.tumblr.com/e2f9d28a4a195282d1...   
80001  80002  https://38.media.tumblr.com/91960f513cf16bc4d3...   
80002  80003  https://33.media.tumblr.com/8f7dab12832a5b430e...   
80003  80004  https://38.media.tumblr.com/c57eb4430867086bf6...   
80004  80005  https://38.media.tumblr.com/b221bcda6ecf17ccd7...   
...      ...                                                ...   
89730  89731  https://38.media.tumblr.com/62493b85c2e2a38b04...   
89731  89732  https://31.media.tumblr.com/1213c26056c39e2e6a...   
89732  89733  https://33.media.tumblr.com/0c4ca6f63e2065d6e4...   
89733  89734  https://33.media.tumblr.com/d94f412610a6f58e59...   
89734  89735 