In [2]:
import pandas as pd

# Location of the tab-separated dataset file; point this at your own copy.
file_path = 'tgif-v2.1.tsv'

# Parse the tab-separated file into a pandas DataFrame.
df = pd.read_csv(file_path, sep='\t')

# Print a plain-text preview of the first five rows.
print(df.head(n=5))

   ID                                              links  \
0   1  https://38.media.tumblr.com/9f6c25cc350f12aa74...   
1   2  https://38.media.tumblr.com/9ead028ef62004ef6a...   
2   3  https://38.media.tumblr.com/9f43dc410be85b1159...   
3   4  https://38.media.tumblr.com/9f659499c8754e40cf...   
4   5  https://38.media.tumblr.com/9ed1c99afa7d714118...   

                                         description  
0  a man is glaring, and someone with sunglasses ...  
1           a cat tries to catch a mouse on a tablet  
2                   a man dressed in red is dancing.  
3     an animal comes close to another in the jungle  
4  a man in a hat adjusts his tie and makes a wei...  


In [5]:
!pip install google-cloud-storage



In [19]:
# ID to look up, without the .gif extension (change as needed)
id_a_buscar = '25991'

# Path to the .tsv file (change this path if necessary)
dataset_path = '/content/tgif-v2.1.tsv'

# Load the dataset; the ID column is cast to str so the .str accessor used
# below does not raise an AttributeError
df = pd.read_csv(dataset_path, sep='\t').astype({'ID': str})

# Keep only the rows whose whitespace-trimmed ID equals the requested one
resultado = df.loc[df['ID'].str.strip() == id_a_buscar]

# Report whether the ID was found
if resultado.empty:
    print(f"No description found for ID: {id_a_buscar}")
else:
    print("Description found:")
    print(resultado)

Description found:
          ID                                              links  \
25990  25991  https://38.media.tumblr.com/3d4967ab2d22045c5d...   

                                             description  
25990  there are two men in a car, and they are talki...  


In [22]:
!pip install google-cloud-storage




In [23]:
from google.cloud import storage
from google.colab import auth

# Step 1: Authenticate the user
auth.authenticate_user()

# Step 2: Install Google Cloud Storage library if not already done
!pip install google-cloud-storage

# Initialize the GCS client
client = storage.Client()



In [None]:
# Local input files: the dataset table plus the train/val/test text files.
# Adjust these paths if your files live elsewhere.
dataset_path = '/content/tgif-v2.1.tsv'
train_file_path = '/content/train.txt'
val_file_path = '/content/val.txt'
test_file_path = '/content/test.txt'

# Destination bucket; replace the name with your actual bucket.
bucket_name = 'gif-bucket-1000'
bucket = client.get_bucket(bucket_name)

In [24]:
# Read the tab-separated dataset and cast the 'ID' column to str in a single
# expression, so later comparisons against string IDs behave as expected.
df = pd.read_csv(dataset_path, sep='\t').astype({'ID': str})


In [25]:
# Function to create new text files with descriptions
def create_file_with_description(input_file, output_file, dataframe=None):
    """Write an ``ID: description`` line for every ID listed in a text file.

    Each non-blank line of ``input_file`` is treated as one ID. Surrounding
    whitespace and a trailing ``.gif`` extension are removed, and the result
    is looked up in the ``ID`` column of the description table. One line per
    ID is written to ``output_file`` in input order, formatted as
    ``<ID>: <description>``. IDs that are not in the table are written as
    ``<ID>: No description available``. Blank input lines are skipped.

    Args:
        input_file: Path of the text file holding one ID per line (each may
            end in ``.gif``).
        output_file: Path of the text file to create or overwrite.
        dataframe: Optional table with ``ID`` and ``description`` columns.
            When omitted, the notebook-level ``df`` is used, as before.

    Returns:
        None. The result is written to ``output_file`` and a confirmation
        message is printed.
    """
    table = df if dataframe is None else dataframe

    # Build the ID -> description mapping once instead of filtering the whole
    # DataFrame for every ID, which was quadratic in practice. setdefault keeps
    # the FIRST row for a duplicated ID, matching the previous iloc[0] choice.
    description_by_id = {}
    table_ids = table['ID'].astype(str).str.strip()
    for table_id, text in zip(table_ids, table['description']):
        description_by_id.setdefault(table_id, text)

    # Explicit UTF-8 so the result does not depend on the platform default.
    with open(input_file, 'r', encoding='utf-8') as f:
        raw_lines = f.read().splitlines()

    lines_with_description = []
    for raw_line in raw_lines:
        gif_id = raw_line.strip()
        if not gif_id:
            continue  # a blank line carries no ID
        # Drop only a trailing extension, not '.gif' in the middle of a name.
        if gif_id.endswith('.gif'):
            gif_id = gif_id[:-len('.gif')]
        description = description_by_id.get(gif_id, 'No description available')
        lines_with_description.append(f"{gif_id}: {description}")

    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in lines_with_description)

    print(f'Created {output_file} with descriptions.')

In [26]:
# Output paths for the description-annotated train/val/test files
train_with_desc = '/content/train_with_description.txt'
val_with_desc = '/content/val_with_description.txt'
test_with_desc = '/content/test_with_description.txt'

# Generate each annotated file from its input file, in train/val/test order
for source_path, target_path in (
    (train_file_path, train_with_desc),
    (val_file_path, val_with_desc),
    (test_file_path, test_with_desc),
):
    create_file_with_description(source_path, target_path)

Created /content/train_with_description.txt with descriptions.
Created /content/val_with_description.txt with descriptions.
Created /content/test_with_description.txt with descriptions.


In [27]:
# Upload the new text files to GCS
def upload_to_gcs(file_path):
    """Upload a local file to the notebook's bucket under its bare file name.

    The object name is whatever follows the final ``/`` in ``file_path``.
    Relies on the notebook-level ``bucket`` and ``bucket_name`` and prints a
    confirmation line once the upload call returns.
    """
    object_name = file_path.rsplit('/', 1)[-1]
    target_blob = bucket.blob(object_name)
    target_blob.upload_from_filename(file_path)
    print(f'Uploaded {file_path} to {bucket_name}')

# Push each annotated file to the bucket, in train/val/test order
for described_file in (train_with_desc, val_with_desc, test_with_desc):
    upload_to_gcs(described_file)

print("All files with descriptions created and uploaded successfully.")

Uploaded /content/train_with_description.txt to gif-bucket-1000
Uploaded /content/val_with_description.txt to gif-bucket-1000
Uploaded /content/test_with_description.txt to gif-bucket-1000
All files with descriptions created and uploaded successfully.
