# **Pre-Processing: Adding Descriptions to JSON Files and Updating gif_id**

In [2]:
import json
import os
from google.cloud import storage
from google.colab import auth

# Run the Colab authentication flow before creating the storage client
# (presumably so the client below can use the resulting credentials — confirm
# against the Colab auth documentation).
auth.authenticate_user()

# Set up Google Cloud Storage client
storage_client = storage.Client()
bucket_name = 'gif-bucket-1000'
# Module-level bucket handle; update_json_file() reads and writes blobs through it.
bucket = storage_client.bucket(bucket_name)

def _parse_descriptions(description_content):
    """Build a ``{gif_id: description}`` dict from ``gif_id: description`` lines."""
    descriptions = {}
    for line in description_content.splitlines():
        if not line.strip():
            # Blank lines carry no entry; skip them instead of failing the file.
            continue
        # Split on the FIRST ': ' only, so a description that itself contains
        # ': ' is kept intact rather than breaking the unpacking.
        gif_id, separator, description = line.partition(': ')
        if not separator:
            print(f"Skipping malformed description line: {line!r}")
            continue
        descriptions[gif_id] = description
    return descriptions


def update_json_file(json_file_name, description_file_name):
    """Update a JSON file in GCS with new gif_id and gif_description fields.

    Downloads ``json_file_name`` (a JSON list of items, each with a
    ``'gif_file'`` key) and ``description_file_name`` (UTF-8 text, one
    ``gif_id: description`` entry per line) from the module-level ``bucket``.
    Each item's ``'gif_id'`` is set to the part of ``'gif_file'`` before the
    first ``'.'``, and ``'gif_description'`` to the matching description
    (``''`` when none exists). The result is uploaded back over the same blob.

    Args:
        json_file_name: Blob name of the JSON file to update in place.
        description_file_name: Blob name of the description text file.

    Returns:
        None. Any failure is reported with a printed message rather than
        raised, so one bad file does not stop the remaining updates.
    """
    try:
        # Download the JSON file from GCS (json.loads accepts UTF-8 bytes).
        json_blob = bucket.blob(json_file_name)
        json_data = json.loads(json_blob.download_as_bytes())

        # Download the description file from GCS.
        description_blob = bucket.blob(description_file_name)
        description_content = description_blob.download_as_bytes().decode('utf-8')

        descriptions = _parse_descriptions(description_content)

        # Update the JSON data
        for gif_item in json_data:
            gif_id = gif_item['gif_file'].split('.')[0]  # Extract ID from filename
            gif_item['gif_id'] = gif_id
            gif_item['gif_description'] = descriptions.get(gif_id, '')  # Add description

        # Upload the updated JSON file to GCS
        json_blob.upload_from_string(json.dumps(json_data, indent=4), content_type='application/json')
        print(f"Updated {json_file_name} in GCS")

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller continue with the next file.
        print(f"Error updating {json_file_name}: {e}")

# Update the JSON file of every dataset split (train, then test, then val),
# pairing each "<split>_data.json" with its "<split>_with_description.txt".
for split_name in ('train', 'test', 'val'):
    update_json_file(f'{split_name}_data.json', f'{split_name}_with_description.txt')

Updated train_data.json in GCS
Updated test_data.json in GCS
Updated val_data.json in GCS
