##                 to run in all cases

### imports

In [1]:
import json, os
from typing import Dict, Optional, Any, List
from pprint import pprint

from fb_api_client import FbApiClient
from fb_post_manager import FbPostManager
from fb_utils import FbUtils
from fb_comment_manager import FbCommentManager


import logging
logging.basicConfig(level=logging.DEBUG)  # Set the logging level to DEBUG

### Environmental Variables based on App and User/Page

Use fb_create_json_config.py to create the JSON config file
the structure of the file is User_Appname_PageName_config.json

As I am running it from the Notebook, we need to use the relative directory ..\


In [2]:
config_file = r"..\config_files\FB_JK_JK Travel_JK Travel_config.json"

In [3]:

# load the JSON configuration
def load_fb_config(config_file):
    with open(config_file, 'r') as f:
        return json.load(f)

fb_config = load_fb_config(config_file)

# Load credentials
try:
    app_id = os.environ[fb_config["app_id"]]
    app_secret = os.environ[fb_config["app_secret"]]
    access_token = os.environ[fb_config["access_token"]]    
    page_id = os.environ[fb_config["page_id"]]
    user_id = os.environ[fb_config["user_id"]]
    user_token = os.environ[fb_config["user_token"]]


except KeyError as e:
    print(f"Error: Environment variable {e} not set. Please set it before running the tests.")
    exit(1)  # Exit with error code

# try:
# app_id = os.environ["FB_M001_App_id"]
    # app_secret = os.environ["FB_M001_App_secret"]
    # access_token = os.environ["FB_M001_ES_App_token"]
    # page_id = os.environ["FB_ES_Pg_id"]
    # user_id = os.environ["FB_LS_User_id"]
# except KeyError:
#     print("Error: Environment variables not set. Please set them before running the tests.")
#     exit(1)  # Exit with error code

### get the credentials and instantiate the fb_client

In [4]:

# Instantiate the FacebookAPIClient and PostManager
fb_client = FbApiClient(app_id, app_secret, access_token, page_id)
fb_post_manager = FbPostManager(fb_client)
fb_utils = FbUtils(fb_client)
fb_comment_manager = FbCommentManager(fb_client)

In [5]:
print("app_id", app_id)
print("app_secret", app_secret)
print("access_token", access_token)
print("page_id", page_id)
print("user_id", user_id)
print("user_token", user_token)

app_id 1631848667637716
app_secret 7df81d162be6d02601314f5308585198
access_token EAAXMKGApZA9QBO3SwNB9yNZA3qLdWI19f6PTPcd8ZClXvGjPwgCnZAADAYCJZCM8VH7Gaj9om1i6xYFLMcxZCVbdaECTJWHFTqriSrZCzCQXcBtNIgAH6lgl1ZAHHkktTF1Aa1tLmLFsDHjG33RLuHYcYFPoVKeByEM5qZCXWsbCAoLnlyHvAj5ZBPvCREL6QWOYhaoA8MYwQZA
page_id 379955691858248
user_id 379955691858248
user_token EAAXMKGApZA9QBOZCZAvuR1roZCPtiQqFOPNwzsZBs0stea0MP5ZCOUCZBAvGYOiOwSZCk2CdMz8T19KGP4w7r3mQKXBqZBzdNuxiaSfujMqHNSjtSWZCQjtHFEdfsmSpUihliDzDkkoUbn1myZAzJBgPobIpbqbbspx96KhUODZBJG5IcT6WpZAm4WQXXzkSaHkO98QNu


In [6]:
print(dir(fb_client))
print(dir(fb_post_manager)) 
print(dir(fb_utils))
print(dir(fb_comment_manager))


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_graph', 'access_token', 'api_version', 'app_id', 'app_secret', 'get_graph_api_object', 'load_credentials', 'page_id']
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'api_client', 'get_latest_posts', 'get_post_by_id', 'get_post_likes', 'get_post_shares', 'publish_photo_post', 'publish_text_post']
['__class__', '__delattr__

## Test to get information about a Group
### problems to get the id of a group - probably scraping better
### problems to get the feed of a Group. Probably scraping better



## Test to grab the latests posts from any of my pages (num_posts)

In [13]:
try:
    latest_posts = fb_post_manager.get_latest_posts(page_id, num_posts=3)
    
    if latest_posts:  # Check if the list is not empty
        print("Latest posts retrieved successfully:")
        for post in latest_posts:
            print(f"  ID: {post['id']}")
            print(f"  Message: {post.get('message', 'No message available')}")  # Use get() to handle missing messages
            print(f"  Post link: {post['permalink_url']}")
            print(f"  Likes Count: {post['likes']['summary']['total_count']}")
            print(f"  Comments Count: {post['comments']['summary']['total_count']}")
            print("---------------------------------------------------------------------")

            # You can print other fields here as well
            # ----------------------------------------------------------------------------
            # NOTE NOTE NOTE MAY NEED TO Decode message text before printing JSON
            # ----------------------------------------------------------------------------
            
            import json
            print(json.dumps(post, indent=4))      # Pretty-print each post individually

    else:
        print("No latest posts found or error retrieving posts.")
except Exception as e:  # Catching general exceptions for this example
    print(f"Error retrieving latest posts: {e}")



DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "GET /v3.1/379955691858248/posts?fields=id%2Cmessage%2Ccreated_time%2Cpermalink_url%2Clikes.summary%28true%29%2Ccomments.summary%28true%29&limit=3&access_token=EAAXMKGApZA9QBO3SwNB9yNZA3qLdWI19f6PTPcd8ZClXvGjPwgCnZAADAYCJZCM8VH7Gaj9om1i6xYFLMcxZCVbdaECTJWHFTqriSrZCzCQXcBtNIgAH6lgl1ZAHHkktTF1Aa1tLmLFsDHjG33RLuHYcYFPoVKeByEM5qZCXWsbCAoLnlyHvAj5ZBPvCREL6QWOYhaoA8MYwQZA HTTP/11" 200 2457


Latest posts retrieved successfully:
  ID: 379955691858248_122132752322362482
  Message: El Rocio so full with horses
  Post link: https://www.facebook.com/122132753258362482/posts/122132752322362482
  Likes Count: 0
  Comments Count: 0
---------------------------------------------------------------------
{
    "id": "379955691858248_122132752322362482",
    "message": "El Rocio so full with horses",
    "created_time": "2024-10-02T17:54:49+0000",
    "permalink_url": "https://www.facebook.com/122132753258362482/posts/122132752322362482",
    "likes": {
        "data": [],
        "summary": {
            "total_count": 0,
            "can_like": true,
            "has_liked": false
        }
    },
    "comments": {
        "data": [],
        "summary": {
            "order": "chronological",
            "total_count": 0,
            "can_comment": true
        }
    }
}
  ID: 379955691858248_122132750960362482
  Message: Happy water? 
  https://www.instagram.com/reel/DAn7I2OuzbA/
  

## Test to retrieve a post and its media by post id

In [14]:

post_id = "379955691858248_122132752322362482"  # Replace with the actual post ID
specific_post = fb_post_manager.get_post_by_id(post_id)
if specific_post:
    import json
    print(json.dumps(specific_post, indent=4)) 

    if "attachments" in specific_post:
        attachments = specific_post["attachments"]["data"]
        for attachment in attachments:
            media_type = attachment.get("media_type")
            if media_type == "photo":
                photo_url = attachment["media"]["image"]["src"]
                print(f"Found photo attachment: {photo_url}")
            elif media_type == "video":
                video_url = attachment["media"]["source"]
                print(f"Found video attachment: {video_url}")
            # Add other media types (e.g., "link", "album") if needed
    else:
        print("No attachments found in this post.")
else:
    print("No post found or error retrieving post.")


DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "GET /v3.1/379955691858248_122132752322362482?fields=id%2Cmessage%2Ccreated_time%2Cpermalink_url%2Clikes.summary%28true%29%2Ccomments.summary%28true%29%2Cattachments&access_token=EAAXMKGApZA9QBO3SwNB9yNZA3qLdWI19f6PTPcd8ZClXvGjPwgCnZAADAYCJZCM8VH7Gaj9om1i6xYFLMcxZCVbdaECTJWHFTqriSrZCzCQXcBtNIgAH6lgl1ZAHHkktTF1Aa1tLmLFsDHjG33RLuHYcYFPoVKeByEM5qZCXWsbCAoLnlyHvAj5ZBPvCREL6QWOYhaoA8MYwQZA HTTP/11" 200 641


{
    "id": "379955691858248_122132752322362482",
    "message": "El Rocio so full with horses",
    "created_time": "2024-10-02T17:54:49+0000",
    "permalink_url": "https://www.facebook.com/122132753258362482/posts/122132752322362482",
    "likes": {
        "data": [],
        "summary": {
            "total_count": 0,
            "can_like": true,
            "has_liked": false
        }
    },
    "comments": {
        "data": [],
        "summary": {
            "order": "chronological",
            "total_count": 0,
            "can_comment": true
        }
    },
    "attachments": {
        "data": [
            {
                "description": "El Rocio so full with horses",
                "media": {
                    "image": {
                        "height": 1279,
                        "src": "https://scontent-mad1-1.xx.fbcdn.net/v/t39.30808-6/461798943_122132752304362482_3237548264267315728_n.jpg?stp=cp1_dst-jpg_p720x720&_nc_cat=107&ccb=1-7&_nc_sid=127cfc&_nc_ohc=jp

## Test to retrieve likes information by post id

### Ojo puede no funcionr, pero al extraer toda la informacion del ultio post, si incluye los likes



In [16]:
post_id = "379955691858248_122132752322362482"  # Replace with the actual post ID

likes = fb_post_manager.get_post_likes(post_id)

print(f"Post ID: {post_id}")
print(f"Number of Likes: {len(likes)}")  
for like in likes:
    if "unknown" in like:
        print(f"Liked by: Unknown user")
    else:
        print(f"Liked by: {like.get('name', 'Unknown Name')}") 


DEBUG:urllib3.connectionpool:Resetting dropped connection: graph.facebook.com
DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "GET /v3.1/379955691858248_122132752322362482/likes?access_token=EAAXMKGApZA9QBO3SwNB9yNZA3qLdWI19f6PTPcd8ZClXvGjPwgCnZAADAYCJZCM8VH7Gaj9om1i6xYFLMcxZCVbdaECTJWHFTqriSrZCzCQXcBtNIgAH6lgl1ZAHHkktTF1Aa1tLmLFsDHjG33RLuHYcYFPoVKeByEM5qZCXWsbCAoLnlyHvAj5ZBPvCREL6QWOYhaoA8MYwQZA HTTP/11" 200 189


Post ID: 379955691858248_122132752322362482
Number of Likes: 0


## Test to retrieve information regarding post shares


In [17]:
post_id = "379955691858248_122132752322362482"  # Replace with the actual post ID
shares = fb_post_manager.get_post_shares(post_id)
print(type(shares))
for item in shares:
    pprint(item)  # Pretty-print each item

DEBUG:urllib3.connectionpool:Resetting dropped connection: graph.facebook.com
DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "GET /v3.1/379955691858248_122132752322362482/sharedposts?access_token=EAAXMKGApZA9QBO3SwNB9yNZA3qLdWI19f6PTPcd8ZClXvGjPwgCnZAADAYCJZCM8VH7Gaj9om1i6xYFLMcxZCVbdaECTJWHFTqriSrZCzCQXcBtNIgAH6lgl1ZAHHkktTF1Aa1tLmLFsDHjG33RLuHYcYFPoVKeByEM5qZCXWsbCAoLnlyHvAj5ZBPvCREL6QWOYhaoA8MYwQZA HTTP/11" 200 11


<class 'list'>


# Tests to interact with comments

## Test to read and print the comments of a specific post

In [20]:
post_id = input("Enter the post ID: ")  # You can prompt for the post ID
    
# Retrieve comments with specified fields
comments = fb_comment_manager.get_post_comments(
    post_id, fields=["id", "message", "created_time", "from", "like_count", "parent", "user_likes","reactions"]
)

if comments:
    # Process the retrieved comments
    for comment in comments:
        print(f"Comment ID: {comment['id']}")
        
        # Modified Author Line (handling empty dictionary)
        author_info = comment.get('from', {})  
        if isinstance(author_info, dict) and author_info:  # Check if it's a non-empty dictionary
            author_name = author_info.get('name', "Unknown Author")  # Get name or default
        else:
            author_name = "Unknown Author (ID: Unknown ID)" 

        print(f"Author: {author_name}")

        print(f"Message: {comment['message']}")
        print(f"Created Time: {comment['created_time']}")
        print(f"Likes: {comment.get('like_count', 0)}")

        # Check if it's a reply
        if "parent" in comment:
            print(f"Reply to: {comment['parent']['id']}")
        else:
            print("Not a reply")

        # Check if liked by the user
        if comment.get("user_likes"):
            print("Liked by you")
        else:
            print("Not liked by you")

        print("-" * 20)  # Separator
else:
    print("No comments found or an error occurred.")

DEBUG:urllib3.connectionpool:Resetting dropped connection: graph.facebook.com
DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "GET /v3.1/379955691858248_122132752322362482/comments?fields=id%2Cmessage%2Ccreated_time%2Cfrom%2Clike_count%2Cparent%2Cuser_likes%2Creactions&access_token=EAAXMKGApZA9QBO3SwNB9yNZA3qLdWI19f6PTPcd8ZClXvGjPwgCnZAADAYCJZCM8VH7Gaj9om1i6xYFLMcxZCVbdaECTJWHFTqriSrZCzCQXcBtNIgAH6lgl1ZAHHkktTF1Aa1tLmLFsDHjG33RLuHYcYFPoVKeByEM5qZCXWsbCAoLnlyHvAj5ZBPvCREL6QWOYhaoA8MYwQZA HTTP/11" 200 295


Comment ID: 122132752322362482_3885889078335255
Author: JK Traveling Around The World
Message: I am commenting on this
Created Time: 2024-10-02T18:05:26+0000
Likes: 1
Not a reply
Liked by you
--------------------


In [19]:
pprint(comments)


[{'created_time': '2024-10-02T18:05:26+0000',
  'from': {'id': '379955691858248', 'name': 'JK Traveling Around The World'},
  'id': '122132752322362482_3885889078335255',
  'like_count': 0,
  'message': 'I am commenting on this',
  'user_likes': False}]


## Test to react to comments

In [21]:
if comments:
    for comment in comments:
        print(f"Comment ID: {comment['id']}")
        print(f"Message: {comment['message']}")
        
    while True:  # Loop to keep prompting for reactions until user wants to quit
        comment_id_to_react = input("Enter the comment ID to react to (or type 'q' to quit): ")
        if comment_id_to_react.lower() == 'q':
            break

        message = input("Enter the answer text (or press Enter to skip reply): ")
        
        # Improved like input handling
        while True:
            like_input = input("Do you want to like the comment? (y/n): ")
            if like_input.lower() in ['y', 'n']:
                like = like_input.lower() == 'y'
                break
            else:
                print("Invalid input. Please enter 'y' for yes or 'n' for no.")

        response = fb_comment_manager.react_to_comment(comment_id_to_react, message=message if message else None, like=like)
    
        if response:
            print("Reaction successful:", response)
        else:
            print("Failed to react to the comment.")
            

Comment ID: 122132752322362482_3885889078335255
Message: I am commenting on this


DEBUG:urllib3.connectionpool:Resetting dropped connection: graph.facebook.com
DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "POST /v3.1/122132752322362482_3885889078335255/comments HTTP/11" 200 44


Replied to comment 122132752322362482_3885889078335255: you sure?


DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "POST /v3.1/122132752322362482_3885889078335255/likes?access_token=EAAXMKGApZA9QBO3SwNB9yNZA3qLdWI19f6PTPcd8ZClXvGjPwgCnZAADAYCJZCM8VH7Gaj9om1i6xYFLMcxZCVbdaECTJWHFTqriSrZCzCQXcBtNIgAH6lgl1ZAHHkktTF1Aa1tLmLFsDHjG33RLuHYcYFPoVKeByEM5qZCXWsbCAoLnlyHvAj5ZBPvCREL6QWOYhaoA8MYwQZA HTTP/11" 200 16


Liked comment 122132752322362482_3885889078335255
Reaction successful: {'success': True}


## Test to publish a message post

In [7]:
   
# this checks how it includes the instagram or any other link.
# The link is included as a link in the post, pero no se ve el reel de instagram

# Publish a text post
text_post_message = ("Happy water? \n"
                    "  https://www.instagram.com/reel/DAn7I2OuzbA/")
try:
    text_post_result = fb_post_manager.publish_text_post(page_id, text_post_message)
except Exception as e:  # Catching general exceptions for this example
    print(f"Error publishing text post: {e}")

print("Text post result:", text_post_result)


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): graph.facebook.com:443
DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "POST /v3.1/379955691858248/feed HTTP/11" 200 43


Post published successfully. Post ID: 379955691858248_122132750960362482
Text post result: {'id': '379955691858248_122132750960362482'}


Test to publish a one photo post with a message

In [10]:


photo_post_message = "El Rocio so full with horses"
photo_path = r"C:\Users\manue\Documents\Disfrutando Huelva (DH)\Agrupados para editar\Rocio - Todos\Caballos carros y carretas en el rocio\20240518_131042~2.jpg"
try:
    photo_post_result = fb_post_manager.publish_photo_post(page_id, photo_post_message, photo_path)
except Exception as e:  # Catching general exceptions for this example
    print(f"Error publishing photo post: {e}")

# Print results
    
print("Photo post result:", photo_post_result)


DEBUG:urllib3.connectionpool:Resetting dropped connection: graph.facebook.com
DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "POST /v3.1/me/photos HTTP/11" 200 74


Post with photo published successfully. Post ID: 379955691858248_122132752322362482
Photo post result: {'id': '122132752298362482', 'post_id': '379955691858248_122132752322362482'}


In [11]:
# trying to publish a video with the publish_photo_post does not work.
# it sends the Error publishing post with photo: Invalid parameter

photo_post_message = "El Rocio, Huelva"
photo_path = r"C:\Users\manue\Documents\Disfrutando Huelva (DH)\Agrupados para editar\Rocio - Todos\Caballos carros y carretas en el rocio\20240518_130347.mp4"
try:
    photo_post_result = fb_post_manager.publish_photo_post(page_id, photo_post_message, photo_path)
except Exception as e:  # Catching general exceptions for this example
    print(f"Error publishing photo post: {e}")

# Print results
    
print("Photo post result:", photo_post_result)


DEBUG:urllib3.connectionpool:Resetting dropped connection: graph.facebook.com


DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "POST /v3.1/me/photos HTTP/11" 400 269


Error publishing post with photo: Invalid parameter
Photo post result: None


# Utils Tests

##  def get_page_or_user_info

### NO FUNCIONA PARA OTROS USUARIOS
### Las otras alternativas con BeautifulSoup (get the id from the name
### or with Hikerapi si funciona




In [24]:
user_info = FbUtils.get_page_or_user_info(api_client=fb_client, page_or_user_id="100064556895235")  # el de huelva foodies
print(user_info)

# Fetch specific fields
specific_fields = ["id", "name", "about", "link", ]
page_info = FbUtils.get_page_or_user_info(api_client=fb_client, page_or_user_id="100064556895235", fields=specific_fields)
pprint(page_info)

DEBUG:urllib3.connectionpool:Resetting dropped connection: graph.facebook.com
DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "GET /v3.1/100064556895235?fields=id%2Cname%2Cabout&access_token=EAAXMKGApZA9QBO3SwNB9yNZA3qLdWI19f6PTPcd8ZClXvGjPwgCnZAADAYCJZCM8VH7Gaj9om1i6xYFLMcxZCVbdaECTJWHFTqriSrZCzCQXcBtNIgAH6lgl1ZAHHkktTF1Aa1tLmLFsDHjG33RLuHYcYFPoVKeByEM5qZCXWsbCAoLnlyHvAj5ZBPvCREL6QWOYhaoA8MYwQZA HTTP/11" 400 294
DEBUG:urllib3.connectionpool:https://graph.facebook.com:443 "GET /v3.1/100064556895235?fields=id%2Cname%2Cabout%2Clink&access_token=EAAXMKGApZA9QBO3SwNB9yNZA3qLdWI19f6PTPcd8ZClXvGjPwgCnZAADAYCJZCM8VH7Gaj9om1i6xYFLMcxZCVbdaECTJWHFTqriSrZCzCQXcBtNIgAH6lgl1ZAHHkktTF1Aa1tLmLFsDHjG33RLuHYcYFPoVKeByEM5qZCXWsbCAoLnlyHvAj5ZBPvCREL6QWOYhaoA8MYwQZA HTTP/11" 400 295


Error retrieving page/user info: Unsupported get request. Object with ID '100064556895235' does not exist, cannot be loaded due to missing permissions, or does not support this operation. Please read the Graph API documentation at https://developers.facebook.com/docs/graph-api
{}
Error retrieving page/user info: Unsupported get request. Object with ID '100064556895235' does not exist, cannot be loaded due to missing permissions, or does not support this operation. Please read the Graph API documentation at https://developers.facebook.com/docs/graph-api
{}


##  def get_page_id (scraping)

## Get the id of a page by its name

In [28]:
page_name = "DisfrutandoHuelva"  # Replace with the actual Page name
user_id = FbUtils.get_page_id(page_name)

if user_id:
    print(f"Found User ID for {page_name}: {user_id}")
else:
    print(f"Could not find User ID for {page_name}.")

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.facebook.com:443
DEBUG:urllib3.connectionpool:https://www.facebook.com:443 "GET /DisfrutandoHuelva HTTP/11" 200 None


Found User ID for DisfrutandoHuelva: //profile/61550207577627


# Test for Hiker API

In [None]:
%pip install --upgrade hikerapi




In [None]:
from hikerapi import Client
hiker_api_key = os.environ["HikerAPI_key"]
cl = Client(token=hiker_api_key)  

In [None]:
print(type(cl))

In [None]:
print(hiker_api_key)

### Hiker API - Get user info

In [None]:


user_info = cl.user_by_username_v2("huelvafoodie")

pprint(user_info)

## Extract the most important Huelva IG influencers profiles by name
##         including their FB id
### store them in a JSON file

"""Extracts and analyzes Instagram user information from a JSON file.

This class provides methods to process Instagram user data that has been
fetched using the HikerAPI and saved in a JSON file. It can extract
relevant information, create a Pandas DataFrame, and save it as a CSV file.

Methods:
    extract_info_data_from_json(input_file): Extracts user data from a JSON file
        and returns a DataFrame.
    save_to_csv(df, output_file): Saves a DataFrame containing user data to a CSV file.
"""


In [None]:
import os
import json
from hikerapi import Client

def fetch_and_save_profiles(usernames, output_file="instagram_profiles.json"):
    """Fetches user profiles from HikerAPI and saves the data to a JSON file.

    Args:
        usernames: A list of Instagram usernames to fetch.
        output_file: The path to the JSON file where data will be saved.
    """
    
    # Load your HikerAPI key securely from an environment variable
    hiker_api_key = os.environ.get("HikerAPI_key")
    if not hiker_api_key:
        raise ValueError("HikerAPI_key environment variable not found.")

    # Create the HikerAPI client
    cl = Client(token=hiker_api_key)

    all_profile_data = []

    for username in usernames:
        try:
            user_info = cl.user_by_username_v2(username)

            # If the request is successful, add it to the list
            if user_info['status'] == 'ok':
                all_profile_data.append(user_info)
            else:
                print(f"Error fetching data for {username}: {user_info.get('error')}")
        except Exception as e:
            print(f"Error fetching data for {username}: {e}")

    # Save the data to a JSON file
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_profile_data, f, ensure_ascii=False, indent=4)




In [None]:
# call the function to fetch the information
usernames_to_fetch = ["capturolavida_emn",
                      "huelvafoodie",
                      "laesenciadehuelva",
                      "huelvagram",
                      "agendahuelva",
                      "huelva_secreta",
                      "huelva.explore",
                      "huelva24com",
                      "huelvainformacion",
                      "huelvahoy",
                      "huelvaoriginal"]

# Call the function to fetch and save profiles
fetch_and_save_profiles(usernames_to_fetch)    # add arg for output_file - default "instagram_profiles.json"

## Extract relevant information from the JSON and store it in a pandas data frame
## and, latter, store in a .csv file
## to access it with Excel or Google Sheets

In [None]:
import os
import json
import pandas as pd

def extract_profile_data_from_json(input_file="instagram_profiles.json", output_file="instagram_profiles.csv"):
    """Loads Instagram profile data from a JSON file, extracts relevant fields,
    creates a Pandas DataFrame, and exports it to a CSV file.

    Args:
        input_file: The path to the JSON file containing the profile data.
        output_file: The path to save the CSV file.
    """

    with open(input_file, "r", encoding="utf-8") as f:
        all_profile_data = json.load(f)

    extracted_data = []

    for profile in all_profile_data:
        if profile["status"] == "ok":
            user = profile["user"]
            extracted_data.append({
                "username": user["username"],
                "full_name": user["full_name"],
                "instagram_id": user.get("pk", user.get("pk_id")),  # Extract the ID (either "pk" or "pk_id")
                "category": user["category"],
                "biography": user["biography"],
                "followers": user["follower_count"],
                "following": user["following_count"],
                "is_private": user["is_private"],
                "is_business": user["is_business"],
                "is_verified": user["is_verified"],
                "media_count": user["media_count"],
                "fb_page_id": str(user["page_id"]),
                "fb_page_name": user["page_name"],
                "profile_pic_url": user["profile_pic_url"],
                "external_url": user.get("external_url"), # Use get() to handle missing keys
                "bio_links": [link["url"] for link in user.get("bio_links", [])] # Extract URLs from bio links
            })

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(extracted_data)

    # Export the DataFrame to a CSV file (for Google Sheets)
    df.to_csv(output_file, index=False)

# Example Usage: 
extract_profile_data_from_json()    # Using default file names
                                    # input_file="instagram_profiles.json",
                                    # output_file="instagram_profiles.csv"




In [None]:
# empty cell