##                 to run in all cases

### imports

In [1]:
import json, os
from typing import Dict, Optional, Any, List
from pprint import pprint

from fb_api_client import FbApiClient
from fb_post_manager import FbPostManager
from fb_utils import FbUtils
from fb_comment_manager import FbCommentManager
from fb_scraper import FbScraper

import logging

# Root logger at DEBUG: this also enables the debug records of third-party
# libraries (e.g. the urllib3 / facebook_scraper lines captured further down).
# NOTE(review): those records include request paths — confirm no access token
# can end up in saved notebook output before sharing the file.
logging.basicConfig(level=logging.DEBUG)  # Set the logging level to DEBUG

  from .autonotebook import tqdm as notebook_tqdm


### Environmental Variables based on App and User/Page

Use `fb_create_json_config.py` to create the JSON config file.
The file name follows the pattern `User_Appname_PageName_config.json`.

Because the code is run from this notebook, the config file has to be referenced through the parent directory (`..\`).


In [2]:
# Build the path from its components so the notebook also works on macOS/Linux,
# where a backslash is an ordinary filename character, not a separator.
# On Windows this yields exactly the same string as before.
config_file = os.path.join(
    "..", "config_files", "FB_JK_JK Travel_JK Travel_config.json"
)

In [3]:
# load the JSON configuration
def load_fb_config(config_file):
    """Read the Facebook JSON configuration file and return its parsed content.

    Args:
        config_file: Path to the JSON configuration file.

    Returns:
        The deserialized JSON document, as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8: without it open() falls back to the platform
    # default encoding, which can mis-decode non-ASCII text in the config.
    with open(config_file, "r", encoding="utf-8") as f:
        return json.load(f)


fb_config = load_fb_config(config_file)

# Load credentials.
# Each value in the config is used as the *name* of an environment variable;
# the credential itself is read from the environment.
try:
    app_id = os.environ[fb_config["app_id"]]
    app_secret = os.environ[fb_config["app_secret"]]
    access_token = os.environ[fb_config["access_token"]]
    page_id = os.environ[fb_config["page_id"]]
    user_id = os.environ[fb_config["user_id"]]
    user_token = os.environ[fb_config["user_token"]]
except KeyError as e:
    # Raised when a key is missing from the config or when the environment
    # variable it names is not set; `e` carries the missing name.
    print(
        f"Error: Environment variable {e} not set. Please set it before running the tests."
    )
    # Raise SystemExit directly instead of calling exit(): `exit` is a helper
    # injected for interactive shells and is not meant for use in programs.
    raise SystemExit(1) from e

### get the credentials and instantiate the fb_client

In [4]:
# Instantiate the FacebookAPIClient and PostManager
# A single API client is built from the credentials loaded earlier and is then
# handed to the post, utility and comment helpers.
# NOTE(review): user_id and user_token are loaded but not passed to the client —
# confirm access_token is the token intended for every call below.
fb_client = FbApiClient(app_id, app_secret, access_token, page_id)
fb_post_manager = FbPostManager(fb_client)
fb_utils = FbUtils(fb_client)
fb_comment_manager = FbCommentManager(fb_client)

In [None]:
def _mask_secret(value, visible=4):
    """Return ``value`` with everything but its last ``visible`` characters masked."""
    text = str(value)
    if len(text) <= visible:
        return "*" * len(text)
    return "*" * (len(text) - visible) + text[-visible:]


# Notebook outputs are saved with the file, so secrets and tokens are masked;
# plain identifiers are still shown in full to confirm what was loaded.
print("app_id", app_id)
print("app_secret", _mask_secret(app_secret))
print("access_token", _mask_secret(access_token))
print("page_id", page_id)
print("user_id", user_id)
print("user_token", _mask_secret(user_token))

In [None]:
# Show the attributes and methods exposed by each helper object, in the order
# client, post manager, utils, comment manager.
for helper in (fb_client, fb_post_manager, fb_utils, fb_comment_manager):
    print(dir(helper))



## Test to get information about a Group
### problems to get the id of a group - probably scraping better
### problems to get the feed of a Group. Probably scraping better



In [None]:
# Look up the ID of a group from its name.
# NOTE(review): get_group_id is invoked on the FbUtils class rather than on the
# fb_utils instance — confirm it is a static/class method.
group_name = "lonelyplanettraveling"  # Replace with the actual group name
group_id = FbUtils.get_group_id(group_name)

print(
    f"Found Group ID for {group_name}: {group_id}"
    if group_id
    else f"Could not find Group ID for {group_name}."
)

## Test to grab the latest posts from any of my pages (num_posts)

In [None]:
# Fetch the most recent posts of the page and print a short summary of each
# one, followed by the full post as pretty-printed JSON.
try:
    latest_posts = fb_post_manager.get_latest_posts(page_id, num_posts=3)

    if latest_posts:  # Check if the list is not empty
        print("Latest posts retrieved successfully:")
        for post in latest_posts:
            # Optional fields may be missing from a post; use fallbacks so one
            # incomplete post does not abort the whole listing with a KeyError.
            likes_count = (
                (post.get("likes") or {}).get("summary") or {}
            ).get("total_count", 0)
            comments_count = (
                (post.get("comments") or {}).get("summary") or {}
            ).get("total_count", 0)

            print(f"  ID: {post['id']}")
            print(f"  Message: {post.get('message', 'No message available')}")
            print(f"  Post link: {post.get('permalink_url', 'No link available')}")
            print(f"  Likes Count: {likes_count}")
            print(f"  Comments Count: {comments_count}")
            print(
                "---------------------------------------------------------------------"
            )

            # ensure_ascii=False keeps accented characters and emoji readable
            # instead of printing them as \uXXXX escapes.
            print(json.dumps(post, indent=4, ensure_ascii=False))

    else:
        print("No latest posts found or error retrieving posts.")
except Exception as e:  # Catching general exceptions for this example
    print(f"Error retrieving latest posts: {e}")

## Test to retrieve a post and its media by post id

Any user or just my own????

In [None]:
# Retrieve one post by its ID, dump it as JSON and list its photo/video
# attachments.
post_id = "379955691858248_122132752322362482"  # Replace with the actual post ID
specific_post = fb_post_manager.get_post_by_id(post_id)
if specific_post:
    # ensure_ascii=False keeps non-ASCII message text readable in the dump.
    print(json.dumps(specific_post, indent=4, ensure_ascii=False))

    if "attachments" in specific_post:
        attachments = specific_post["attachments"].get("data", [])
        for attachment in attachments:
            media_type = attachment.get("media_type")
            # An attachment may come without a "media" section; treat it as
            # empty rather than failing with a KeyError.
            media = attachment.get("media") or {}
            if media_type == "photo":
                photo_url = (media.get("image") or {}).get("src")
                if photo_url:
                    print(f"Found photo attachment: {photo_url}")
                else:
                    print("Found photo attachment without an image URL.")
            elif media_type == "video":
                video_url = media.get("source")
                if video_url:
                    print(f"Found video attachment: {video_url}")
                else:
                    print("Found video attachment without a source URL.")
            # Add other media types (e.g., "link", "album") if needed
    else:
        print("No attachments found in this post.")
else:
    print("No post found or error retrieving post.")

## Test to retrieve likes information by post id

### Note: this may not work, but extracting all the information of the latest post does include the likes



In [None]:
# Print who liked a given post.
post_id = "379955691858248_122132752322362482"  # Replace with the actual post ID

likes = fb_post_manager.get_post_likes(post_id)

print(f"Post ID: {post_id}")
if likes is None:
    # presumably get_post_likes returns None on failure (TODO confirm);
    # len(None) would otherwise stop the cell with a TypeError.
    print("Could not retrieve likes for this post.")
else:
    print(f"Number of Likes: {len(likes)}")
    for like in likes:
        if "unknown" in like:
            print("Liked by: Unknown user")
        else:
            print(f"Liked by: {like.get('name', 'Unknown Name')}")

## Test to retrieve information regarding post shares


In [None]:
# Fetch the share information of one post and print each returned item.
# The type of the result is printed first because its structure is still
# being explored.
# NOTE(review): the loop assumes get_post_shares returns an iterable — confirm
# what it returns on failure.
post_id = "379955691858248_122132752322362482"  # Replace with the actual post ID
shares = fb_post_manager.get_post_shares(post_id)
print(type(shares))
for item in shares:
    pprint(item)  # Pretty-print each item

# Tests to interact with comments

## Test to read and print the comments of a specific post

In [None]:
post_id = input("Enter the post ID: ")  # You can prompt for the post ID

# Fields requested for every comment of the post
comment_fields = [
    "id",
    "message",
    "created_time",
    "from",
    "like_count",
    "parent",
    "user_likes",
    "reactions",
]
comments = fb_comment_manager.get_post_comments(post_id, fields=comment_fields)

if not comments:
    print("No comments found or an error occurred.")
else:
    # Print a short report for each retrieved comment
    for comment in comments:
        print(f"Comment ID: {comment['id']}")

        # "from" may be missing or an empty dict; only a non-empty dict
        # is trusted to carry the author's name.
        author_info = comment.get("from", {})
        author_name = (
            author_info.get("name", "Unknown Author")
            if isinstance(author_info, dict) and author_info
            else "Unknown Author (ID: Unknown ID)"
        )
        print(f"Author: {author_name}")

        print(f"Message: {comment['message']}")
        print(f"Created Time: {comment['created_time']}")
        print(f"Likes: {comment.get('like_count', 0)}")

        # A comment that has a "parent" entry is a reply to another comment
        if "parent" not in comment:
            print("Not a reply")
        else:
            print(f"Reply to: {comment['parent']['id']}")

        # Whether the current user liked the comment
        print("Liked by you" if comment.get("user_likes") else "Not liked by you")

        print("-" * 20)  # Separator

In [None]:
# Dump the raw comment structures fetched by the previous cell for inspection.
pprint(comments)

## Test to react to comments

In [None]:
if comments:
    # List every comment so the user can pick the ID to react to
    for comment in comments:
        print(f"Comment ID: {comment['id']}")
        print(f"Message: {comment['message']}")

    # Keep asking for reactions until the user types 'q'
    while True:
        comment_id_to_react = input(
            "Enter the comment ID to react to (or type 'q' to quit): "
        )
        if comment_id_to_react.lower() == "q":
            break

        message = input("Enter the answer text (or press Enter to skip reply): ")

        # Re-prompt until the answer is 'y' or 'n' (case-insensitive)
        like_input = input("Do you want to like the comment? (y/n): ")
        while like_input.lower() not in ("y", "n"):
            print("Invalid input. Please enter 'y' for yes or 'n' for no.")
            like_input = input("Do you want to like the comment? (y/n): ")
        like = like_input.lower() == "y"

        # An empty reply text is sent as None, i.e. no reply
        response = fb_comment_manager.react_to_comment(
            comment_id_to_react, message=message or None, like=like
        )

        if not response:
            print("Failed to react to the comment.")
        else:
            print("Reaction successful:", response)

## Test to publish a message post

In [None]:
# This checks how a post handles an Instagram (or any other) link.
# The link is included as a link in the post, but the Instagram reel itself is not shown.

# Publish a text post
text_post_message = "Happy water? \n" "  https://www.instagram.com/reel/DAn7I2OuzbA/"
# Pre-set the result so the final print still works when publishing raises;
# otherwise the name would be undefined and the cell would end in a NameError.
text_post_result = None
try:
    text_post_result = fb_post_manager.publish_text_post(page_id, text_post_message)
except Exception as e:  # Catching general exceptions for this example
    print(f"Error publishing text post: {e}")

print("Text post result:", text_post_result)

## Test to publish a single-photo post with a message

In [None]:
# Publish a post made of one photo plus a message.
photo_post_message = "El Rocio so full with horses"
# NOTE(review): absolute path on the author's machine — point it at a local
# image before running this cell elsewhere.
photo_path = r"C:\Users\manue\Documents\Disfrutando Huelva (DH)\Agrupados para editar\Rocio - Todos\Caballos carros y carretas en el rocio\20240518_131042~2.jpg"
# Pre-set the result so the final print still works when publishing raises;
# otherwise the name would be undefined and the cell would end in a NameError.
photo_post_result = None
try:
    photo_post_result = fb_post_manager.publish_photo_post(
        page_id, photo_post_message, photo_path
    )
except Exception as e:  # Catching general exceptions for this example
    print(f"Error publishing photo post: {e}")

# Print results

print("Photo post result:", photo_post_result)

## Test to share a third-party post by its ID on my feed (pending: the post ID still has to be obtained)

In [None]:
post_id_to_share = ""
post_reel_message = " I like it!"

# The post ID is still an empty placeholder; only call the API once a real ID
# has been filled in, instead of sending a request with an empty ID.
result = None
if post_id_to_share:
    result = fb_post_manager.share_public_post(
        page_id, post_id_to_share, post_reel_message
    )
else:
    print("No post ID set - skipping share_public_post.")

Test to share a third party reel by its id on my feed

In [None]:
# Share a third-party reel, identified by its ID, with an accompanying message.
# The outcome is stored in `result`, a name the other share cells also assign.
reel_id_to_share = "503813719241668"
share_reel_message = "impressive"

result = fb_post_manager.share_public_reel(
    page_id, reel_id_to_share, share_reel_message
)  # message is optional

test to share a third party video by its id

In [None]:
# Share a third-party video, identified by its ID, with an accompanying message.
# The outcome is stored in `result`, a name the other share cells also assign.
video_id_to_share = "521378570537970"
share_video_message = " wooowwww!!!! "
result = fb_post_manager.share_public_video(
    page_id, video_id_to_share, share_video_message
)

# Utils Tests


##  def get_page_id (scraping)

## Get the id of a page by its name

In [None]:
page_name = "DisfrutandoHuelva"  # Replace with the actual Page name
# Keep the lookup result in its own name so it does not overwrite `user_id`,
# the credential loaded from the environment at the top of the notebook.
# NOTE(review): get_page_id is invoked on the FbUtils class rather than on the
# fb_utils instance — confirm it is a static/class method.
looked_up_page_id = FbUtils.get_page_id(page_name)

if looked_up_page_id:
    print(f"Found Page ID for {page_name}: {looked_up_page_id}")
else:
    print(f"Could not find Page ID for {page_name}.")

Get the latest post of a page by page_id

In [5]:
# Get user ID, page ID, and group ID using fb_scraper methods

user_name = "intheskytravel04"  # Replace with the actual username
page_name = "travel.bg"  # Replace with the actual page name
group_name = "Traveling Around the World Group"  # Replace with the actual group name

# Retrieve IDs.
# The results go into scraped_* names so a failed lookup (None) cannot replace
# the user_id / page_id credentials that the API cells of this notebook use.
scraped_user_id = FbScraper.get_user_id(user_name)
scraped_page_id = FbScraper.get_page_id(page_name)
scraped_group_id = FbScraper.get_group_id(group_name)

# Print the results
print(f"User ID for '{user_name}': {scraped_user_id}")
print(f"Page ID for '{page_name}': {scraped_page_id}")
print(f"Group ID for '{group_name}': {scraped_group_id}")

DEBUG:facebook_scraper.facebook_scraper:Requesting page from: intheskytravel04
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): m.facebook.com:443
DEBUG:urllib3.connectionpool:https://m.facebook.com:443 "GET /intheskytravel04 HTTP/1.1" 200 None
ERROR:facebook_scraper.facebook_scraper:Unable to extract top_post <class 'AssertionError'>:
ERROR:facebook_scraper.facebook_scraper:Friend_count extraction failed: list index out of range
ERROR:facebook_scraper.facebook_scraper:Follower_count extraction failed: 'NoneType' object has no attribute 'text'
DEBUG:facebook_scraper.facebook_scraper:Fetching /intheskytravel04?v=following
DEBUG:urllib3.connectionpool:https://m.facebook.com:443 "GET /intheskytravel04?v=following HTTP/1.1" 200 None
ERROR:facebook_scraper.facebook_scraper:Following_count extraction failed: 'NoneType' object has no attribute 'text'
DEBUG:facebook_scraper.facebook_scraper:Requesting page from: https://m.facebook.com/intheskytravel04/about/
DEBUG:urllib3.connec

Error retrieving group ID for Traveling Around the World Group: Your request couldn't be processed
User ID for 'intheskytravel04': None
Page ID for 'travel.bg': None
Group ID for 'Traveling Around the World Group': None


User ID for 'intheskytravel04': None
Page ID for 'travel.bg': None
Group ID for 'Traveling Around the World Group': None

## Test for Hiker API (Instagram accounts only)

Get the FB page from the IG accounts that I suggest


In [None]:
# Install or upgrade the HikerAPI client in the kernel's environment.
# NOTE(review): the version is not pinned, so a re-run may pull a different release.
%pip install --upgrade hikerapi




In [None]:
from hikerapi import Client

# Read the HikerAPI key from the environment (raises KeyError when it is not
# set) and create the client used by the following cells.
hiker_api_key = os.environ["HikerAPI_key"]
cl = Client(token=hiker_api_key)

In [None]:
# Quick check that the client object was created.
print(type(cl))

In [None]:
# Never echo the full API key: notebook outputs are saved with the file.
# Only the last four characters are shown, enough to tell which key is loaded.
masked_hiker_api_key = (
    "*" * (len(hiker_api_key) - 4) + hiker_api_key[-4:]
    if len(hiker_api_key) > 4
    else "*" * len(hiker_api_key)
)
print(masked_hiker_api_key)

### Hiker API - Get user info

In [None]:
# Fetch the profile of one Instagram account by username and pretty-print the
# raw response to inspect its structure.
user_info = cl.user_by_username_v2("huelvafoodie")

pprint(user_info)

## Extract the most important Huelva IG influencers profiles by name
##         including their FB id
### store them in a JSON file

"""Extracts and analyzes Instagram user information from a JSON file.

This class provides methods to process Instagram user data that has been
fetched using the HikerAPI and saved in a JSON file. It can extract
relevant information, create a Pandas DataFrame, and save it as a CSV file.

Methods:
    extract_info_data_from_json(input_file): Extracts user data from a JSON file
        and returns a DataFrame.
    save_to_csv(df, output_file): Saves a DataFrame containing user data to a CSV file.
"""


In [None]:
import os
import json
from hikerapi import Client


def fetch_and_save_profiles(usernames, output_file="instagram_profiles.json"):
    """Download Instagram profiles through HikerAPI and dump them to a JSON file.

    Usernames whose request fails, or whose response status is not "ok", are
    reported on stdout and left out of the file.

    Args:
        usernames: The Instagram usernames to look up.
        output_file: Path of the JSON file to write (overwritten on every call).

    Raises:
        ValueError: If the HikerAPI_key environment variable is missing or empty.
    """
    # The API key is taken from the environment, never from the notebook
    api_key = os.environ.get("HikerAPI_key")
    if not api_key:
        raise ValueError("HikerAPI_key environment variable not found.")

    client = Client(token=api_key)
    collected = []

    for username in usernames:
        try:
            response = client.user_by_username_v2(username)
            if response["status"] != "ok":
                print(f"Error fetching data for {username}: {response.get('error')}")
                continue
            # Keep only successful responses
            collected.append(response)
        except Exception as e:
            print(f"Error fetching data for {username}: {e}")

    # Persist everything that was collected, keeping non-ASCII text readable
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(collected, f, ensure_ascii=False, indent=4)

In [None]:
# call the function to fetch the information
# Instagram accounts whose profiles are fetched and stored.
usernames_to_fetch = [
    "capturolavida_emn",
    "huelvafoodie",
    "laesenciadehuelva",
    "huelvagram",
    "agendahuelva",
    "huelva_secreta",
    "huelva.explore",
    "huelva24com",
    "huelvainformacion",
    "huelvahoy",
    "huelvaoriginal",
]

# Call the function to fetch and save profiles; output_file is left at its
# default here, so the data is written to "instagram_profiles.json".
fetch_and_save_profiles(
    usernames_to_fetch
)  # add arg for output_file - default "instagram_profiles.json"

## Extract relevant information from the JSON and store it in a pandas DataFrame
## and, later, store it in a .csv file
## to access it with Excel or Google Sheets

In [None]:
import os
import json
import pandas as pd


def extract_profile_data_from_json(
    input_file="instagram_profiles.json", output_file="instagram_profiles.csv"
):
    """Convert saved Instagram profile data from JSON into a flat CSV file.

    Every entry whose "status" is "ok" and that carries a "user" section becomes
    one CSV row. Fields missing from a profile are written as empty cells
    instead of raising a KeyError.

    Args:
        input_file: The path to the JSON file containing the profile data.
        output_file: The path to save the CSV file.

    Raises:
        OSError: If the input file cannot be read or the output not written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    # Fixed column order; also guarantees a header row when no profile qualifies.
    columns = [
        "username",
        "full_name",
        "instagram_id",
        "category",
        "biography",
        "followers",
        "following",
        "is_private",
        "is_business",
        "is_verified",
        "media_count",
        "fb_page_id",
        "fb_page_name",
        "profile_pic_url",
        "external_url",
        "bio_links",
    ]

    with open(input_file, "r", encoding="utf-8") as f:
        all_profile_data = json.load(f)

    extracted_data = []

    for profile in all_profile_data:
        if profile.get("status") != "ok":
            continue
        user = profile.get("user")
        if not user:
            continue

        # Keep a missing page ID empty; str(None) would put "None" in the CSV.
        page_id = user.get("page_id")

        extracted_data.append(
            {
                "username": user.get("username"),
                "full_name": user.get("full_name"),
                # The ID is stored under either "pk" or "pk_id"
                "instagram_id": user.get("pk", user.get("pk_id")),
                "category": user.get("category"),
                "biography": user.get("biography"),
                "followers": user.get("follower_count"),
                "following": user.get("following_count"),
                "is_private": user.get("is_private"),
                "is_business": user.get("is_business"),
                "is_verified": user.get("is_verified"),
                "media_count": user.get("media_count"),
                "fb_page_id": str(page_id) if page_id is not None else None,
                "fb_page_name": user.get("page_name"),
                "profile_pic_url": user.get("profile_pic_url"),
                "external_url": user.get("external_url"),
                # URLs of the bio links; tolerate a missing or null list
                "bio_links": [
                    link["url"]
                    for link in (user.get("bio_links") or [])
                    if link.get("url")
                ],
            }
        )

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(extracted_data, columns=columns)

    # Export the DataFrame to a CSV file (for Google Sheets)
    df.to_csv(output_file, index=False)


# Example Usage:
# Run the conversion with the default file names, i.e.
# input_file="instagram_profiles.json" and output_file="instagram_profiles.csv".
extract_profile_data_from_json()  # Using default file names