In [12]:
import ipfshttpclient
import whoosh
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT, ID, NUMERIC
from whoosh.qparser import QueryParser
import os
import json


# Load configuration from config.json file
config_path = "config.json"  # Update this path as needed
with open(config_path, "r") as f:
    config = json.load(f)

IPFS_ADDRESS = config["IPFS_ADDRESS"]

# Connect to the IPFS node at a specific IP address and port
ipfs_address = f"/dns/{config['IPFS_ADDRESS']}/tcp/{config['IPFS_PORT']}/http"
client = ipfshttpclient.connect(ipfs_address)


# Connect to local IPFS
# client = ipfshttpclient.connect()

# Define schema for indexing
schema = Schema(
    cid=ID(stored=True),               # The IPFS CID
    name=TEXT(stored=True),            # Filename
    size=NUMERIC(stored=True),         # File size
    filetype=TEXT(stored=True),        # File type (MIME type)
)

# Create the index directory
if not os.path.exists("indexdir"):
    os.mkdir("indexdir")

# Create index in the directory
ix = create_in("indexdir", schema)

# Function to index IPFS content
def index_ipfs_content(cid, filename):
    try:
        # Fetch file stats from IPFS
        stats = client.object.stat(cid)
        file_size = stats['CumulativeSize']
        
        # Fetch file info (you can expand this for more file types)
        filetype = filename.split(".")[-1] if "." in filename else "unknown"

        # Index the content
        writer = ix.writer()
        writer.add_document(
            cid=cid,
            name=filename,
            size=file_size,
            filetype=filetype,
        )
        writer.commit()
        print(f"Indexed {filename} with CID: {cid}")
        
    except Exception as e:
        print(f"Failed to index CID {cid}: {e}")

# Search IPFS index based on keyword
def search_ipfs(keyword):
    with ix.searcher() as searcher:
        query = QueryParser("name", ix.schema).parse(keyword)
        results = searcher.search(query)
        if results:
            for result in results:
                print(f"CID: {result['cid']}, Name: {result['name']}, Size: {result['size']} bytes, File Type: {result['filetype']}")
        else:
            print(f"No results found for '{keyword}'")

# Example usage: Index some files on your local IPFS node
index_ipfs_content("QmXh37WXcrLXkJX2cAPjbPdKZ2cxJXD58XX6eU1Zc41ka4", "upload/Aeroacoustic-airfoil-shape-optimization-enhance_2023_Expert-Systems-with-App.pdf")  # Replace with actual CID and filename
index_ipfs_content("QmSSY49SnmbCZ3oSaTki7CYZe1ZaWZfE1CsWHpt8Ge7acJ", "upload/World_Energy_By_Country_And_Region_1965_to_2023.csv")  # Replace with actual CID and filename
index_ipfs_content("QmdiRawzVNUiB28ENKQ7WefeFLEJ1xMjsJjwtHL2jnJ9xW", "upload/Munafò et al. - 2022 - The reproducibility debate is an opportunity, not .pdf")  # Replace with actual CID and filename

upload/Munafò et al. - 2022 - The reproducibility debate is an opportunity, not .pdf
file_2_CID
QmdiRawzVNUiB28ENKQ7WefeFLEJ1xMjsJjwtHL2jnJ9xW


# Example search
search_ipfs("Mexico")






Indexed upload/Aeroacoustic-airfoil-shape-optimization-enhance_2023_Expert-Systems-with-App.pdf with CID: QmXh37WXcrLXkJX2cAPjbPdKZ2cxJXD58XX6eU1Zc41ka4
Indexed upload/World_Energy_By_Country_And_Region_1965_to_2023.csv with CID: QmSSY49SnmbCZ3oSaTki7CYZe1ZaWZfE1CsWHpt8Ge7acJ
No results found for 'Mexico'
