In [8]:
import requests
import difflib
import json

In [9]:
def fetch_data(api_url, timeout=10, max_pages=None):
    """Collect the records from every page of a page-numbered JSON endpoint.

    Requests ``api_url`` with ``page=1, 2, ...`` and accumulates the records
    of each page. Pagination stops at the first page that returns a non-200
    status, cannot be decoded as UTF-8 JSON, or contains no records.

    A page may be a bare JSON list, or a JSON object that wraps the list
    under one or more nested ``"data"`` keys (for example
    ``{"data": {"data": [...]}}``); in the latter case the wrapper is
    unwrapped so that the records themselves, not the wrapper's keys, are
    collected.

    Args:
        api_url: Endpoint URL. The ``page`` query parameter is added to it.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
        max_pages: Optional upper bound on the number of pages requested;
            ``None`` means no bound.

    Returns:
        list: The records of all successfully fetched pages, in page order.

    Raises:
        Exceptions raised by ``requests.get`` (e.g. connection errors or
        timeouts) are not caught here and propagate to the caller.
    """
    data = []
    page = 1
    while max_pages is None or page <= max_pages:
        # Let requests build the query string so a URL that already carries
        # one is extended instead of getting a second '?'.
        response = requests.get(api_url, params={"page": page}, timeout=timeout)
        if response.status_code != 200:
            break
        try:
            payload = json.loads(response.content.decode('utf-8'))
        except ValueError:
            # Both json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            break

        # Unwrap {"data": ...} envelopes until the actual list is reached.
        records = payload
        while isinstance(records, dict):
            records = records.get('data')

        # An empty or non-list page marks the end of the data; without this
        # check a truthy wrapper object would keep the loop running.
        if not isinstance(records, list) or not records:
            break
        data.extend(records)
        page += 1
    return data

In [10]:
def find_citations(response_text, sources, threshold=0.5):
    """Return the sources whose context text is similar to ``response_text``.

    Similarity is ``difflib.SequenceMatcher(None, response_text, context).ratio()``;
    a source is cited when that ratio is strictly greater than ``threshold``.

    Args:
        response_text: Text to compare against each source. If it is not a
            string, no citations are returned.
        sources: Iterable of sources (``None`` is treated as empty). Each
            source is either a plain string, used directly as the context,
            or a dict whose ``'context'`` value is the context and whose
            ``'id'`` / ``'link'`` values are copied into the citation.
            Sources of any other type, and sources whose context is empty
            or not a string, are skipped.
        threshold: Minimum similarity ratio (exclusive) for a citation.

    Returns:
        list[dict]: One ``{"id": ..., "link": ...}`` dict per matching
        source, in input order. String sources yield ``id=None`` and
        ``link=''``.
    """
    citations = []
    if not isinstance(response_text, str):
        # SequenceMatcher needs sequences; None or other non-text input
        # cannot match anything.
        return citations

    for source in sources or ():
        if isinstance(source, str):
            context, source_id, link = source, None, ''
        elif isinstance(source, dict):
            context = source.get('context', '')
            source_id = source.get('id')
            link = source.get('link', '')
        else:
            continue

        if not isinstance(context, str) or not context:
            continue

        # NOTE(review): SequenceMatcher's default autojunk heuristic applies
        # to second sequences of 200+ items and may lower the ratio for long
        # contexts - confirm whether that is intended.
        # Argument order is kept as (response_text, context) because ratio()
        # can depend on the order of its arguments.
        similarity = difflib.SequenceMatcher(None, response_text, context).ratio()
        if similarity > threshold:
            citations.append({"id": source_id, "link": link})
    return citations

In [11]:
def process_responses(data):
    """Build a citation record for every item in ``data``.

    Args:
        data: Iterable of items (``None`` is treated as empty). A dict item
            supplies its text under ``'response'`` and its candidate sources
            under ``'sources'``; a missing or null ``'response'`` becomes
            ``''`` and a missing or null ``'sources'`` becomes an empty
            list. Any non-dict item is used as the response text itself,
            with no sources.

    Returns:
        list[dict]: One ``{"response": ..., "citations": ...}`` dict per
        item, in input order, where ``citations`` is whatever
        ``find_citations`` returns for that item.
    """
    all_citations = []
    for item in data or ():
        if isinstance(item, dict):
            response_text = item.get('response')
            if response_text is None:
                # Key absent or explicitly null: compare against empty text.
                response_text = ''
            # `.get('sources', [])` would still hand back None for an
            # explicit null, which cannot be iterated downstream.
            sources = item.get('sources') or []
        else:
            response_text = item
            sources = []
        all_citations.append({
            "response": response_text,
            "citations": find_citations(response_text, sources),
        })
    return all_citations

In [None]:
def main(api_url='https://devapi.beyondchats.com/api/get_message_with_sources',
         output_path='citations_output.json'):
    """Fetch the responses, compute their citations, then print and save them.

    Args:
        api_url: Endpoint passed to ``fetch_data``.
        output_path: File that receives the JSON output. A falsy value
            (``None`` or ``''``) skips writing the file.

    Returns:
        None. The result is printed to stdout as indented JSON and, unless
        disabled, written to ``output_path``.
    """
    data = fetch_data(api_url)
    processed_data = process_responses(data)

    # Serialise once and reuse the text for both the console and the file.
    output_text = json.dumps(processed_data, indent=2)
    print(output_text)

    if output_path:
        # Explicit encoding keeps the file independent of the platform default.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(output_text)

# Run the pipeline only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if "__main__" == __name__:
    main()
