API endpoint: https://devapi.beyondchats.com/api/get_message_with_sources

* The endpoint above is a paginated GET API. Each page returns an array of objects, where each object contains a response text and a corresponding array of sources.

* `source` is a JSON array. Each object in the array consists of an `id`, a `context`, and an optional `link`.

# Libraries

In [1]:
#Import required Libraries

import re
import requests
import pandas as pd

# Fetching Data from API

In [2]:
# Base URL of the paginated "get_message_with_sources" endpoint; passed to the fetch functions below

url = "https://devapi.beyondchats.com/api/get_message_with_sources"

In [3]:
# Function to fetch the first page from the URL and read the total number of pages

def fetch_messages_with_sources(url, timeout=10):
    """Fetch the first page of the paginated endpoint and report the page count.

    Parameters
    ----------
    url : str
        Endpoint to query with a plain GET request.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout a stalled connection would block the notebook.

    Returns
    -------
    tuple
        ``(data, total_pages)`` where ``data`` is the decoded JSON payload and
        ``total_pages`` is ``data['data']['last_page']``. If the request or the
        decoding fails, the error is printed and ``(None, 0)`` is returned so
        that callers can always unpack the result into two names.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        total_pages = data['data']['last_page']
        return data, total_pages
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"Other error occurred: {err}")
    # Only reached after an error was reported above. Returning a 2-tuple
    # (instead of the implicit None) keeps `data, total_pages = ...` working,
    # and a page count of 0 makes downstream page loops a no-op.
    return None, 0
        
# Request the first page once; the same payload also carries the total page count.
first_page_result = fetch_messages_with_sources(url)
data, total_pages = first_page_result

print("Total Pages :", total_pages)
print(data)

Total Pages : 13
{'status': 'success', 'status_code': 200, 'message': 'Sample Sources fethced successfully!', 'data': {'current_page': 1, 'data': [{'id': 1, 'response': 'Yes, we offer online delivery services through major platforms like Swiggy and Zomato. You can also order directly from our website!', 'source': [{'id': '71', 'context': 'Order online Thank you for your trust in us! We are available on all major platforms: [Order online Order directly from our website](https://orders.brikoven.com), [Order from Swiggy](https://www.swiggy.com/direct/brand/7389?source=swiggy-direct&subSource=generic), [Order from zomato](https://www.zomato.com/bangalore/delivery?chain=18224650)', 'link': ''}, {'id': '8', 'context': 'Breakfast Reservations\r For Breakfast, we recommend making reservations in advance. \r For walk-ins, we only seat parties on a first come, first served basis. \r Your reservation is confirmed upon you filling the form. \r Done reserving? Check out the menu below! \r https://w

In [4]:
# Functions to fetch every page from the URL and flatten the results into one DataFrame

def _rows_from_page(payload):
    """Flatten one decoded page into [id, response, source_id, source_context, source_link] rows."""
    rows = []
    for item in payload['data']['data']:
        message_id = item['id']
        response_text = item['response']
        # A message whose source list is missing or null simply contributes no rows.
        for source in item.get('source') or []:
            rows.append([
                message_id,
                response_text,
                source['id'],
                source['context'],
                source.get('link'),  # the link is optional; None when absent
            ])
    return rows


def fetch_and_transform_data(num_pages, base_url, timeout=10):
    """Download pages 1..num_pages and return one row per (message, source) pair.

    Parameters
    ----------
    num_pages : int
        Number of pages to request; pages are numbered starting at 1.
    base_url : str
        Endpoint URL. The page number is sent as the ``page`` query parameter.
    timeout : float, optional
        Seconds to wait for each request before giving up (default 10).

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``response``, ``source_id``, ``source_context`` and
        ``source_link``. A page that fails to download or parse is reported
        with ``print`` and skipped as a whole, so no half-parsed page ends up
        in the result.
    """
    all_rows = []

    for page in range(1, num_pages + 1):
        try:
            # Let requests build the query string so an existing query in
            # base_url is merged correctly instead of producing a second '?'.
            response = requests.get(base_url, params={'page': page}, timeout=timeout)
            response.raise_for_status()
            # Parse the full page before extending, keeping each page all-or-nothing.
            all_rows.extend(_rows_from_page(response.json()))
        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
        except Exception as err:
            print(f"Other error occurred: {err}")

    df = pd.DataFrame(all_rows, columns=['id', 'response', 'source_id', 'source_context', 'source_link'])
    return df

In [5]:
# Download every page reported by the first request and flatten the result into df

num_pages = total_pages
df = fetch_and_transform_data(num_pages=num_pages, base_url=url)

In [6]:
# Preview the first 10 rows of the flattened response/source table.
df.head(10)

Unnamed: 0,id,response,source_id,source_context,source_link
0,1,"Yes, we offer online delivery services through...",71,Order online Thank you for your trust in us! W...,
1,1,"Yes, we offer online delivery services through...",8,"Breakfast Reservations\r For Breakfast, we rec...",https://www.brikoven.com/reservations
2,1,"Yes, we offer online delivery services through...",159,"Do you give franchise if the brand No, we curr...",
3,1,"Yes, we offer online delivery services through...",157,"8105462986 I'm sorry, but I don't have access ...",
4,1,"Yes, we offer online delivery services through...",73,Order online Thank you for your trust in us! W...,
5,1,"Yes, we offer online delivery services through...",57,Order online Order directly from us - [Order f...,
6,1,"Yes, we offer online delivery services through...",11,I would like to understand what the closest lo...,
7,1,"Yes, we offer online delivery services through...",75,Order online Thank you for your trust in us! ...,
8,1,"Yes, we offer online delivery services through...",9,LOOKING FOR OUR STORE? \r We are available at ...,https://www.brikoven.com/stores-direction-info...
9,1,"Yes, we offer online delivery services through...",2,Hand-Made Fresh Mozzarella cheese\r Made every...,https://www.brikoven.com/deli


In [7]:
# (rows, columns) of the flattened table.
df.shape

(1367, 5)

# Identify, for each response-sources pair, whether the response came from any of its sources, and list the sources from which the response was formed. Return an empty array if the response did not come from any source. The shortlisted sources are called citations.

In [8]:
# Count the rows whose source_link is missing (None or empty string) before links are recovered from the context text

missing_link_mask = df['source_link'].isin([None, ""])
count_null_or_empty = len(df[missing_link_mask])
count_null_or_empty

975

In [9]:
# Helper used to recover a missing source link from the URLs embedded in a source's context text

# An http(s) URL runs until whitespace or ')' (the end of a markdown link target).
url_pattern = re.compile(r'https?://[^\s)]+')

def find_first_url(text):
    """Return the first http(s) URL found in ``text``, or None.

    Non-string input (for example None or NaN from a missing context) yields
    None instead of raising. Trailing sentence punctuation, quotes and closing
    brackets picked up by the pattern are stripped from the result.
    """
    if not isinstance(text, str):
        return None
    match = url_pattern.search(text)
    if not match:
        return None
    candidate = match.group(0).rstrip(".,;:!?\"']>")
    # Stripping may leave only the scheme (e.g. "https://"); treat that as no URL.
    return candidate if url_pattern.fullmatch(candidate) else None
# Backfill source_link (in place, on df) for rows where it is None or "" using the
# first URL found in that row's source_context. Rows with no URL are left untouched.
# Selection and assignment are both label-based, so this does not depend on df
# having a default 0..n-1 index.
needs_link = df['source_link'].isin([None, ""])
recovered_links = df.loc[needs_link, 'source_context'].map(find_first_url)
recovered_links = recovered_links[recovered_links.notna()]
if not recovered_links.empty:
    df.loc[recovered_links.index, 'source_link'] = recovered_links

In [10]:
# Preview the first 10 rows again to see which source_link values were filled in from the context.
df.head(10)

Unnamed: 0,id,response,source_id,source_context,source_link
0,1,"Yes, we offer online delivery services through...",71,Order online Thank you for your trust in us! W...,https://orders.brikoven.com
1,1,"Yes, we offer online delivery services through...",8,"Breakfast Reservations\r For Breakfast, we rec...",https://www.brikoven.com/reservations
2,1,"Yes, we offer online delivery services through...",159,"Do you give franchise if the brand No, we curr...",
3,1,"Yes, we offer online delivery services through...",157,"8105462986 I'm sorry, but I don't have access ...",
4,1,"Yes, we offer online delivery services through...",73,Order online Thank you for your trust in us! W...,https://orders.brikoven.com
5,1,"Yes, we offer online delivery services through...",57,Order online Order directly from us - [Order f...,https://www.swiggy.com/direct/brand/7389?sourc...
6,1,"Yes, we offer online delivery services through...",11,I would like to understand what the closest lo...,https://www.brikoven.com/
7,1,"Yes, we offer online delivery services through...",75,Order online Thank you for your trust in us! ...,
8,1,"Yes, we offer online delivery services through...",9,LOOKING FOR OUR STORE? \r We are available at ...,https://www.brikoven.com/stores-direction-info...
9,1,"Yes, we offer online delivery services through...",2,Hand-Made Fresh Mozzarella cheese\r Made every...,https://www.brikoven.com/deli


In [11]:
# Count the rows whose source_link is still missing (None or empty string) after links were recovered from the context text

missing_link_mask = df['source_link'].isin([None, ""])
count_null_or_empty = len(df[missing_link_mask])
count_null_or_empty

759

# Return the citations for all objects coming from the API. 

In [12]:
# Collect one citation ({"id", "link"}) for every row that has a non-empty source_link.
# The result is a single flat list in row order; it is not grouped per response.

citations = [
    {"id": str(source_id), "link": source_link}
    for source_id, source_link in zip(df['source_id'], df['source_link'])
    if not (source_link is None or source_link == "")
]

In [13]:
# Number of citations collected.
len(citations)

608

In [14]:
# Display the full list of collected citations.
citations

[{'id': '71', 'link': 'https://orders.brikoven.com'},
 {'id': '8', 'link': 'https://www.brikoven.com/reservations'},
 {'id': '73', 'link': 'https://orders.brikoven.com'},
 {'id': '57',
  'link': 'https://www.swiggy.com/direct/brand/7389?source=swiggy-direct&subSource=generic'},
 {'id': '11', 'link': 'https://www.brikoven.com/'},
 {'id': '9', 'link': 'https://www.brikoven.com/stores-direction-info-copy'},
 {'id': '2', 'link': 'https://www.brikoven.com/deli'},
 {'id': '62', 'link': 'https://wa.me/message/I6HNGDKCG432F1'},
 {'id': '127', 'link': 'https://www.brikoven.com/qr-menu'},
 {'id': '126', 'link': 'https://www.brikoven.com/qr-menu'},
 {'id': '71', 'link': 'https://orders.brikoven.com'},
 {'id': '73', 'link': 'https://orders.brikoven.com'},
 {'id': '115', 'link': 'https://www.brikoven.com/qr-menu'},
 {'id': '4682',
  'link': 'https://www.drmalpani.com/knowledge-center/articles/cost-of-ivf'},
 {'id': '4721',
  'link': 'https://www.drmalpani.com/knowledge-center/articles/cost-of-ivf-t