# This notebook fetches the category IDs of the Discourse categories (courses) that we want to analyse. For example, the category "mlp" maps to category ID 33.

In [None]:
import pandas as pd
import yaml
# Load the API credentials from key.yaml, located one directory above the
# notebook's working directory.
# A forward slash is used on purpose: in a normal string literal "\k" is an
# invalid escape sequence (it triggers a warning on recent Python versions),
# and a backslash-separated path only resolves on Windows. "../key.yaml" is
# accepted on Windows and POSIX alike.
with open("../key.yaml", "r") as file:
    api_keys = yaml.safe_load(file)
# api_keys  # keep commented out: displaying it would print the API key

In [None]:
import requests
import pandas as pd
import time
import json  # Importing json to handle JSON data

def execute_query(query_id, query_params=None):
    """Run a group report query on the Discourse server and collect every page.

    The query is POSTed to ``/g/<group>/reports/<query_id>/run`` once per
    page, starting at page 0, until a page reports ``result_count == 0`` or
    an error occurs. Each payload is printed before it is sent.

    Args:
        query_id: Identifier of the report; interpolated into the request URL.
        query_params: Optional dict of extra parameters sent alongside the
            page number. A ``"page"`` key in it is overridden, because the
            page number is driven by this function's own pagination loop.

    Returns:
        A pandas DataFrame with one row per result row, keyed by the
        response's ``columns``. If a request, JSON-parsing or missing-key
        error occurs, a diagnostic is printed and the rows collected so far
        are returned.

    Raises:
        ValueError: If ``query_params`` is neither ``None`` nor a dict.
    """
    # Validate the input first, before touching credentials or the network.
    if query_params is not None and not isinstance(query_params, dict):
        raise ValueError("Query parameters must be a dictionary.")

    DISCOURSE_BASE_URL = "https://discourse.onlinedegree.iitm.ac.in"
    GROUP_NAME = "discourse_analytics"
    API_KEY_GLOBAL = api_keys["API_KEY"]  # Read from the module-level api_keys mapping
    API_USERNAME = 'shubhamG'
    REQUEST_TIMEOUT_SECONDS = 60  # Without a timeout a stalled connection blocks forever
    PAGE_DELAY_SECONDS = 1.4  # Pause between successive page requests

    # The URL and headers do not change between pages, so build them once.
    request_url = f"{DISCOURSE_BASE_URL}/g/{GROUP_NAME}/reports/{query_id}/run"
    # NOTE(review): the body sent below is a plain "params=<json>" string even
    # though the declared Content-Type is multipart/form-data; kept unchanged
    # because the server appears to accept it -- confirm before altering.
    headers = {
        "Accept": "*/*",
        "Api-Key": API_KEY_GLOBAL,
        "Api-Username": API_USERNAME,
        "Content-Type": "multipart/form-data"
    }

    results_list = []  # One dict per result row, accumulated across pages
    page_number = 0

    while True:
        # The page entry comes first so the serialized payload keeps its
        # original key order; it is re-assigned after the update so that a
        # caller-supplied "page" cannot pin the loop to a single page.
        payload = {'page': str(page_number)}
        if query_params:
            payload.update(query_params)
            payload['page'] = str(page_number)
        data_payload = 'params=' + json.dumps(payload)

        try:
            print(data_payload)
            response = requests.request(
                "POST",
                request_url,
                data=data_payload,
                headers=headers,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()  # Turn HTTP error statuses into exceptions

            json_response = response.json()

            # An empty page marks the end of the result set.
            if json_response["result_count"] == 0:
                break

            # Pair every row with the column names to get one dict per row.
            results_list.extend(
                dict(zip(json_response['columns'], row))
                for row in json_response['rows']
            )

        except requests.exceptions.RequestException as e:
            # Always report the failure; connection errors and timeouts carry
            # no response object and would otherwise pass silently.
            print(f'Request error: {e}')
            error_response = getattr(e, 'response', None)
            if error_response is not None:
                print(f'Status code: {error_response.status_code}')
                print(f'Error text: {error_response.text}')
            break
        except ValueError as e:
            # The response body could not be parsed as JSON.
            print(f'Error parsing JSON: {e}')
            break
        except KeyError as e:
            # The response JSON lacks one of the expected keys.
            print(f'Key error: {e}')
            break

        page_number += 1
        time.sleep(PAGE_DELAY_SECONDS)

    return pd.DataFrame(results_list)



# Fetch every page of report 107; no extra query parameters are needed,
# so query_params is left at its default of None.
req_data = execute_query(query_id=107)

params={"page": "0"}
params={"page": "1"}


In [6]:
# Print a summary of the fetched DataFrame (columns, non-null counts, dtypes).
req_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69 entries, 0 to 68
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   category_id  69 non-null     int64 
 1   name         69 non-null     object
dtypes: int64(1), object(1)
memory usage: 1.2+ KB


In [7]:
# Preview the first five rows (head() returns five rows by default).
req_data.head()

Unnamed: 0,category_id,name
0,18,Mathematics for Data Science I
1,19,Statistics for Data Science I
2,20,Computational Thinking
3,21,English I
4,22,English II


In [9]:
# Save the fetched rows to a CSV file, leaving out the DataFrame index column.
req_data.to_csv(path_or_buf="all_category_ids.csv", index=False)