**Import the necessary libraries**

In [1]:
import json
import os
from html import escape

import certifi
import matplotlib.pyplot as plt
import numpy as np
import requests
from IPython.display import display, HTML
from matplotlib.ticker import FixedLocator
from matplotlib.ticker import FuncFormatter

**Replace with your API key**

In [2]:
# Prefer the SIA_API_KEY environment variable so the secret does not have to be
# stored in the notebook. When it is unset or empty, fall back to the literal
# below (replace it with your own key, and avoid committing a real key).
api_key = os.environ.get("SIA_API_KEY") or "255446bcdde7ca9fe776258d09e8411bbb8d1cade2ebd6aba440f80f6817c3fd"

**EX2: The size of character parts from largest to smallest in these 20 plays**  

**Text set = Shakespeare 20 for demos**  
**Segment by text and by character**  
**Output – n=640**  
**Transform and chart – sort by Size, largest to smallest. Chart this row as a scatterplot.**

In [None]:
# ID of the text set to query; replace with your own text set ID.
textset_id = 86

# Single-character tokens to exclude from the search, one list entry per character.
excludeWords = list("[\\]_`!\"#%'()+,-–./:;{|}=~?")

request_url = "https://sia.ardc-hdcl-sia-iaw.cloud.edu.au/api/v1/word-frequencies"

# Build the option section first, then wrap it in the request payload.
segmentation_options = dict(
    segmentByCharacter=True,   # segment by character
    blockMethod=0,             # segment by text
    outputSize=640,
    excludeWords=excludeWords,
)
character_parts_request = {"textSet": textset_id, "option": segmentation_options}

# POST the query as JSON; the API key travels in the X-API-KEY header.
auth_headers = {"X-API-KEY": api_key}
response = requests.post(
    request_url,
    json=character_parts_request,
    headers=auth_headers,
    timeout=1200,
)

# Accumulators filled while walking the blocks of the response.
character_frequency_map = {}        # word -> 'value' summed over every block
character_frequency_text_map = {}   # word -> {block name -> summed 'value'}
text_name = set()                   # every block name seen
word_type_map = {}                  # block name -> 'uniqueWordCount'
word_size_map = {}                  # block name -> 'size'

# Handle the response
if response.status_code == 200:
    response_data = response.json()
    blocks = response_data.get("blocks", [])

    for block in blocks:
        freqs = block.get('frequencies', [])
        name = block.get('name', 'Unknown')

        for freq in freqs:
            word = freq.get('word', "Unknown")
            word_count = freq.get('value', 0)

            # Running total across all blocks.
            character_frequency_map[word] = character_frequency_map.get(word, 0) + word_count

            # Running total for this word within this block.
            per_block_counts = character_frequency_text_map.setdefault(word, {})
            per_block_counts[name] = per_block_counts.get(name, 0) + word_count

        text_name.add(name)
        word_type_map[name] = block.get('uniqueWordCount', 0)
        word_size_map[name] = block.get('size', 0)

    # Table columns follow the order in which blocks appeared in the response.
    # Iterating the `text_name` set instead would reshuffle the columns between
    # kernel restarts, because string hashing is randomised per process.
    column_names = list(word_size_map)

    # Sort all entries by their summed value, largest first (no truncation is applied here).
    sorted_characters = sorted(character_frequency_map.items(), key=lambda x: x[1], reverse=True)
    names = [item[0] for item in sorted_characters]
    sizes = [item[1] for item in sorted_characters]

    # NOTE(review): the values plotted are the summed 'value' counts per 'word',
    # not the per-block 'size' shown in the table's "Size" row -- confirm this is
    # the series the chart is meant to show.

    # Scatterplot
    plt.figure(figsize=(16, 6))
    ranks = np.arange(1, len(names) + 1)
    plt.scatter(ranks, sizes, c='blue')
    # Keep the original 0-700 axis, but widen it (in steps of 100) when there are
    # more points than that so none of them are clipped.
    x_max = max(700, int(np.ceil(len(names) / 100)) * 100)
    plt.xticks(np.arange(0, x_max + 1, 100))
    plt.xlim([0, x_max])
    plt.xlabel('Rank')
    plt.ylabel('Size of Spoken Part')
    plt.title('Size of Character Parts in 20 Shakespeare Plays')
    plt.show()

    # Table display: collect fragments in a list and join once at the end.
    # Names and words come from the API response, so they are HTML-escaped
    # before being placed into the markup.
    html_parts = ['<div style="overflow: auto; max-height: 500px; margin-top: 40px;"><table border="1">']

    # Table header
    html_parts.append('<tr><th>Word</th>')
    for name in column_names:
        html_parts.append(f'<th style="white-space: nowrap;">{escape(str(name))}</th>')
    html_parts.append('</tr>')

    # Adding data rows
    for word in names:
        html_parts.append(f"<tr><td>{escape(str(word))}</td>")
        row_counts = character_frequency_text_map.get(word, {})
        for name in column_names:
            frequency = row_counts.get(name, 0)
            html_parts.append(f"<td>{frequency}</td>")
        html_parts.append("</tr>")

    # Adding Word Types row
    html_parts.append("<tr><td>Word Types</td>")
    for name in column_names:
        word_types = word_type_map.get(name, 0)
        html_parts.append(f"<td>{escape(str(word_types))}</td>")
    html_parts.append("</tr>")

    # Adding Sizes row
    html_parts.append("<tr><td>Size</td>")
    for name in column_names:
        size = word_size_map.get(name, 0)
        html_parts.append(f"<td>{escape(str(size))}</td>")
    html_parts.append("</tr>")

    html_parts.append('</table></div>')
    html = "".join(html_parts)

    display(HTML(html))

else:
    # Any non-200 status: report it instead of attempting to parse the body.
    print(f"Failed: {response.status_code} {response.reason}")