In [None]:
import requests
import pandas as pd
import matplotlib.pyplot as plt

def extractAPI() -> pd.DataFrame:
    """Download the Indonesian university list and return it as a DataFrame.

    Returns:
        A DataFrame built from the JSON payload of the API response. An empty
        DataFrame is returned (after printing a message) when the request
        cannot be completed, the server answers with a status other than 200,
        or the response body is not valid JSON.
    """
    API_URL = 'http://universities.hipolabs.com/search?country=Indonesia'

    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(API_URL, timeout=30)
    except requests.RequestException as exc:
        # Connection errors and timeouts take the same failure path as a bad
        # status code instead of aborting the caller with a traceback.
        print("Failed to retrieve data.", exc)
        return pd.DataFrame()

    if response.status_code != 200:
        print("Failed to retrieve data.")
        return pd.DataFrame()  # Return an empty DataFrame in case of error

    try:
        payload = response.json()
    except ValueError as exc:
        # response.json() raises a ValueError subclass on a malformed body.
        print("Failed to retrieve data.", exc)
        return pd.DataFrame()

    return pd.DataFrame(payload)

# Download the university list (extractAPI returns an empty DataFrame on failure).
data_api = extractAPI()

# Show which columns the API actually delivered.
print("Columns in DataFrame:", data_api.columns)

# The public hipolabs API names these fields "alpha_two_code" and
# "state-province"; map them onto the short names used by the rest of this
# script. A frame that already uses the short names is left unchanged.
data_api = data_api.rename(
    columns={"alpha_two_code": "alpha_code", "state-province": "province"}
)

# reindex() returns a new frame, so the assignments below do not write into a
# slice of the raw response, and it creates any missing column filled with NaN
# instead of raising KeyError (e.g. when the download failed).
data_api = data_api.reindex(
    columns=["name", "alpha_code", "province", "domains", "web_pages"]
)

data_api["province"] = data_api["province"].fillna('unknown')

# Keep only the first domain; an empty list or a missing value becomes 'unknown'.
data_api["domains"] = data_api["domains"].apply(
    lambda x: x[0] if isinstance(x, (list, tuple)) and x else 'unknown'
)

# Simple text features of the university name; a missing name counts as 0.
data_api["name_length"] = data_api["name"].apply(
    lambda x: len(x) if isinstance(x, str) else 0
)
data_api["name_word_count"] = data_api["name"].apply(
    lambda x: len(x.split()) if isinstance(x, str) else 0
)

print(data_api.head())

# Data visualization: scatter plot of how many web pages each university lists.
if 'web_pages' not in data_api.columns:
    print("Column 'web_pages' not found or not numeric. Adjust plot settings accordingly.")
elif data_api.empty:
    # Nothing was downloaded, so there is nothing to draw.
    print("No rows to plot.")
else:
    # Count the entries of each 'web_pages' list. Measuring the length of the
    # list's string form would count characters, not pages. Values that are
    # not lists (for example counts left by an earlier run of this cell) pass
    # through unchanged, so re-running the cell is harmless.
    data_api["web_pages"] = data_api["web_pages"].apply(
        lambda pages: len(pages) if isinstance(pages, (list, tuple)) else pages
    )

    # Create the axes explicitly and hand them to pandas. DataFrame.plot opens
    # its own figure otherwise, which leaves a blank extra figure behind and
    # ignores the requested size.
    fig, ax = plt.subplots(figsize=(12, 6))
    data_api.plot(kind='scatter', x='name', y='web_pages', alpha=0.7, edgecolors='w', ax=ax)
    ax.set_xlabel('University Name')
    ax.set_ylabel('Number of Web Pages')
    ax.set_title('Number of Web Pages by University')
    ax.tick_params(axis='x', rotation=90)
    fig.tight_layout()  # Adjust layout to prevent clipping of labels
    plt.show()
