In [None]:
# Module 15 - Python Package Manager Exercises
# This cell includes imports and any setup needed.

import requests
import re
from collections import Counter
import json
import math
from bs4 import BeautifulSoup

In [None]:
# Exercise 1: Romeo and Juliet - 10 Most Frequent Words

def most_frequent_words(url, num_words=10, timeout=10):
    """Download a plain-text document and return its most common words.

    The text is lower-cased before counting, so "The" and "the" are the
    same word.

    Args:
        url: Address of the text document to download.
        num_words: Number of (word, count) pairs to return.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot hang the notebook.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent, or ``None`` when the download fails (the error is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses
        text = response.text.lower()
        # A word is a run of letters, optionally joined by apostrophes.
        # Plain \w+ would also accept digits and underscores, so "_exit_"
        # and "exit" would be tallied separately, and it would split
        # "lov'd" into the fragments "lov" and "d".
        words = re.findall(r"[^\W\d_]+(?:['\u2019][^\W\d_]+)*", text)
        return Counter(words).most_common(num_words)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching or processing URL: {e}")
        return None

# Source text for Exercise 1.
romeo_and_juliet_url = 'http://www.gutenberg.org/files/1112/1112.txt'
top_words = most_frequent_words(romeo_and_juliet_url)

# most_frequent_words returns None on a failed download; skip the report then.
if top_words:
    print("Romeo and Juliet - 10 Most Frequent Words:")
    print(*(f"{word}: {count}" for word, count in top_words), sep="\n")

In [None]:
# Exercise 2: Cats API - Weight and Lifespan Stats

def calculate_stats(data, key):
    """Summarise the numeric values stored under ``key`` in a list of records.

    Records that lack ``key``, hold ``None``, hold something that cannot be
    converted to ``float`` (for example ``"12 - 15"``), or hold NaN are
    skipped instead of aborting the whole calculation.

    Args:
        data: Iterable of records that can be subscripted with ``key``.
        key: Field to read from each record.

    Returns:
        A tuple ``(min, max, mean, median, std_dev)`` of floats, where
        ``std_dev`` is the population standard deviation (divides by ``n``).
        All five entries are ``None`` when no usable value is found.
    """
    values = []
    for item in data:
        try:
            raw = item[key]
        except (KeyError, IndexError, TypeError):
            continue  # record has no such field
        if raw is None:
            continue
        try:
            number = float(raw)
        except (TypeError, ValueError, OverflowError):
            continue  # not a number
        # Covers the string 'NaN' as well as real NaN floats; either would
        # turn every statistic below into NaN.
        if math.isnan(number):
            continue
        values.append(number)

    if not values:
        return None, None, None, None, None

    count = len(values)
    min_val = min(values)
    max_val = max(values)
    mean_val = sum(values) / count

    sorted_values = sorted(values)
    mid = count // 2
    if count % 2 == 0:
        # Even number of values: average the two middle ones.
        median_val = (sorted_values[mid - 1] + sorted_values[mid]) / 2
    else:
        median_val = sorted_values[mid]

    variance = sum((x - mean_val) ** 2 for x in values) / count
    std_val = math.sqrt(variance)
    return min_val, max_val, mean_val, median_val, std_val


def _range_lower_bound(raw):
    """Return the first number of an ``"x - y"`` string, or None if unusable."""
    if not isinstance(raw, str):
        return None
    try:
        value = float(raw.split(" - ")[0])
    except ValueError:
        return None
    return None if math.isnan(value) else value


def _print_stats(title, stats):
    """Print one labelled five-value summary; print nothing when it is empty."""
    if stats[0] is None:
        return
    print(title)
    for label, value in zip(("Min", "Max", "Mean", "Median", "Std Dev"), stats):
        print(f"  {label}: {value:.2f}")


def cats_stats(url, timeout=10):
    """Fetch cat breeds and print weight and lifespan summary statistics.

    Each breed's metric weight and life span arrive as range strings such as
    ``"3 - 5"``; the lower bound of each range is the value that is
    summarised. Breeds whose value is missing or unparseable are left out of
    that statistic only.

    Args:
        url: Endpoint returning a JSON list of breed objects.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Results, or an error message, are printed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        breeds = response.json()

        # Build flat records that calculate_stats can read directly. The raw
        # breed objects cannot be passed as-is: they have no top-level
        # 'metric' key and their values are range strings, not numbers.
        records = []
        for cat in breeds:
            if not isinstance(cat, dict):
                continue
            weight = cat.get('weight')
            metric = weight.get('metric') if isinstance(weight, dict) else None
            records.append({
                'weight': _range_lower_bound(metric),
                'life_span': _range_lower_bound(cat.get('life_span')),
            })

        _print_stats("\nCat Weights (Metric):", calculate_stats(records, 'weight'))
        _print_stats("\nCat Lifespans (Years):", calculate_stats(records, 'life_span'))

    except requests.exceptions.RequestException as e:
        print(f"Error fetching or processing Cat API: {e}")
    except Exception as e:
        # Best effort: report anything unexpected instead of stopping the notebook.
        print(f"Error processing: {e}")


# Run Exercise 2 against the cat breeds endpoint; cats_stats prints its results.
cats_api_url = 'https://api.thecatapi.com/v1/breeds'
cats_stats(cats_api_url)

In [None]:
# Exercise 3: Cats API - Frequency Table of Country and Breed

def create_frequency_table(url, timeout=10):
    """Fetch cat breeds and print how often each breed occurs per country.

    Args:
        url: Endpoint returning a JSON list of breed objects with ``origin``
            and ``name`` fields.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. The table, or an error message, is printed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON on requests
        # versions whose decode error is not a RequestException.
        print(f"Error fetching or processing Cat API: {e}")
        return

    # country -> Counter of breed names, in first-seen order.
    freq_table = {}
    for cat in data:
        if not isinstance(cat, dict):
            continue  # ignore entries that are not breed objects
        # `or` also maps empty or null fields to 'Unknown', not just missing ones.
        country = cat.get('origin') or 'Unknown'
        breed = cat.get('name') or 'Unknown'
        freq_table.setdefault(country, Counter())[breed] += 1

    print("\nCat Country and Breed Frequency Table:")
    for country, breeds in freq_table.items():
        print(f"  {country}:")
        for breed, count in breeds.items():
            print(f"    - {breed}: {count}")


# Same breeds endpoint as Exercise 2, assigned again here so this cell does
# not rely on the earlier cell having been run.
cats_api_url = 'https://api.thecatapi.com/v1/breeds'
create_frequency_table(cats_api_url)

In [None]:
# Exercise 4: Countries API - Largest Countries and Languages
def country_stats(url, timeout=10):
    """Fetch country data and print area and language summaries.

    Prints three sections: the ten countries with the largest ``area``, the
    ten languages listed by the most countries (the count is the number of
    countries, not the number of speakers), and the number of distinct
    languages across all countries.

    Args:
        url: Endpoint returning a JSON list of country objects with
            ``name.common``, ``area`` and ``languages`` fields.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Results, or an error message, are printed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        countries = [entry for entry in response.json() if isinstance(entry, dict)]

        # 10 Largest Countries
        # `or 0` also covers an explicit null area, which would otherwise
        # make the sort compare None with numbers and raise TypeError.
        sorted_countries = sorted(
            countries, key=lambda country: country.get('area') or 0, reverse=True
        )[:10]
        print("\n10 Largest Countries:")
        for country in sorted_countries:
            name = (country.get('name') or {}).get('common', 'Unknown')
            area = country.get('area')
            print(f"  {name}: {'N/A' if area is None else area} sq km")

        # 10 Most Spoken Languages
        language_counts = Counter(
            language
            for country in countries
            for language in (country.get('languages') or {}).values()
        )
        print("\n10 Most Spoken Languages:")
        for language, count in language_counts.most_common(10):
            print(f"  {language}: {count}")

        # Total Number of Languages: count each language once. Summing the
        # per-country language counts would count a language shared by N
        # countries N times.
        total_languages = len(language_counts)
        print(f"\nTotal Number of Languages: {total_languages}")

    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON on requests
        # versions whose decode error is not a RequestException.
        print(f"Error fetching or processing Countries API: {e}")

# Run Exercise 4 against the REST Countries v3.1 `/all` endpoint.
# NOTE(review): confirm this endpoint still answers without a `fields` filter;
# country_stats only reads each country's `name`, `area` and `languages`.
countries_api_url = 'https://restcountries.com/v3.1/all'
country_stats(countries_api_url)

In [None]:
# Exercise 5: UCI Datasets - Web Scraping

def fetch_uci_datasets(url, timeout=10):
    """Scrape a dataset listing page and print the name of each dataset.

    The page is expected to contain a ``<table border="1" cellpadding="3">``.
    The first table row is skipped and the second cell of every later row is
    printed as the dataset name.

    Args:
        url: Address of the HTML page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Dataset names, or an error message, are printed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        table = soup.find('table', {'border': '1', 'cellpadding': '3'})
        if table is None:
            print("No table was found with those attributes.")
            return

        # rows[0] is skipped, presumably because it is the header row.
        for row in table.find_all('tr')[1:]:
            cells = row.find_all('td')
            if len(cells) < 2:
                continue
            # Collapse the newlines and indentation that surround cell text
            # in the HTML so each name prints on a single clean line.
            name = " ".join(cells[1].text.split())
            if name:
                print(f"dataset: {name}")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching UCI page: {e}")
    except Exception as e:
        # Best effort: report parsing problems instead of stopping the notebook.
        print(f"An error has occurred: {e}")


# Run Exercise 5 against the UCI dataset listing page.
# NOTE(review): fetch_uci_datasets looks for a <table border="1" cellpadding="3">
# on this page; confirm the URL still serves that layout.
uci_url = 'https://archive.ics.uci.edu/ml/datasets.php'
fetch_uci_datasets(uci_url)