## Exercises: Day 20

### Python PIP - Python Package Manager

In [5]:
#1
import requests
from collections import Counter
import re

romeo_and_juliet_url = 'http://www.gutenberg.org/files/1112/1112.txt'

def find_most_frequent_words(url, n=10):
    response = requests.get(url)
    if response.status_code == 200:
        text = response.text
        words = re.findall(r'\b\w+\b', text.lower())
        word_counts = Counter(words)
        return word_counts.most_common(n)
    else:
        return None

# Task 1
romeo_and_juliet_top_words = find_most_frequent_words(romeo_and_juliet_url, 10)
print("Task 1: 10 Most Frequent Words in 'Romeo and Juliet':")
print(romeo_and_juliet_top_words)


Task 1: 10 Most Frequent Words in 'Romeo and Juliet':
None


In [37]:
#2
import requests
import statistics
from collections import Counter

cats_api_url = 'https://api.thecatapi.com/v1/breeds'

def process_weight(weight_str):
    try:
        weight = float(weight_str.split()[0])
        return weight
    except ValueError:
        return None

def process_lifespan(lifespan_str):
    try:
        lifespan_values = [int(value) for value in lifespan_str.split() if value.isdigit()]
        if len(lifespan_values) > 0:
            return max(lifespan_values)  # Taking the maximum value if multiple values are present
        else:
            return None
    except ValueError:
        return None

def analyze_cats_api(url):
    response = requests.get(url)
    if response.status_code == 200:
        cat_data = response.json()

        # Process weights and lifespans
        weights = [process_weight(cat.get('weight', {}).get('metric', '')) for cat in cat_data]
        lifespans = [process_lifespan(cat.get('life_span', '')) for cat in cat_data]

        # Remove None values from weights and lifespans
        weights = [weight for weight in weights if weight is not None]
        lifespans = [lifespan for lifespan in lifespans if lifespan is not None]

        # Task 1: Weight Analysis
        weight_min = min(weights)
        weight_max = max(weights)
        weight_mean = statistics.mean(weights)
        weight_median = statistics.median(weights)
        weight_stddev = statistics.stdev(weights)

        # Task 2: Lifespan Analysis
        lifespan_min = min(lifespans)
        lifespan_max = max(lifespans)
        lifespan_mean = statistics.mean(lifespans)
        lifespan_median = statistics.median(lifespans)
        lifespan_stddev = statistics.stdev(lifespans)

        # Task 3: Frequency Table of Country and Breed
        country_breed_freq = Counter((cat.get('origin', ''), cat.get('name', '')) for cat in cat_data)

        return (weight_min, weight_max, weight_mean, weight_median, weight_stddev), \
               (lifespan_min, lifespan_max, lifespan_mean, lifespan_median, lifespan_stddev), \
               country_breed_freq
    else:
        return None


weight_stats, lifespan_stats, country_breed_freq = analyze_cats_api(cats_api_url)
print("\nCats' Weight Statistics (in metric units):")
print(weight_stats)
print("\nCats' Lifespan Statistics (in years):")
print(lifespan_stats)
print("\nFrequency Table of Country and Breed:")
print(country_breed_freq)



Cats' Weight Statistics (in metric units):
(2.0, 5.0, 3.2238805970149254, 3.0, 0.8845628182703051)

Cats' Lifespan Statistics (in years):
(12, 20, 15.417910447761194, 15, 1.6343774065406076)

Frequency Table of Country and Breed:
Counter({('Egypt', 'Abyssinian'): 1, ('Greece', 'Aegean'): 1, ('United States', 'American Bobtail'): 1, ('United States', 'American Curl'): 1, ('United States', 'American Shorthair'): 1, ('United States', 'American Wirehair'): 1, ('United Arab Emirates', 'Arabian Mau'): 1, ('Australia', 'Australian Mist'): 1, ('United States', 'Balinese'): 1, ('United States', 'Bambino'): 1, ('United States', 'Bengal'): 1, ('France', 'Birman'): 1, ('United States', 'Bombay'): 1, ('United Kingdom', 'British Longhair'): 1, ('United Kingdom', 'British Shorthair'): 1, ('Burma', 'Burmese'): 1, ('United Kingdom', 'Burmilla'): 1, ('United States', 'California Spangled'): 1, ('United States', 'Chantilly-Tiffany'): 1, ('France', 'Chartreux'): 1, ('Egypt', 'Chausie'): 1, ('United State

In [38]:
#3
import requests

countries_api_url = 'https://restcountries.eu/rest/v2/all'

def analyze_countries_api(url):
    response = requests.get(url)
    if response.status_code == 200:
        countries_data = response.json()

        # Task 1: 10 Largest Countries
        largest_countries = sorted(countries_data, key=lambda x: x['area'], reverse=True)[:10]

        # Task 2: 10 Most Spoken Languages
        languages = [language['name'] for country in countries_data for language in country['languages']]
        most_spoken_languages = [language[0] for language in Counter(languages).most_common(10)]

        # Task 3: Total Number of Languages
        total_languages = len(set(languages))

        return largest_countries, most_spoken_languages, total_languages
    else:
        return None

# Task 3
largest_countries, most_spoken_languages, total_languages = analyze_countries_api(countries_api_url)
print("\n10 Largest Countries:")
print(largest_countries)
print("\n10 Most Spoken Languages:")
print(most_spoken_languages)
print("\nTotal Number of Languages:")
print(total_languages)


KeyboardInterrupt: 

In [19]:
import requests
from bs4 import BeautifulSoup

uci_url = 'https://archive.ics.uci.edu/ml/datasets.php'

def get_uci_datasets(url):
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        datasets = [a.text.strip() for a in soup.select('p a')]
        return datasets
    else:
        return None


uci_datasets = get_uci_datasets(uci_url)
print("\nUCI Datasets:")
print(uci_datasets)



UCI Datasets:
None
