# Tasks for laboratory assignment 1

In [18]:
# imports section

import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from datetime import datetime
import csv
import numpy as np
import matplotlib.pyplot as plt

## Extract webpage data given the url

Create a Python script that performs basic web scraping on a page to extract all the information into text and returns it as a string.
The returned string should not contain HTML tags.

In [None]:
import requests
from bs4 import BeautifulSoup

def parse_web_page(url):
    """
    Fetch a web page and return its text content with the HTML markup removed.

    Args:
        url (str): The URL of the web page to fetch.

    Returns:
        str: The page text with every run of whitespace collapsed to a single
            space. If the request fails, a "[ПОМИЛКА] ..." message naming the
            URL is returned instead of page content.

    Notes:
        Request failures (including unsuccessful HTTP status codes) are caught,
        reported with print() and turned into the error string described above;
        no requests exception propagates to the caller.
    """
    # Browser-like headers, used to avoid being blocked by the server.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/142.0.0.0 Safari/537.36"
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # When the Content-Type header carries no charset, requests decodes text
        # responses as ISO-8859-1, which garbles UTF-8 pages. Give BeautifulSoup
        # the raw bytes so it can honour the page's own <meta> declaration, and
        # force an encoding only when the server stated one explicitly.
        content_type = response.headers.get('Content-Type', '')
        declared_encoding = response.encoding if 'charset' in content_type.lower() else None
        soup = BeautifulSoup(response.content, 'html.parser', from_encoding=declared_encoding)

        # Script and style bodies are code rather than page text; drop them
        # explicitly because older Beautiful Soup releases include them in get_text().
        for code_tag in soup(['script', 'style']):
            code_tag.decompose()

        text = soup.get_text(separator=' ', strip=True)
        return ' '.join(text.split())

    except requests.exceptions.HTTPError as http_err:
        print(f"❌ HTTP Помилка: {http_err}")
        return f"[ПОМИЛКА] Не вдалося завантажити сторінку {url}"
    except requests.exceptions.RequestException as req_err:
        print(f"❌ Помилка запиту: {req_err}")
        return f"[ПОМИЛКА] Помилка підключення до {url}"


# Smoke test: print the first 255 characters extracted from each sample page.
for sample_url in (
    'https://fmi.chnu.edu.ua/',
    'https://en.wikipedia.org/wiki/Web_scraping',
    'https://books.toscrape.com/',
):
    print(parse_web_page(sample_url)[:255])

## Get data from the API

Create a Python script that performs a basic request to an API endpoint and saves the returned data to a JSON file `result.json`.

In [None]:
import requests
import json

def parse_api(api_url):
    """
    Fetch JSON data from an API endpoint and save it to result.json.

    The file is written to the current working directory as UTF-8 with
    4-space indentation; non-ASCII characters are stored unescaped.

    Args:
        api_url (str): The URL of the API endpoint.

    Returns:
        None. Failures (HTTP error status, connection problems, a body that is
        not valid JSON, or anything unexpected) are reported with print()
        instead of being raised. result.json is opened for writing only after
        the response body has been decoded successfully.
    """
    try:
        response = requests.get(api_url, timeout=10)
        response.raise_for_status()

        # Decode in a dedicated handler: recent requests releases raise a JSON
        # error that is also a RequestException, so in a shared handler chain the
        # generic request handler would claim it first. Every JSON decoding
        # error is a ValueError, whichever JSON backend requests uses.
        try:
            data = response.json()
        except ValueError:
            print(f"Помилка декодування JSON: Відповідь від {api_url} не є коректним JSON.")
            return

        with open('result.json', 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)

        print(f"Дані успішно збережено з {api_url}")

    except requests.exceptions.HTTPError as e:
        print(f"HTTP помилка: {e}")
    except requests.exceptions.RequestException as e:
        print(f"Помилка запиту: {e}")
    except Exception as e:
        # Deliberately broad: e.g. a failure while writing result.json.
        print(f"Несподівана помилка: {e}")

# Smoke test: call the function once for each sample endpoint.
for sample_endpoint in ('https://api.github.com/', 'https://example.com/'):
    parse_api(sample_endpoint)

## Parse the json file

Parse the `weather.json` file and return the weather data for a specific date, which is given as a parameter. Return the data as an array.

In [None]:
import json
import os


def parse_json(date):
    """
    Look up the weather records for one date in resources/weather.json.

    Args:
        date (str): The date to search for; it is compared for equality with
            the 'date' value of each record.

    Returns:
        list: Every dict record whose 'date' equals the given date, in file
            order. An empty list when nothing matches, when the top-level JSON
            value is not a list, or when the file cannot be read or decoded
            (a message is printed in the last two cases).
    """
    file_path = os.path.join('resources', 'weather.json')

    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            records = json.load(source)

        if isinstance(records, list):
            # Non-dict items are skipped rather than treated as an error.
            return [
                record
                for record in records
                if isinstance(record, dict) and record.get('date') == date
            ]

        print("❌ Помилка: Неочікуваний формат даних (очікувався список).")
    except FileNotFoundError:
        print(f"❌ Файл {file_path} не знайдено.")
    except json.JSONDecodeError:
        print(f"❌ Помилка декодування JSON у файлі {file_path}.")
    except Exception as e:
        print(f"❌ Виникла несподівана помилка: {e}")

    # Every path that did not return a list of matches above ends here.
    return []

# Smoke test: look up the records stored for a sample date.
target_date = '2024-8-19'
records_for_target = parse_json(target_date)
print(records_for_target)

## Parse the csv file

Parse the `weather.csv` file and return the weather data for a specific date, which is given as a parameter. Return the data as an array.

In [None]:
import csv
import os


def parse_csv(date):
    """
    Return the rows of resources/weather.csv whose 'CET' column equals date.

    Args:
        date (str): The date to look up, written exactly as it appears in the
            'CET' column (the comparison is a plain string match).

    Returns:
        list: One dict per matching row, mapping each column header to the
            cell text. Empty when nothing matches or when the file cannot be
            read (a message is printed in the latter case).
    """
    file_path = os.path.join('resources', 'weather.csv')

    try:
        # newline='' leaves line-ending handling to the csv module, as its
        # documentation requires. 'utf-8-sig' drops a leading byte-order mark so
        # the first header is read as 'CET' rather than '\ufeffCET'; files
        # without a mark are decoded exactly as with 'utf-8'.
        with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
            return [row for row in csv.DictReader(f) if row.get('CET') == date]

    except FileNotFoundError:
        print(f"❌ Файл {file_path} не знайдено. Перевірте шлях до папки resources.")
        return []
    except Exception as e:
        print(f"❌ Виникла помилка під час парсингу CSV: {e}")
        return []

# Smoke test: look up the rows stored for a sample date.
target_date = '1997-5-22'
print(f"Результат для {target_date}:")
rows_for_target = parse_csv(target_date)
print(rows_for_target)

# Additional check with a second date.
target_date_exist = '1997-1-2'
print(f"\nРезультат для {target_date_exist} (якщо є у файлі):")
rows_for_second_date = parse_csv(target_date_exist)
print(rows_for_second_date)

## Visualize data

Visualize the `weather.csv` data using matplotlib. Choose your own approach to data visualization. Save the results (as `.png`, `.webp` files, etc. — your choice) in this repository.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def visualize_data():
    """
    Plot resources/weather.csv with Matplotlib and save two PNG charts.

    More than one visualization is produced; both files are written to the
    current working directory:
      * temp_wind_scatter.png - max temperature against max wind speed;
      * max_temperature_boxplot.png - distribution of max temperature.

    Args:
        None.

    Returns:
        None. Problems (missing file, missing column, anything else) are
        reported with print() rather than raised.
    """
    file_path = os.path.join('resources', 'weather.csv')
    temp_col = 'Max TemperatureC'
    wind_col = 'Max Wind SpeedKm/h'

    try:
        data = pd.read_csv(file_path)

        # Look both columns up before any figure exists, so a missing column
        # is reported without leaving a half-built figure behind.
        max_temp = data[temp_col]
        max_wind = data[wind_col]

        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            ax.scatter(max_temp, max_wind, alpha=0.6, color='darkred',
                       edgecolors='white', linewidths=0.5)
            ax.set_title('Max Temperature vs. Max Wind Speed')
            ax.set_xlabel('Max Temperature (°C)')
            ax.set_ylabel('Max Wind Speed (Km/h)')
            ax.grid(True, linestyle='--', alpha=0.6)
            fig.tight_layout()
            fig.savefig('temp_wind_scatter.png')
        finally:
            # Close even when plotting or saving fails, so figures do not pile up.
            plt.close(fig)

        fig, ax = plt.subplots(figsize=(6, 5))
        try:
            # boxplot does not skip missing values: a single NaN turns the
            # quartiles into NaN and the box is not drawn, so drop them first.
            ax.boxplot(max_temp.dropna(), vert=False, patch_artist=True,
                       boxprops=dict(facecolor='lightblue', color='blue'),
                       medianprops=dict(color='red'))
            ax.set_title('Distribution of Max Temperature')
            ax.set_xlabel('Max Temperature (°C)')
            ax.set_yticks([1])
            ax.set_yticklabels([temp_col])
            fig.tight_layout()
            fig.savefig('max_temperature_boxplot.png')
        finally:
            plt.close(fig)

        print("Візуалізації збережено: 'temp_wind_scatter.png' та 'max_temperature_boxplot.png'.")

    except FileNotFoundError:
        print(f"❌ Файл {file_path} не знайдено.")
    except KeyError as e:
        print(f"❌ Не знайдено необхідну колонку у CSV: {e}. Перевірте назви колонок.")
    except Exception as e:
        print(f"❌ Виникла помилка: {e}")

# Run the visualization once so that both PNG charts are written to disk.
visualize_data()