In [None]:
#Copernicus climate indicators graphic and metadata
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
import shutil
import re
from google.colab import files

# download an image
def download_image(image_url, folder_path, filename):
    response = requests.get(image_url, stream=True)
    if response.status_code == 200:
        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
    del response

# clean up the description
def clean_description(description):
    description = re.sub(r'HIGH-RESOLUTION IMAGE', '', description).strip()
    return description

# extract key messages from each indicator page
def extract_key_messages(soup):
    key_messages_div = soup.find('div', class_='key--messages')
    if key_messages_div:
        key_messages_list = key_messages_div.find_all('li')
        key_messages = ' | '.join([li.text.strip() for li in key_messages_list])
        return key_messages
    return 'No key messages'

# base URL
base_url = 'https://climate.copernicus.eu'

# each climate indicator url
indicator_urls = {
    'Temperature': 'https://climate.copernicus.eu/climate-indicators/temperature',
    'Sea level': 'https://climate.copernicus.eu/climate-indicators/sea-level',
    'Greenhouse gas concentrations': 'https://climate.copernicus.eu/climate-indicators/greenhouse-gas-concentrations',
    'Greenhouse gas fluxes': 'https://climate.copernicus.eu/climate-indicators/greenhouse-gas-fluxes',
    'Glaciers': 'https://climate.copernicus.eu/climate-indicators/glaciers',
    'Ice sheets': 'https://climate.copernicus.eu/climate-indicators/ice-sheets',
    'Sea ice': 'https://climate.copernicus.eu/climate-indicators/sea-ice',
    'Sea surface temperature': 'https://climate.copernicus.eu/climate-indicators/sea-surface-temperature'
}

image_folder = 'downloaded_images'
os.makedirs(image_folder, exist_ok=True)


all_data = []

for indicator_name, url in indicator_urls.items():
    response = requests.get(url)
    response.encoding = 'utf-8'
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        key_messages = extract_key_messages(soup)

        for tab_content in soup.find_all('div', class_='paragraph--type--tab'):
            tab_title = tab_content.find('h3').text if tab_content.find('h3') else 'No Title'
            img_tag = tab_content.find('img', class_='align-center')
            if img_tag:
                relative_image_url = img_tag.get('src', '')
                absolute_image_url = base_url + relative_image_url
                image_filename = os.path.basename(relative_image_url)
                download_image(absolute_image_url, image_folder, image_filename)
                local_image_path = os.path.join(image_folder, image_filename)
                description_tag = img_tag.find_next('p')
                description = clean_description(description_tag.text) if description_tag else 'No Description'
                all_data.append([indicator_name, tab_title, local_image_path, description, key_messages])
    else:
        print(f"Failed to retrieve data for {indicator_name}")

df = pd.DataFrame(all_data, columns=['Indicator', 'Tab Title', 'Local Image Path', 'Description', 'Key Messages'])
df.to_csv('climate_indicators_with_images.csv', index=False, encoding='utf-8-sig')

# zip file as it would be v time-consuming to download the full folder
shutil.make_archive(image_folder, 'zip', image_folder)


files.download(image_folder + '.zip')
from google.colab import files
files.download('climate_indicators_with_images.csv')

print("Data scraped, images downloaded and compressed successfully.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Data scraped, images downloaded and compressed successfully.


In [1]:
from google.colab import files
files.download('climate_reports_images.csv')

FileNotFoundError: Cannot find file: climate_reports_images.csv

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
import shutil
from urllib.parse import urljoin


def download_image(url, folder, filename):
    response = requests.get(url, stream=True)
    with open(os.path.join(folder, filename), 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    del response


def clean_description(description):
    return ' '.join(description.split())


base_url = 'https://climate.copernicus.eu'

# URL for the "Global Climate Highlights 2023" page
page_url = 'https://climate.copernicus.eu/global-climate-highlights-2023'


image_folder = 'downloaded_images'
os.makedirs(image_folder, exist_ok=True)


response = requests.get(page_url)
soup = BeautifulSoup(response.content, 'html.parser')


all_data = []


for section in soup.find_all('div', class_='paragraph--type--tab'):
    tab_title = section.find('h2', class_='ccl-block-title').get_text(strip=True) if section.find('h2', class_='ccl-block-title') else 'No Title'
    img_tag = section.find('img')
    key_messages_tag = section.find('div', class_='key--messages')

    if img_tag:
        relative_image_url = img_tag.get('src')
        absolute_image_url = urljoin(base_url, relative_image_url)
        image_filename = os.path.basename(relative_image_url)
        download_image(absolute_image_url, image_folder, image_filename)
        local_image_path = os.path.join(image_folder, image_filename)

        description_tag = img_tag.find_next('p')
        description = clean_description(description_tag.get_text()) if description_tag else 'No Description'

        key_messages = clean_description(key_messages_tag.get_text()) if key_messages_tag else 'No Key Messages'

        all_data.append([tab_title, local_image_path, description, key_messages])


df = pd.DataFrame(all_data, columns=['Tab Title', 'Local Image Path', 'Description', 'Key Messages'])
csv_file = 'global_climate_highlights_2023.csv'
df.to_csv(csv_file, index=False)

# Compress the downloaded images folder
shutil.make_archive(image_folder, 'zip', image_folder)
files.download(image_folder + '.zip')
from google.colab import files
files.download('global_climate_highlights_2023.csv')
print(f"Data scraped and saved to {csv_file}. Images downloaded and compressed in {image_folder}.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Data scraped and saved to global_climate_highlights_2023.csv. Images downloaded and compressed in downloaded_images.zip


In [2]:
#concatenation of text + graphics
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import pandas as pd
import cv2
import numpy as np
import os
import textwrap

# CSV file and base directory paths
csv_file_path = '/content/drive/MyDrive/zooniverse/zooniverse_modified.csv'
base_dir = '/content/drive/MyDrive/zooniverse/'

# Read the CSV file
df = pd.read_csv(csv_file_path)

# Adding prefixes to Task 2 and Task 3 texts directly in the DataFrame
df['Task 2'] = 'Q2: ' + df['Task 2'].astype(str)
df['Task 3'] = 'Q3: ' + df['Task 3'].astype(str)

# Concatenate task texts with explicit line breaks
df['All_Tasks'] = df[['Task 1', 'Task 2', 'Task 3']].apply(lambda x: '\n'.join(x), axis=1)

# Create output directory
output_dir = os.path.join(base_dir, 'All_Tasks_Images')
os.makedirs(output_dir, exist_ok=True)

def add_text_to_image(image_path, text, output_path, initial_font_size, min_font_size):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not read image: {image_path}")
        return
    original_height, original_width = image.shape[:2]

    # Calculate the optimal font size and scale factor
    scale_factor = max(original_width, original_height) / 1024
    font_size = max(int(initial_font_size * scale_factor), min_font_size)
    font_scale = font_size / 48
    font = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 5  # Increased thickness for darker text
    color = (0, 0, 0)  # Black color
    margin = int(30 * scale_factor)

    # Split text into lines considering explicit line breaks
    lines = text.split('\n')
    wrap_width = max(int(original_width / (font_size / 2)), 10)
    wrapped_lines = []
    for line in lines:
        wrapped_lines.extend(textwrap.wrap(line, width=wrap_width))

    # Adjust text box height based on wrapped lines
    text_box_height = len(wrapped_lines) * (font_size + int(20 * scale_factor))

    # Adjust canvas height to fit wrapped text
    canvas_height = original_height + text_box_height + 2 * margin

    # Create expanded canvas
    canvas = np.ones((canvas_height, original_width, 3), dtype=np.uint8) * 255
    canvas[canvas_height-original_height:, 0:original_width] = image

    # Add wrapped text on the canvas
    y_offset = margin
    for line in wrapped_lines:
        text_size = cv2.getTextSize(line, font, font_scale, thickness)[0]
        text_x = (original_width - text_size[0]) // 2
        cv2.putText(canvas, line, (text_x, y_offset), font, font_scale, color, thickness)
        y_offset += font_size + int(20 * scale_factor)

    # Save the modified image
    cv2.imwrite(output_path, canvas)
    print(f"Image saved: {output_path}")

# Process each row in the DataFrame
for index, row in df.iterrows():
    image_path = row['Image Path']  # Ensure this matches your actual column name
    if not isinstance(image_path, str): continue
    image_name = os.path.basename(image_path)
    original_image_path = os.path.join(base_dir, image_path)
    all_tasks_text = row['All_Tasks']
    output_image_path = os.path.join(output_dir, image_name)
    add_text_to_image(original_image_path, all_tasks_text, output_image_path, initial_font_size, min_font_size)

print("All images have been processed successfully.")


Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_1_INDICATOR_TEMPERATURE.png
Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_2_INDICATOR_TEMPERATURE.png
Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_3_INDICATOR_TEMPERATURE.png
Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_4_INDICATOR_TEMPERATURE.png
Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_1_INDICATOR_SEA_LEVEL.png
Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_2_INDICATOR_SEA_LEVEL.png
Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/C3S_ESOTC2022_INDICATORS_GHG_CONC_Fig1.png
Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/C3S_ESOTC2022_INDICATORS_GHG_CONC_Fig4.png
Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/C3S_ESOTC2022_INDICATORS_GHGFlux_Fig2.png
Image saved: /content/drive/MyDrive/zooniverse/All_Tasks_Images/C3S_ESOTC2022_INDICATORS_G

In [8]:
import cv2
import os

# Directory containing the images to resize
image_dir = '/content/drive/MyDrive/zooniverse/All_Tasks_Images'

# Scale factor (0 < scale < 1 to reduce size)
scale_factor = 0.5  # Adjust as needed to decrease the image size

# Iterate over each file in the directory
for filename in os.listdir(image_dir):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_path = os.path.join(image_dir, filename)
        # Read the image
        image = cv2.imread(image_path)

        # Skip files that aren't images
        if image is None:
            continue

        # Calculate new dimensions
        new_width = int(image.shape[1] * scale_factor)
        new_height = int(image.shape[0] * scale_factor)
        new_dimensions = (new_width, new_height)

        # Resize the image
        resized_image = cv2.resize(image, new_dimensions, interpolation=cv2.INTER_AREA)

        # Overwrite the original image
        cv2.imwrite(image_path, resized_image)
        print(f"Resized and overwritten: {image_path}")

print("All images resized and overwritten successfully.")


Resized and overwritten: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_1_INDICATOR_TEMPERATURE.png
Resized and overwritten: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_2_INDICATOR_TEMPERATURE.png
Resized and overwritten: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_3_INDICATOR_TEMPERATURE.png
Resized and overwritten: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_4_INDICATOR_TEMPERATURE.png
Resized and overwritten: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_1_INDICATOR_SEA_LEVEL.png
Resized and overwritten: /content/drive/MyDrive/zooniverse/All_Tasks_Images/FIGURE_2_INDICATOR_SEA_LEVEL.png
Resized and overwritten: /content/drive/MyDrive/zooniverse/All_Tasks_Images/C3S_ESOTC2022_INDICATORS_GHG_CONC_Fig1.png
Resized and overwritten: /content/drive/MyDrive/zooniverse/All_Tasks_Images/C3S_ESOTC2022_INDICATORS_GHG_CONC_Fig4.png
Resized and overwritten: /content/drive/MyDrive/zooniverse/All_Tasks_Images/C3S_ESOTC2022_INDICATORS