### Cropgrids ingestion
- Objective: this notebook ingests the CROPGRIDS dataset and keeps its rice layer, which indicates where rice is cultivated globally
- Outputs: an xarray dataset that will be used as a crop mask on NDVI data

Load modules

In [10]:
# import modules
import os
import requests
import zipfile
import geopandas as gpd
import rasterio
from rasterio.mask import mask

Ingest data from cropgrids repo

In [11]:
# download cropgrids data

def download_data(url, save_path, timeout=60, chunk_size=1024 * 1024):
    """
    Downloads data from the specified URL and saves it to the given path.

    The response body is streamed to a temporary ``<save_path>.part`` file and
    only renamed to ``save_path`` once the transfer has finished, so a failed
    download never leaves a truncated file at ``save_path``.

    HTTP/network errors (``requests.RequestException``) are printed rather than
    raised; file-system errors propagate to the caller.

    :param url: URL to download data from
    :param save_path: Path to save the downloaded data
    :param timeout: Timeout in seconds passed to ``requests.get`` so a stalled
        server cannot block the notebook forever
    :param chunk_size: Number of bytes requested per streamed chunk
    """
    partial_path = f"{save_path}.part"
    try:
        # stream=True avoids holding the whole archive in memory
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Raise an exception for HTTP errors

            # dirname is '' for a bare file name; os.makedirs('') would raise,
            # so only create the folder when one is actually specified
            target_dir = os.path.dirname(save_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    file.write(chunk)

        # Publish the finished file in one step
        os.replace(partial_path, save_path)
        print("Download completed successfully.")
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
    finally:
        # Remove the leftover partial file after any failure
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Figshare download link for the archive and the local path it is saved to
url = "https://figshare.com/ndownloader/articles/22491997/versions/3"
file_path = r"C:\Users\djava\OneDrive\Documents\Oxford\Projects\india_rice_early_warning\4_data\RAW_DATA\CROPGRIDS\cropgrids.zip"

# Fetch the archive
download_data(url=url, save_path=file_path)

Download completed successfully.


Unzip the data and keep only the rice cultivated-area map

In [12]:
# unzip and delete original files

def unzip_and_delete(zip_file_path):
    """
    Extracts a ZIP archive into the folder that contains it, then removes the archive.

    Any exception raised along the way is printed instead of being propagated;
    the archive is only removed after extraction has finished without error.

    :param zip_file_path: Path to the ZIP file
    """
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as archive:
            # Unpack next to the archive itself
            destination, _ = os.path.split(zip_file_path)
            archive.extractall(destination)
            print(f"Extracted to {destination}")

        # Extraction succeeded, so the archive is no longer needed
        os.remove(zip_file_path)
        print(f"Deleted the ZIP file: {zip_file_path}")

    except Exception as err:
        print(f"An error occurred: {err}")

# Location of the downloaded archive
zip_file_path = r"C:\Users\djava\OneDrive\Documents\Oxford\Projects\india_rice_early_warning\4_data\RAW_DATA\CROPGRIDS\cropgrids.zip"

# Extract it in place and remove the archive afterwards
unzip_and_delete(zip_file_path=zip_file_path)



In [15]:
# unzip sub-folder and delete other folders 

def unzip_and_delete(zip_to_unzip, file_to_delete, directory):
    """
    Extracts one ZIP file found in a directory and removes another file from that directory.

    The archive is extracted into ``directory`` itself and is not removed.
    A missing ``file_to_delete`` is reported with a message; any exception is
    printed instead of being propagated.

    :param zip_to_unzip: Name of the ZIP file to unzip
    :param file_to_delete: Name of the file to delete
    :param directory: Directory where the files are located
    """
    try:
        # Unpack the requested archive into the working directory
        archive_path = os.path.join(directory, zip_to_unzip)
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall(directory)
            print(f"Extracted {zip_to_unzip}")

        # Drop the unwanted file, if it is there
        unwanted_path = os.path.join(directory, file_to_delete)
        if not os.path.exists(unwanted_path):
            print(f"File {file_to_delete} not found in the directory.")
            return

        os.remove(unwanted_path)
        print(f"Deleted {file_to_delete}")

    except Exception as err:
        print(f"An error occurred: {err}")

# Working folder, the nested archive to unpack and the nested archive to discard
directory = r"C:\Users\djava\OneDrive\Documents\Oxford\Projects\india_rice_early_warning\4_data\RAW_DATA\CROPGRIDS"
unzip_file = "CROPGRIDSv1.06_NC_maps.zip"
delete_file = "CROPGRIDSv1.06_PNG_maps.zip"

unzip_and_delete(zip_to_unzip=unzip_file, file_to_delete=delete_file, directory=directory)

Extracted CROPGRIDSv1.06_NC_maps.zip
Deleted CROPGRIDSv1.06_PNG_maps.zip


In [16]:
# only keep rice map and delete other files

def clean_directory(keep_file, directory):
    """
    Deletes all files in the specified directory except the specified file to keep.

    Only regular files directly inside ``directory`` are removed; sub-directories
    are left untouched. As a safety guard, nothing is deleted unless ``keep_file``
    is actually present in ``directory`` as a regular file, so a misspelled name
    cannot wipe the whole folder. Any exception is printed instead of being
    propagated.

    :param keep_file: Name of the file to keep
    :param directory: Directory to clean
    """
    try:
        entries = os.listdir(directory)

        # Refuse to delete anything when the file to keep is not there:
        # otherwise every file would be removed and nothing kept.
        keep_path = os.path.join(directory, keep_file)
        if keep_file not in entries or not os.path.isfile(keep_path):
            print(f"File {keep_file} not found in {directory}; nothing was deleted.")
            return

        for filename in entries:
            if filename == keep_file:
                continue
            file_path = os.path.join(directory, filename)
            # Skip sub-directories; only plain files are removed
            if os.path.isfile(file_path):
                os.remove(file_path)
                print(f"Deleted {filename}")

        print("Directory cleaned, kept only the specified file.")

    except Exception as e:
        print(f"An error occurred: {e}")

# Folder holding the extracted maps and the single map that must survive the clean-up
directory = r"C:\Users\djava\OneDrive\Documents\Oxford\Projects\india_rice_early_warning\4_data\RAW_DATA\CROPGRIDS"
keep_file = "CROPGRIDSv1.06_rice.nc"

clean_directory(keep_file=keep_file, directory=directory)

Deleted Countries_2018.nc
Deleted CROPGRIDSv1.06_abaca.nc
Deleted CROPGRIDSv1.06_agave.nc
Deleted CROPGRIDSv1.06_alfalfa.nc
Deleted CROPGRIDSv1.06_almond.nc
Deleted CROPGRIDSv1.06_aniseetc.nc
Deleted CROPGRIDSv1.06_apple.nc
Deleted CROPGRIDSv1.06_apricot.nc
Deleted CROPGRIDSv1.06_areca.nc
Deleted CROPGRIDSv1.06_artichoke.nc
Deleted CROPGRIDSv1.06_asparagus.nc
Deleted CROPGRIDSv1.06_avocado.nc
Deleted CROPGRIDSv1.06_bambara.nc
Deleted CROPGRIDSv1.06_banana.nc
Deleted CROPGRIDSv1.06_barley.nc
Deleted CROPGRIDSv1.06_bean.nc
Deleted CROPGRIDSv1.06_beetfor.nc
Deleted CROPGRIDSv1.06_berrynes.nc
Deleted CROPGRIDSv1.06_blueberry.nc
Deleted CROPGRIDSv1.06_brazil.nc
Deleted CROPGRIDSv1.06_broadbean.nc
Deleted CROPGRIDSv1.06_buckwheat.nc
Deleted CROPGRIDSv1.06_cabbage.nc
Deleted CROPGRIDSv1.06_cabbagefor.nc
Deleted CROPGRIDSv1.06_canaryseed.nc
Deleted CROPGRIDSv1.06_carob.nc
Deleted CROPGRIDSv1.06_carrot.nc
Deleted CROPGRIDSv1.06_carrotfor.nc
Deleted CROPGRIDSv1.06_cashew.nc
Deleted CROPGRIDSv1.0