# Import Packages

In [None]:
import os
import json
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from dotenv import load_dotenv

# Set-up Environment

In [None]:
# Pull the variables declared in the .env file into the process environment.
load_dotenv()

# Root storage directory, as configured in the environment.
GEEE_STORAGE_DIR = os.environ.get("GEEE_STORAGE_DIR")

# Landsat 8 settings: start date and the storage sub-directory name.
LANDSAT8_START_DATE = os.environ.get("LANDSAT8_START_DATE")
LANDSAT8_DATA_STORAGE_DIR = os.environ.get("LANDSAT8_DATA_STORAGE_DIR")

# Directory holding the Landsat 8 files: <root><separator><sub-directory>.
LANDSAT8_STORAGE_DIR = "{}{}{}".format(
    GEEE_STORAGE_DIR,
    os.sep,
    LANDSAT8_DATA_STORAGE_DIR,
)

# Helper Functions

In [None]:
def get_metadata_path(file_name: str) -> str:
    """Return the path of *file_name* inside the Landsat 8 storage directory.

    The result is ``LANDSAT8_STORAGE_DIR`` joined to ``file_name`` with the
    platform path separator.

    Args:
        file_name: Name of a file located in ``LANDSAT8_STORAGE_DIR``.

    Returns:
        The storage directory, ``os.sep`` and ``file_name`` concatenated.
    """
    return f"{LANDSAT8_STORAGE_DIR}{os.sep}{file_name}"

# Data Loading

In [None]:
# List every entry in the Landsat 8 storage directory.
landsat8_files = os.listdir(LANDSAT8_STORAGE_DIR)

# Keep only the JSON metadata files.
landsat8_metadata_files = []
for entry_name in landsat8_files:
    if entry_name.endswith(".json"):
        landsat8_metadata_files.append(entry_name)

print(f"Found {len(landsat8_metadata_files)} metadata files!")

# Data Extraction

Here we extract the cloud cover values from the collected Landsat 8 metadata files.

In [None]:
# Collect the CLOUD_COVER_LAND property from every metadata file. Files that
# cannot be opened, parsed, or that lack the property are reported and skipped
# so one bad file does not abort the whole scan.
cloud_cover_list = []

for file_name in tqdm(landsat8_metadata_files):
    # get path
    metadata_path = get_metadata_path(
        file_name = file_name
    )

    try:
        # read file
        with open(metadata_path, 'r', encoding='utf-8') as meta_json:
            landsat8_image_metadata = json.load(meta_json)
        # extract cloud cover
        cloud_cover = landsat8_image_metadata['properties']['CLOUD_COVER_LAND']
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: file missing/unreadable; ValueError: invalid JSON or bad
        # encoding (json.JSONDecodeError and UnicodeDecodeError are subclasses);
        # KeyError/TypeError: expected properties are absent or mis-shaped.
        # KeyboardInterrupt is intentionally NOT caught so the loop can be
        # stopped by the user.
        print(f"Error in reading JSON >>> {file_name}")
        continue

    # add to list of cloud cover
    cloud_cover_list.append(cloud_cover)

In [None]:
# Single-column frame holding one cloud cover value per metadata file.
cloud_cover_df = pd.DataFrame({'Cloud Cover': cloud_cover_list})

# Summary statistics drawn on the histogram: the mean and the 0.90 quantile.
cloud_cover_series = cloud_cover_df['Cloud Cover']
mean = cloud_cover_series.mean()
quartile_90 = cloud_cover_series.quantile(0.90)

# Visualization

In [None]:
# Histogram of the cloud cover values, 100 bins on a wide canvas.
ax = cloud_cover_df.plot(kind='hist', bins=100, figsize=(15, 7))

# Axis titles.
ax.set_xlabel('Cloud Cover % over Land', fontsize=20)
ax.set_ylabel("Frequency", fontsize=20)

# Same tick label size on both axes.
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize=20)

# Dashed vertical markers: (x position, colour, legend label).
reference_lines = (
    (mean, 'red', f'Mean: {mean:.2f}% cover over land'),
    (quartile_90, 'darkRed', f'Quartile 90%: {quartile_90:.2f}% cover over land'),
)
for x_position, line_color, line_label in reference_lines:
    plt.axvline(
        x_position,
        color=line_color,
        linestyle='dashed',
        linewidth=2,
        label=line_label,
    )

plt.legend(fontsize=22)

# Save with a transparent background, cropped tightly with a thin margin.
plt.savefig(
    "../../assets/figures/distribution_of_cloud_cover_over_land.png",
    transparent=True,
    bbox_inches='tight',
    pad_inches=0.05,
)

plt.show()