In [None]:
# Dependencies (uncomment to install on a fresh runtime):
# pip install earthengine-api folium geemap
# pip install pandas

In [None]:
import os
import ee
import folium
import geemap
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive

In [None]:
# Trigger the authentication flow.
ee.Authenticate()

# Initialize the library against a specific project.
# ee.Initialize(project='sulaimanadeleye001_Lake_Victoria_Map_Kenya_Area') # alternative project ID, left commented out
ee.Initialize(project='ee-sulaimanadeleye001') # replace the value with your own project ID

# Define the region of interest (ROI) from a FeatureCollection asset.
# `roi` is used by later cells for filtering, clipping and random point generation.
roi = ee.FeatureCollection('projects/rgee-gvasilop1/assets/LakeVictoria_Kenya')

In [None]:
# Mount Google Drive at /content/drive; later cells read and write CSV files under this path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Date window handed to every filterDate() call in this cell.
# Alternative window, left commented out:
# start = '2019-01-01'
# end = '2023-01-01'
start, end = '2022-06-01', '2023-01-01'

# Sentinel-2 surface-reflectance scenes that touch the ROI, fall inside the
# window, and report CLOUDY_PIXEL_PERCENTAGE below 10.
low_cloud_filter = ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)
sentinel2_sr = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
image_collection = (
    sentinel2_sr
    .filterBounds(roi)
    .filterDate(start, end)
    .filter(low_cloud_filter)
)

# ERA5-Land hourly data: one single-band collection per variable, each
# restricted to the same date window.
era5_collection = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")
era5_precip, era5_temp = (
    era5_collection.filterDate(start, end).select(band_name)
    for band_name in ('total_precipitation', 'temperature_2m')
)

# Random sampling locations inside the ROI (fixed seed keeps them repeatable).
N_SAMPLE_POINTS = 250
SAMPLE_SEED = 3
sampling_points = ee.FeatureCollection.randomPoints(roi, N_SAMPLE_POINTS, SAMPLE_SEED)


In [None]:
# Sampling of NDCI, NDTI, NDVI, temperature, and precipitation for each image.
def _era5_hour_image(collection, when, fallback_band):
    """Return the ERA5 image for the UTC hour containing ``when``, clipped to ``roi``.

    Args:
        collection: single-band ee.ImageCollection (e.g. ``era5_precip``).
        when: ee.Date of the Sentinel-2 acquisition.
        fallback_band: band name given to the placeholder image that is
            returned when no ERA5 image falls inside that hour.

    Returns:
        ee.Image: the mean of the matching images clipped to ``roi``, or
        ``ee.Image().rename(fallback_band)`` when nothing matches.
    """
    # Start of the hour that contains `when`; one filterDate window replaces
    # four chained calendarRange filters (year / month / day / hour).
    hour_start = ee.Date.fromYMD(
        when.get('year'), when.get('month'), when.get('day')
    ).advance(when.get('hour'), 'hour')
    matches = collection.filterDate(hour_start, hour_start.advance(1, 'hour'))

    # The condition must be a number: an ee.Image passed to Algorithms.If is
    # always truthy, so testing the image itself never reaches the fallback.
    return ee.Image(ee.Algorithms.If(
        matches.size().gt(0),
        matches.mean().clip(roi),
        ee.Image().rename(fallback_band)
    ))


def sample_image(img):
    """Sample spectral indices and same-hour ERA5 values at the sampling points.

    Adds three normalized-difference bands to ``img`` (``ndci`` from B5/B4,
    ``ndti`` from B4/B3, ``ndvi`` from B8/B4) plus ``temperature`` (ERA5
    ``temperature_2m`` minus 273.15) and ``precipitation`` (ERA5
    ``total_precipitation`` times 1000), then samples every band at
    ``sampling_points``.

    Args:
        img: ee.Image from the Sentinel-2 collection; must carry
            ``system:time_start``.

    Returns:
        ee.FeatureCollection: one feature per sampled point (geometry kept),
        each tagged with ``date`` (YYYY-MM-dd) and ``time`` (HH:mm:ss) of the
        acquisition.
    """
    acquired = ee.Date(img.get('system:time_start'))
    date = acquired.format('YYYY-MM-dd')
    time_of_day = acquired.format('HH:mm:ss')

    ndci = img.normalizedDifference(['B5', 'B4']).rename('ndci')
    ndti = img.normalizedDifference(['B4', 'B3']).rename('ndti')
    ndvi = img.normalizedDifference(['B8', 'B4']).rename('ndvi')
    img = img.addBands(ndci).addBands(ndti).addBands(ndvi)

    # ERA5 images for the same UTC hour as the acquisition (placeholder if none).
    era5_precip_img = _era5_hour_image(era5_precip, acquired, 'precipitation')
    era5_temp_img = _era5_hour_image(era5_temp, acquired, 'temperature')

    # Convert temperature to Celsius and precipitation from meters to millimeters.
    temperature = era5_temp_img.subtract(273.15).rename('temperature')
    precipitation = era5_precip_img.multiply(1000).rename('precipitation')
    img = img.addBands(temperature).addBands(precipitation)

    samples = img.sampleRegions(
        collection=sampling_points,
        geometries=True,
        scale=10
    )
    return samples.map(lambda f: f.set('date', date).set('time', time_of_day))

In [None]:
# Chunk the image collection so each map() call covers at most `batch_size` images.
batch_size = 10
image_list = image_collection.toList(image_collection.size())
image_count = image_collection.size().getInfo()  # client-side count drives the Python loop

batches = []
for offset in range(0, image_count, batch_size):
    batches.append(image_list.slice(offset, offset + batch_size))

# Sample every image in every batch; each entry is one flattened FeatureCollection.
all_samples = [
    ee.ImageCollection.fromImages(batch).map(sample_image).flatten()
    for batch in batches
]

# Merge the per-batch collections into a single FeatureCollection.
combined = ee.FeatureCollection(all_samples).flatten()

# Attach longitude, latitude, and a caller-supplied ID to a sampled feature.
def add_coordinates_and_id(feature, index):
    """Return ``feature`` with ``longitude``, ``latitude`` and ``id`` properties set.

    Args:
        feature: object exposing ``geometry().coordinates()`` and ``set(dict)``
            (an ee.Feature in this notebook).
        index: value stored under the ``id`` property.

    Returns:
        Whatever ``feature.set(...)`` returns for the three new properties.
    """
    point = feature.geometry().coordinates()
    # Element 0 is stored as longitude, element 1 as latitude.
    extra_props = dict(
        longitude=point.get(0),
        latitude=point.get(1),
        id=index,
    )
    return feature.set(extra_props)

In [None]:
# Number of sampled features, fetched client-side to pick a branch below.
collection_size = combined.size().getInfo()

if collection_size > 0:
    # Convert the collection to a server-side list so features can be addressed by position.
    features = combined.toList(collection_size)

    def _with_coordinates_and_id(position):
        """Tag the feature at ``position`` with its coordinates and an integer id."""
        return add_coordinates_and_id(
            ee.Feature(features.get(position)),
            ee.Number(position).toInt()  # keep the id an integer, as with a Python int
        )

    # Map over the positions on the server. A Python-side loop would add one
    # expression node per feature, which grows the request with the number of
    # samples.
    combined_features = ee.List.sequence(0, collection_size - 1).map(_with_coordinates_and_id)

    # Convert the list of features back to a FeatureCollection.
    combined = ee.FeatureCollection(combined_features)
else:
    # Handle the empty collection case.
    print("The collection is empty. No features to process.")
    combined = ee.FeatureCollection([])  # Empty FeatureCollection

The collection is empty. No features to process.


In [None]:
# Value range and colour ramp used to draw NDCI.
ndci_vis_params = dict(min=-1, max=1, palette=['blue', 'white', 'green'])

# Interactive geemap map centred at [-0.4, 34.4].
Map = geemap.Map(center=[-0.4, 34.4], zoom=10)

# Collapse the collection into one mosaic clipped to the ROI (for display only).
mosaic_image = image_collection.mosaic().clip(roi)

# NDCI layer: normalized difference of bands B5 and B4.
ndci_layer = mosaic_image.normalizedDifference(['B5', 'B4']).rename('ndci')
Map.addLayer(ndci_layer, ndci_vis_params, 'NDCI')

# Overlay the sampling points.
point_style = {'color': 'FF0000'}
Map.addLayer(sampling_points.style(**point_style), {}, 'Sampling Points')

# Layer toggle control.
Map.addLayerControl()

# Last expression of the cell: renders the map.
Map

Map(center=[-0.4, 34.4], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(…

In [None]:
# Value range and colour ramp used to draw NDTI.
ndti_vis_params = dict(min=-1, max=1, palette=['00FFFF', '0000FF'])

# Interactive geemap map centred at [-0.4, 34.4].
Map = geemap.Map(center=[-0.4, 34.4], zoom=10)

# Collapse the collection into one mosaic clipped to the ROI (for display only).
mosaic_image = image_collection.mosaic().clip(roi)

# NDTI layer: normalized difference of bands B4 and B3.
ndti_layer = mosaic_image.normalizedDifference(['B4', 'B3']).rename('ndti')
Map.addLayer(ndti_layer, ndti_vis_params, 'NDTI')

# Overlay the sampling points.
point_style = {'color': 'FF0000'}
Map.addLayer(sampling_points.style(**point_style), {}, 'Sampling Points')

# Layer toggle control.
Map.addLayerControl()

# Last expression of the cell: renders the map.
Map



Map(center=[-0.4, 34.4], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(…

In [None]:
# Value range and colour ramp used to draw NDVI.
ndvi_vis_params = dict(min=-1, max=1, palette=['blue', 'white', 'green'])

# Interactive geemap map centred at [-0.4, 34.4].
Map = geemap.Map(center=[-0.4, 34.4], zoom=10)

# Collapse the collection into one mosaic clipped to the ROI (for display only).
mosaic_image = image_collection.mosaic().clip(roi)

# NDVI layer: normalized difference of bands B8 and B4.
ndvi_layer = mosaic_image.normalizedDifference(['B8', 'B4']).rename('ndvi')
Map.addLayer(ndvi_layer, ndvi_vis_params, 'NDVI')

# Overlay the sampling points.
point_style = {'color': 'FF0000'}
Map.addLayer(sampling_points.style(**point_style), {}, 'Sampling Points')

# Layer toggle control.
Map.addLayerControl()

# Last expression of the cell: renders the map.
Map

Map(center=[-0.4, 34.4], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(…

In [None]:
# Value range and colour ramp used to draw temperature.
temperature_vis_params = dict(min=0, max=40, palette=['blue', 'green', 'red'])

# Interactive geemap map centred at [-0.4, 34.4].
Map = geemap.Map(center=[-0.4, 34.4], zoom=10)

# Sentinel-2 mosaic clipped to the ROI. No layer in this cell uses it; the
# assignment is kept so the variable stays defined as before.
mosaic_image = image_collection.mosaic().clip(roi)

# Mean ERA5 temperature over the date window, shifted by -273.15 and clipped to the ROI.
mean_temperature = era5_temp.mean()
temperature_image = mean_temperature.subtract(273.15).clip(roi)
Map.addLayer(temperature_image, temperature_vis_params, 'Temperature')

# Overlay the sampling points.
point_style = {'color': 'FF0000'}
Map.addLayer(sampling_points.style(**point_style), {}, 'Sampling Points')

# Layer toggle control.
Map.addLayerControl()

# Last expression of the cell: renders the map.
Map



Map(center=[-0.4, 34.4], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(…

In [None]:
# Value range and colour ramp used to draw precipitation.
precipitation_vis_params = dict(min=0, max=0.1, palette=['white', 'blue'])

# Interactive geemap map centred at [-0.4, 34.4].
Map = geemap.Map(center=[-0.4, 34.4], zoom=10)

# Sentinel-2 mosaic clipped to the ROI. No layer in this cell uses it; the
# assignment is kept so the variable stays defined as before.
mosaic_image = image_collection.mosaic().clip(roi)

# Mean ERA5 precipitation over the date window, clipped to the ROI.
# NOTE(review): no unit conversion is applied here, unlike the x1000 scaling
# used when sampling — confirm the 0–0.1 display range is intended.
mean_precipitation = era5_precip.mean()
precipitation_image = mean_precipitation.clip(roi)
Map.addLayer(precipitation_image, precipitation_vis_params, 'Precipitation')

# Overlay the sampling points.
point_style = {'color': 'FF0000'}
Map.addLayer(sampling_points.style(**point_style), {}, 'Sampling Points')

# Layer toggle control.
Map.addLayerControl()

# Last expression of the cell: renders the map.
Map

Map(center=[-0.4, 34.4], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(…

In [None]:
# Export the combined sampled points to Google Drive as a CSV table.
# The selectors list every sampled property. The per-period CSVs read back
# later in this notebook contain all ten of these columns, so leaving out
# 'ndvi', 'precipitation' or 'time' would make this export inconsistent
# with them.
export_columns = [
    'id', 'ndci', 'ndti', 'ndvi', 'temperature', 'precipitation',
    'date', 'time', 'longitude', 'latitude',
]

task_combined = ee.batch.Export.table.toDrive(
    collection=combined,
    description='2022_to_2023_2',
    folder='Project_Dataset',
    selectors=export_columns,  # Columns to export, in output order
    fileFormat='CSV'
)

# Start the export task (runs asynchronously on the server).
task_combined.start()

print("Export task started. Check your Google Drive for the file.")


Export task started. Check your Google Drive for the file.


In [None]:
# Poll the export task until it is no longer active.
while task_combined.active():
    print("Export task is still active. Waiting...")
    time.sleep(10)  # Wait for 10 seconds between polls

# Fetch the final status once and report on it. Only COMPLETED counts as
# success; a task that ends in another state (e.g. cancelled) must not be
# reported as successful.
final_status = task_combined.status()
final_state = final_status.get('state')

if final_state == 'COMPLETED':
    print("Export task completed successfully!")
elif final_state == 'FAILED':
    # 'error_message' may be absent, so avoid a KeyError while reporting.
    print("Export task failed:", final_status.get('error_message', 'no error message provided'))
else:
    print(f"Export task ended without completing (state: {final_state}).")

Export task is still active. Waiting...
Export task completed successfully!


In [None]:
# Folder on the mounted Drive that holds the per-period exports.
DATASET_DIR = '/content/drive/My Drive/Colab Notebooks/AI and DS Project/Project_Dataset'

# Base names of the exported CSV files, in concatenation order.
EXPORT_NAMES = (
    '2019_to_2020',
    '2020_to_2021_1',
    '2020_to_2021_2',
    '2021_to_2022_1',
    '2021_to_2022_2',
    '2022_to_2023_1',
    '2022_to_2023_2',
)
csv_files = [f'{DATASET_DIR}/{name}.csv' for name in EXPORT_NAMES]

# Read every file that exists; report (but do not stop on) missing or unreadable ones.
dataframes = []
for file in csv_files:
    if not os.path.isfile(file):
        print(f"File not found: {file}")
        continue
    try:
        df = pd.read_csv(file)
        dataframes.append(df)
        print(f"Successfully read: {file}")
    except Exception as e:
        print(f"Error reading {file}: {e}")

# Stack the frames with a fresh 0..n-1 index and write the merged table next to the inputs.
if not dataframes:
    print("No files were read successfully.")
else:
    combined_df = pd.concat(dataframes, ignore_index=True)
    combined_df.to_csv(f'{DATASET_DIR}/water_quality_parameters_data_lake_victoria.csv', index=False)
    print("CSV files combined successfully.")


Successfully read: /content/drive/My Drive/Colab Notebooks/AI and DS Project/Project_Dataset/2019_to_2020.csv
Successfully read: /content/drive/My Drive/Colab Notebooks/AI and DS Project/Project_Dataset/2020_to_2021_1.csv
Successfully read: /content/drive/My Drive/Colab Notebooks/AI and DS Project/Project_Dataset/2020_to_2021_2.csv
Successfully read: /content/drive/My Drive/Colab Notebooks/AI and DS Project/Project_Dataset/2021_to_2022_1.csv
Successfully read: /content/drive/My Drive/Colab Notebooks/AI and DS Project/Project_Dataset/2021_to_2022_2.csv
Successfully read: /content/drive/My Drive/Colab Notebooks/AI and DS Project/Project_Dataset/2022_to_2023_1.csv
Successfully read: /content/drive/My Drive/Colab Notebooks/AI and DS Project/Project_Dataset/2022_to_2023_2.csv
CSV files combined successfully.


In [None]:
# Display the combined dataframe (as the cell's last expression, the notebook renders it)
combined_df

Unnamed: 0,id,ndci,ndti,ndvi,temperature,precipitation,date,time,longitude,latitude
0,0,0.101512,-0.427785,-0.280000,25.474069,0.004637,2019-01-02,08:10:26,33.950132,-0.991945
1,1,0.166667,-0.361446,-0.258907,25.474069,0.004637,2019-01-02,08:10:26,33.958209,-0.952864
2,2,0.053957,-0.414905,-0.335025,25.351022,0.023651,2019-01-02,08:10:26,33.961437,-0.925996
3,3,-0.028939,-0.320595,-0.221374,25.351022,0.023651,2019-01-02,08:10:26,33.985340,-0.923366
4,4,0.062706,-0.336449,-0.193277,25.351022,0.023651,2019-01-02,08:10:26,33.969162,-0.914868
...,...,...,...,...,...,...,...,...,...,...
23344,2536,0.082067,-0.248756,-0.157088,20.946375,7.576302,2022-12-22,08:10:14,33.984872,-0.353019
23345,2537,0.057124,-0.018336,-0.051889,22.983484,4.909933,2022-12-22,08:10:14,33.979658,-0.844578
23346,2538,0.067010,-0.095000,-0.156550,23.272546,4.826784,2022-12-22,08:10:14,33.982546,-0.892068
23347,2539,0.055742,-0.055108,-0.043027,23.272546,4.826784,2022-12-22,08:10:14,33.981031,-0.937841


In [None]:
# Display the first few rows of the combined dataframe
combined_df.head()

Unnamed: 0,id,ndci,ndti,ndvi,temperature,precipitation,date,time,longitude,latitude
0,0,0.101512,-0.427785,-0.28,25.474069,0.004637,2019-01-02,08:10:26,33.950132,-0.991945
1,1,0.166667,-0.361446,-0.258907,25.474069,0.004637,2019-01-02,08:10:26,33.958209,-0.952864
2,2,0.053957,-0.414905,-0.335025,25.351022,0.023651,2019-01-02,08:10:26,33.961437,-0.925996
3,3,-0.028939,-0.320595,-0.221374,25.351022,0.023651,2019-01-02,08:10:26,33.98534,-0.923366
4,4,0.062706,-0.336449,-0.193277,25.351022,0.023651,2019-01-02,08:10:26,33.969162,-0.914868


In [None]:
# Display data types and non-null counts of the combined dataframe.
# (`df` is only the last CSV read by the loading loop, not the merged table.)
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2541 entries, 0 to 2540
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             2541 non-null   int64  
 1   ndci           2541 non-null   float64
 2   ndti           2541 non-null   float64
 3   ndvi           2541 non-null   float64
 4   temperature    2541 non-null   float64
 5   precipitation  2541 non-null   float64
 6   date           2541 non-null   object 
 7   time           2541 non-null   object 
 8   longitude      2541 non-null   float64
 9   latitude       2541 non-null   float64
dtypes: float64(7), int64(1), object(2)
memory usage: 198.6+ KB


In [None]:
# Summary statistics for the combined dataframe
combined_df.describe()

Unnamed: 0,id,ndci,ndti,ndvi,temperature,precipitation,longitude,latitude
count,23349.0,23349.0,23349.0,23349.0,23349.0,23349.0,23349.0,23349.0
mean,1896.626879,0.032464,-0.213198,-0.235647,24.481165,0.838118,34.183471,-0.457424
std,1347.826667,0.103447,0.107922,0.193206,1.314232,1.703644,0.272232,0.325797
min,0.0,-0.811321,-0.983333,-1.0,20.917078,0.0,33.923439,-0.991945
25%,833.0,-0.024055,-0.284738,-0.341615,23.525354,0.001013,33.9827,-0.80305
50%,1667.0,0.00939,-0.217742,-0.229268,24.294382,0.124156,34.054342,-0.374527
75%,2634.0,0.053061,-0.144444,-0.148688,25.21235,0.85064,34.380654,-0.239875
max,5809.0,0.846154,0.310591,0.908096,31.393823,15.16735,34.834401,0.182955


In [None]:
# Count missing values per column of the combined dataframe
combined_df.isnull().sum()
# If missing values appear, handle them by dropping or filling, e.g.:
# combined_df_clean = combined_df.dropna()  # or combined_df.ffill(), or another fill method

Unnamed: 0,0
id,0
ndci,0
ndti,0
ndvi,0
temperature,0
precipitation,0
date,0
time,0
longitude,0
latitude,0


In [None]:
# CODE EDITED BY
# SULAIMAN ADELEYE