In [None]:
# One shapely Point per occurrence record, built as Point(longitude, latitude)
geometry = list(
    map(Point, occurrence_data['decimalLongitude'], occurrence_data['decimalLatitude'])
)
geo_df = gpd.GeoDataFrame(occurrence_data, geometry=geometry)

# Declare the coordinates as WGS84 longitude/latitude (EPSG:4326)
geo_df.set_crs('EPSG:4326', inplace=True)


In [None]:
import rasterio

def extract_temperature_from_raster(raster_path, geo_df):
    """
    Sample the first band of a raster at every occurrence point.

    Args:
    - raster_path: Path of the raster file, passed to rasterio.open.
    - geo_df: DataFrame with 'decimalLongitude' and 'decimalLatitude' columns.

    Returns:
    - The same geo_df object, with an 'extracted_temperature' column added
      (or overwritten) in place. Points whose pixel lies outside the raster
      get NaN.
    """
    extracted_temperatures = []

    with rasterio.open(raster_path) as src:
        # Read the band once; reading it inside the loop would reload the
        # whole raster for every single point.
        band = src.read(1)
        n_rows, n_cols = band.shape

        # NOTE(review): assumes the point coordinates are expressed in the
        # raster's CRS -- confirm against the raster metadata.
        for lon, lat in zip(geo_df['decimalLongitude'], geo_df['decimalLatitude']):
            # Pixel row/column that contains this coordinate
            row_idx, col_idx = src.index(lon, lat)

            # Guard the lookup: a negative index would silently wrap around to
            # the opposite edge of the array and a too-large one would raise.
            if 0 <= row_idx < n_rows and 0 <= col_idx < n_cols:
                extracted_temperatures.append(band[row_idx, col_idx])
            else:
                extracted_temperatures.append(float('nan'))

    # NOTE(review): values are returned as stored in the band; confirm whether
    # the raster's nodata value should be masked as well.
    geo_df['extracted_temperature'] = extracted_temperatures
    return geo_df


In [None]:
# Every month from January 2010 through December 2018, formatted as 'YYYY-MM'
months = [
    f'{year}-{month_no:02d}'
    for year in range(2010, 2019)
    for month_no in range(1, 13)
]

# Folder that holds the monthly GeoTIFF files
raster_folder = '/kaggle/input/highresolution-geotiff-images-of-climatic-data'

# For each month: sample that month's raster at the occurrence points, then
# write the resulting table to its own CSV file
for month in months:
    raster_path = f'{raster_folder}/wc2.1_2.5m_tmax_{month}.tif'
    geo_df_with_temp = extract_temperature_from_raster(raster_path, geo_df)
    geo_df_with_temp.to_csv(f'species_with_temperature_{month}.csv', index=False)
    print(f"Processed and saved temperatures for {month}")


In [None]:
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import Point

# Example species occurrence data (replace with your data)
# NOTE(review): this cell rebinds `occurrence_data` and `geo_df`, replacing any
# frames of the same name created by earlier cells.
occurrence_data = pd.DataFrame({
    'latitude': [10.5, 15.6, 20.2],  # Example latitudes
    'longitude': [100.2, 102.3, 104.4],  # Example longitudes
    'species': ['species_1', 'species_2', 'species_3']
})

# Create a GeoDataFrame from latitude and longitude
geometry = [Point(xy) for xy in zip(occurrence_data["longitude"], occurrence_data["latitude"])]
geo_df = gpd.GeoDataFrame(occurrence_data, geometry=geometry)

# Set the coordinate reference system (CRS)
geo_df.set_crs(epsg=4326, inplace=True)  # WGS84

# Path to the temperature data (GeoTIFF file)
raster_path = "/kaggle/input/highresolution-geotiff-images-of-climatic-data/wc2.1_2.5m_tmax_2010-01.tif"  # Change this to your GeoTIFF path

# List to store extracted temperature values
extracted_values = []

# Open the raster file (temperature data)
with rasterio.open(raster_path) as src:
    # Read the first band once instead of re-reading the whole raster per point
    temperature_band = src.read(1)  # Assuming temperature is in the first band
    n_rows, n_cols = temperature_band.shape

    for lon, lat in zip(geo_df['longitude'], geo_df['latitude']):
        # Convert the longitude and latitude to row and column of the raster
        row_idx, col_idx = src.index(lon, lat)

        # Only index inside the array: negative indices would wrap around to
        # the opposite edge and too-large ones would raise IndexError
        if 0 <= row_idx < n_rows and 0 <= col_idx < n_cols:
            temp_value = temperature_band[row_idx, col_idx]
        else:
            temp_value = float('nan')
        extracted_values.append(temp_value)

# Add the extracted temperature values to the GeoDataFrame
geo_df['extracted_temperature'] = extracted_values

# Check the extracted temperature values
print(geo_df[['longitude', 'latitude', 'extracted_temperature']])


In [None]:
# Print geo_df's column index to check whether the 'decimalLatitude' and
# 'decimalLongitude' columns that extract_temperature_from_raster reads are present
print(geo_df.columns)


In [None]:
import seaborn as sns

def plot_species_temp_heatmap(species_df):
    """
    Plot a heatmap of occurrence density on a 0.1-degree longitude/latitude grid.

    Each record is assigned to a grid cell by flooring its coordinates to the
    grid size; the heatmap colour is the number of records in each cell.

    Args:
    - species_df: DataFrame containing species occurrences, with
      'decimalLongitude' and 'decimalLatitude' columns. It is not modified.
    """
    grid_size = 0.1

    # Bin on a derived frame so the caller's DataFrame does not silently gain
    # 'grid_lon' / 'grid_lat' columns.
    binned = species_df.assign(
        grid_lon=np.floor(species_df['decimalLongitude'] / grid_size) * grid_size,
        grid_lat=np.floor(species_df['decimalLatitude'] / grid_size) * grid_size,
    )

    # One row per grid cell. The per-record temperatures are deliberately not
    # merged back in: a cell holding several distinct temperatures would be
    # duplicated by that merge and its count summed more than once below.
    species_density = (
        binned.groupby(['grid_lon', 'grid_lat'])
        .size()
        .reset_index(name='species_density')
    )

    density_grid = species_density.pivot_table(
        index='grid_lat', columns='grid_lon', values='species_density', aggfunc='sum'
    )

    # NOTE(review): the title mentions a temperature overlay, but only the
    # density is drawn -- confirm whether an overlay is still wanted.
    plt.figure(figsize=(10, 8))
    sns.heatmap(data=density_grid, cmap='viridis')
    plt.title("Species Density Heatmap with Temperature Overlay", fontsize=15)
    plt.xlabel("Longitude")
    plt.ylabel("Latitude")
    plt.show()

# Example usage: species_df is not created in this cell, so it must already be
# defined in the kernel before this call runs
plot_species_temp_heatmap(species_df)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

def plot_species_richness_kde(species_df):
    """
    Plot a filled 2-D Kernel Density Estimate (KDE) of occurrence coordinates.

    The density is estimated from the occurrence points themselves
    (longitude on x, latitude on y); the figure labels it "Species Richness".

    Args:
    - species_df: DataFrame containing species occurrences, with
      'decimalLongitude' and 'decimalLatitude' columns.
    """
    plt.figure(figsize=(10, 8))

    # Let seaborn draw the colorbar so it is tied to the plotted density
    # levels. A hand-built ScalarMappable normalised over the coordinate
    # values would show longitude/latitude ranges rather than density, and it
    # is not attached to any Axes for matplotlib to place the bar next to.
    sns.kdeplot(
        x=species_df['decimalLongitude'],
        y=species_df['decimalLatitude'],
        cmap='viridis',
        fill=True,
        levels=10,
        cbar=True,
        cbar_kws={'label': "Species Richness"},
    )

    # Title and labels
    plt.title("Species Richness (KDE)", fontsize=15)
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')

    # Show the plot
    plt.show()

# Example usage: species_df is not created in this cell, so it must already be
# defined in the kernel before this call runs
plot_species_richness_kde(species_df)


In [None]:
import seaborn as sns
import numpy as np

def plot_temp_vs_density(species_df):
    """
    Plot a heatmap of species density by grid latitude and temperature.

    Records are binned into 0.1-degree longitude/latitude cells. Each cell
    contributes its record count once, placed under the mean of the
    temperatures extracted for the records in that cell.

    Args:
        species_df: DataFrame containing species occurrences and temperature
            data ('decimalLongitude', 'decimalLatitude' and
            'extracted_temperature' columns). It is not modified.
    """
    grid_size = 0.1

    # Bin on a derived frame so the caller's DataFrame does not silently gain
    # 'grid_lon' / 'grid_lat' columns.
    binned = species_df.assign(
        grid_lon=np.floor(species_df['decimalLongitude'] / grid_size) * grid_size,
        grid_lat=np.floor(species_df['decimalLatitude'] / grid_size) * grid_size,
    )

    # One row per grid cell: record count plus a single representative
    # temperature. Merging the raw distinct temperatures back in would repeat
    # a cell's density under every temperature found inside that cell.
    cell_stats = (
        binned.groupby(['grid_lon', 'grid_lat'])
        .agg(
            species_density=('extracted_temperature', 'size'),
            extracted_temperature=('extracted_temperature', 'mean'),
        )
        .reset_index()
    )

    # Create a pivot table for the heatmap (cells sharing a latitude band and
    # temperature are averaged, the pivot_table default)
    heatmap_df = cell_stats.pivot_table(
        index='grid_lat', columns='extracted_temperature', values='species_density'
    )

    # Create the heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(heatmap_df, cmap='viridis', cbar_kws={'label': 'Species Density'})
    plt.title('Species Density by Temperature')
    plt.xlabel('Temperature (°C)')
    plt.ylabel('Latitude')
    plt.show()

# Example usage: species_df is not created in this cell, so it must already be
# defined in the kernel before this call runs
plot_temp_vs_density(species_df)

In [None]:
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt

def plot_cumulative_species_distribution(species_df):
    """
    Plot the cumulative species distribution across all months as a heatmap.

    Records are counted per (longitude, latitude, scientific name) and the
    counts of all species sharing a coordinate are summed into one heatmap cell.

    Args:
        species_df: DataFrame containing species occurrences, with
            'decimalLongitude', 'decimalLatitude' and 'scientificName' columns.
    """
    # Number of records for every species at every exact coordinate
    species_data = (
        species_df.groupby(['decimalLongitude', 'decimalLatitude', 'scientificName'])
        .size()
        .reset_index(name='species_count')
    )

    # Create a pivot table for the heatmap. aggfunc must be 'sum': the
    # pivot_table default (mean) would average the per-species counts at a
    # coordinate instead of accumulating them.
    heatmap_df = species_data.pivot_table(
        index='decimalLatitude',
        columns='decimalLongitude',
        values='species_count',
        aggfunc='sum',
    )

    # Create the heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(heatmap_df, cmap='viridis', cbar_kws={'label': 'Species Count'})
    plt.title('Cumulative Species Distribution Across All Months')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.show()

# Example usage: species_df is not created in this cell, so it must already be
# defined in the kernel before this call runs
plot_cumulative_species_distribution(species_df)