FRP

- Mean, median, standard deviation, minimum, and maximum of FRP (fire radiative power) values, aggregated by year, by month, and by day

- Total number of observations per year, per month, and per day

In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import xarray as xr

In [20]:
# Load the FRP detections from the CSV export
df = pd.read_csv(r"E:\IPMA\FRP\FRP_2000_2024.csv")

# (A bare `df.head()` used to sit here; an expression in the middle of a cell
# is evaluated and discarded, so it never displayed anything. The preview is
# produced by the print() call below instead.)

# Parse 'acq_date' into datetime so the .dt accessors below are available
df['acq_date'] = pd.to_datetime(df['acq_date'])

# Derive the calendar components that the year/month/day aggregations group on
df['year'] = df['acq_date'].dt.year
df['month'] = df['acq_date'].dt.month
df['day'] = df['acq_date'].dt.day

# Plain-text preview of the first rows to confirm the new columns were added
print(df.head())

# Last expression of the cell: rich display of the full (truncated) frame
df

   latitude  longitude  brightness  scan  track   acq_date  acq_time  \
0   36.8527    35.3594       306.0   2.1    1.4 2000-11-01       815   
1   49.5459    23.5306       326.6   1.0    1.0 2000-11-01       950   
2   49.5480    23.5170       347.1   1.0    1.0 2000-11-01       950   
3   48.7390    29.1738       306.8   1.5    1.2 2000-11-01       950   
4   49.5695    23.4383       303.8   1.0    1.0 2000-11-01       950   

  satellite instrument  confidence  version  bright_t31   frp daynight  type  \
0     Terra      MODIS          60     6.03       294.4  13.2        D     0   
1     Terra      MODIS          84     6.03       286.4  21.9        D     0   
2     Terra      MODIS          95     6.03       291.3  53.6        D     0   
3     Terra      MODIS          33     6.03       287.7  13.7        D     0   
4     Terra      MODIS          57     6.03       286.8   5.6        D     0   

   year  month  day  
0  2000     11    1  
1  2000     11    1  
2  2000     11    1 

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,year,month,day
0,36.8527,35.3594,306.0,2.1,1.4,2000-11-01,815,Terra,MODIS,60,6.03,294.4,13.2,D,0,2000,11,1
1,49.5459,23.5306,326.6,1.0,1.0,2000-11-01,950,Terra,MODIS,84,6.03,286.4,21.9,D,0,2000,11,1
2,49.5480,23.5170,347.1,1.0,1.0,2000-11-01,950,Terra,MODIS,95,6.03,291.3,53.6,D,0,2000,11,1
3,48.7390,29.1738,306.8,1.5,1.2,2000-11-01,950,Terra,MODIS,33,6.03,287.7,13.7,D,0,2000,11,1
4,49.5695,23.4383,303.8,1.0,1.0,2000-11-01,950,Terra,MODIS,57,6.03,286.8,5.6,D,0,2000,11,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022350,44.5394,3.0130,300.9,1.2,1.1,2024-12-31,941,Terra,MODIS,43,61.03,276.7,9.9,D,0,2024,12,31
2022351,40.3944,33.5275,311.0,1.2,1.1,2024-12-31,1130,Aqua,MODIS,71,61.03,283.6,16.0,D,0,2024,12,31
2022352,40.8579,33.0911,300.0,1.2,1.1,2024-12-31,1130,Aqua,MODIS,22,61.03,280.7,8.5,D,0,2024,12,31
2022353,45.2580,31.6737,311.4,1.2,1.1,2024-12-31,1131,Aqua,MODIS,48,61.03,282.5,17.0,D,3,2024,12,31


Year

In [None]:
# Summary statistics (mean, median, std, min, max) for FRP by year, together
# with the number of observations in each year.
#
# String aliases are used instead of NumPy callables (np.nanmean, np.nanstd, ...).
# pandas currently replaces those callables with its own groupby methods and,
# from pandas 2.1, emits a FutureWarning because a later version will call them
# directly. That would switch 'std' from the sample estimate (ddof=1) to
# NumPy's population estimate (ddof=0). The aliases pin today's results; the
# pandas reductions skip NaN values.
#
# 'size' counts every row of the group, including rows whose frp is NaN, which
# matches the previous separate groupby('year').size() that was merged in.
yearly_stats = (
    df.groupby('year')['frp']
    .agg(
        mean='mean',
        median='median',
        std='std',
        min='min',
        max='max',
        count='size',
    )
    .reset_index()
)

# Plain-text preview of the first rows of the yearly statistics table
print(yearly_stats.head())

# Last expression of the cell: rich display of the yearly table
yearly_stats

In [None]:
# Use 'year' as the index so it becomes the dimension of the xarray Dataset;
# the remaining columns (mean, median, std, min, max, count) become variables.
yearly_indexed = yearly_stats.set_index('year')
ds = xr.Dataset.from_dataframe(yearly_indexed)

# Persist the yearly statistics as a NetCDF file
ds.to_netcdf(r"E:\IPMA\FRP\yearly_frp_stats.nc")

print("NetCDF file saved successfully.")

In [None]:
# Use seaborn's white-grid theme for every panel
sns.set_theme(style="whitegrid")

# Statistic columns to plot, with the matching panel title and marker symbol
stats = ['mean', 'median', 'std', 'min', 'max']
titles = ['Mean FRP', 'Median FRP', 'Standard Deviation of FRP', 'Minimum FRP', 'Maximum FRP']
markers = ['o', 's', '^', 'v', 'd']  # Different markers for distinction

# Five vertically stacked panels that share the year axis
fig, axes = plt.subplots(5, 1, figsize=(12, 18), sharex=True)

# Draw one yearly time series per panel
for ax, stat, title, marker in zip(axes, stats, titles, markers):
    sns.lineplot(data=yearly_stats, x='year', y=stat, marker=marker, linestyle='-', ax=ax)
    ax.set(title=title, ylabel='FRP Value')
    ax.grid(True)

# Put a tick on every year; labels are rotated so they do not overlap
years = yearly_stats['year'].unique()
bottom_ax = axes[-1]
bottom_ax.set_xticks(years)
bottom_ax.set_xticklabels(years, rotation=45)

# The x-axis is shared, so only the bottom panel carries the axis label
bottom_ax.set_xlabel('Year')

# Tighten the spacing between panels and render the figure
plt.tight_layout()
plt.show()


Month

In [None]:
# Summary statistics (mean, median, std, min, max) for FRP by (year, month),
# together with the number of observations in each month.
#
# String aliases are used instead of NumPy callables (np.nanmean, np.nanstd, ...).
# pandas currently replaces those callables with its own groupby methods and,
# from pandas 2.1, emits a FutureWarning because a later version will call them
# directly. That would switch 'std' from the sample estimate (ddof=1) to
# NumPy's population estimate (ddof=0). The aliases pin today's results; the
# pandas reductions skip NaN values.
#
# 'size' counts every row of the group, including rows whose frp is NaN, which
# matches the previous separate groupby(['year', 'month']).size() merge.
monthly_stats = (
    df.groupby(['year', 'month'])['frp']
    .agg(
        mean='mean',
        median='median',
        std='std',
        min='min',
        max='max',
        count='size',
    )
    .reset_index()
)

# Plain-text preview of the first rows of the monthly statistics table
print(monthly_stats.head())

# Last expression of the cell: rich display of the monthly table
monthly_stats

In [None]:
# Index by (year, month) so both become dimensions of the xarray Dataset.
# NOTE: per the xarray documentation, a MultiIndex is expanded into the full
# product of its levels, so (year, month) pairs that are absent from the table
# are expected to appear as NaN cells in the output.
monthly_keys = ['year', 'month']
monthly_indexed = monthly_stats.set_index(monthly_keys)
ds_monthly = xr.Dataset.from_dataframe(monthly_indexed)

# Persist the monthly statistics as a NetCDF file
ds_monthly.to_netcdf(r"E:\IPMA\FRP\monthly_frp_stats.nc")

print("NetCDF file saved successfully.")

In [None]:
# Set seaborn style
sns.set_theme(style="whitegrid")

# Choose the year you want to visualize
selected_year = 2020  # Change this value to see different years

# Keep only the rows of the selected year and add a first-of-month datetime
# column for plotting. The column is added with .assign(), which returns a new
# DataFrame; assigning it directly on the boolean-mask slice of monthly_stats
# (as before) raises SettingWithCopyWarning and is not guaranteed to behave
# consistently across pandas versions.
filtered_stats = monthly_stats[monthly_stats['year'] == selected_year].assign(
    date=lambda d: pd.to_datetime(d[['year', 'month']].assign(day=1))
)

# Create figure with 5 subplots (stacked vertically) sharing the month axis
fig, axes = plt.subplots(5, 1, figsize=(12, 18), sharex=True)

# Define statistics and labels
stats = ['mean', 'median', 'std', 'min', 'max']
titles = [f'Mean FRP ({selected_year})', f'Median FRP ({selected_year})', 
          f'Standard Deviation of FRP ({selected_year})', 
          f'Minimum FRP ({selected_year})', f'Maximum FRP ({selected_year})']
markers = ['o', 's', '^', 'v', 'd']  # Different markers for distinction

# Loop through each statistic and create a separate plot
for i, stat in enumerate(stats):
    sns.lineplot(data=filtered_stats, x='date', y=stat, marker=markers[i], linestyle='-', ax=axes[i])
    axes[i].set_title(titles[i])
    axes[i].set_ylabel('FRP Value')
    axes[i].grid(True)

# Format x-axis to display months properly
axes[-1].set_xlabel('Month')
axes[-1].xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%b'))  # Format as month name (Jan, Feb, etc.)
plt.xticks(rotation=45)  # Rotate labels for better visibility

# Adjust layout and show plots
plt.tight_layout()
plt.show()


Day

In [None]:
# Summary statistics (mean, median, std, min, max) for FRP by (year, month, day),
# together with the number of observations on each day.
#
# String aliases are used instead of NumPy callables (np.nanmean, np.nanstd, ...).
# pandas currently replaces those callables with its own groupby methods and,
# from pandas 2.1, emits a FutureWarning because a later version will call them
# directly. That would switch 'std' from the sample estimate (ddof=1) to
# NumPy's population estimate (ddof=0). The aliases pin today's results; the
# pandas reductions skip NaN values. Days with a single observation therefore
# keep a NaN 'std', as they do today.
#
# 'size' counts every row of the group, including rows whose frp is NaN, which
# matches the previous separate groupby(['year', 'month', 'day']).size() merge.
daily_stats = (
    df.groupby(['year', 'month', 'day'])['frp']
    .agg(
        mean='mean',
        median='median',
        std='std',
        min='min',
        max='max',
        count='size',
    )
    .reset_index()
)

# Plain-text preview of the first rows of the daily statistics table
print(daily_stats.head())

# Last expression of the cell: rich display of the daily table
daily_stats

In [None]:
# Index by (year, month, day) so all three become dimensions of the Dataset.
# NOTE: per the xarray documentation, a MultiIndex is expanded into the full
# product of its levels, so (year, month, day) combinations with no rows in
# the table are expected to appear as NaN cells in the output.
daily_keys = ['year', 'month', 'day']
daily_indexed = daily_stats.set_index(daily_keys)
ds_daily = xr.Dataset.from_dataframe(daily_indexed)

# Persist the daily statistics as a NetCDF file
ds_daily.to_netcdf(r"E:\IPMA\FRP\daily_frp_stats.nc")

print("NetCDF file saved successfully.")