In [1]:
import re
import os
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from netCDF4 import Dataset
import geopandas as gpd
from shapely.geometry import Point, box
import pandas as pd
import calendar
from datetime import datetime, timedelta
import seaborn as sns


from tqdm import tqdm
import rasterio
import rioxarray
from rasterio.mask import mask
from rasterio.plot import show
from shapely.geometry import mapping

### Load In MERRA-2

In [6]:
import netCDF4 as nc

dataset_path = '/global/scratch/users/liuwenjin021011/data/MERRA-2_05x0625_monthly_1980-2017.nc'
data = nc.Dataset(dataset_path)
print(data.variables.keys())  

dict_keys(['time', 'longitudes', 'latitudes', 'Area', 'EmisNO_Soil', 'EmisSALA_Natural', 'EmisSALC_Natural', 'ISOP_MEGAN', 'ACET_MEGAN', 'ALD2_MEGAN', 'C2H4_MEGAN', 'EOH_MEGAN', 'PRPE_MEGAN', 'MTPA_MEGAN', 'MTPO_MEGAN', 'LIMO_MEGAN', 'SESQ_MEGAN'])


### Fix Time Variable To Filter Out Year 2017

In [16]:
# Select variables of interest
vars_of_interest = ['Area', 'EmisNO_Soil']
selected_data = data[vars_of_interest]

# Convert to DataFrame
df = selected_data.to_dataframe().reset_index()

In [22]:
def convert_month_number_to_date(month_num, start_year=1980):
    # Ensure month_num is an integer
    month_num = int(month_num)

    # Calculate the target year and month from the month_num
    years = month_num // 12
    months = month_num % 12
    target_year = start_year + years
    target_month = months + 1  # Add 1 because months are 1-indexed

    # Construct the date string
    date_str = f"{target_year}-{target_month:02d}"

    return date_str

# Apply the conversion function to the 'time' column
# Ensure the 'time' column is treated as integers if it is not already
df['updated time'] = df['time'].astype(int).apply(convert_month_number_to_date)
df_2017 = df[df['updated time'].str.startswith('2017')]

In [51]:
df_2017[df_2017['EmisNO_Soil'] > 0]

Unnamed: 0,lon,lat,time,Area,EmisNO_Soil,updated time
142260,0,311,444.0,1.610984e+09,2.530285e-10,2017-01
142261,0,311,445.0,1.610984e+09,1.577752e-10,2017-02
142262,0,311,446.0,1.610984e+09,3.249730e-10,2017-03
142263,0,311,447.0,1.610984e+09,6.777385e-10,2017-04
142264,0,311,448.0,1.610984e+09,9.562591e-10,2017-05
...,...,...,...,...,...,...
94801939,575,323,451.0,1.236792e+09,2.934200e-08,2017-08
94801940,575,323,452.0,1.236792e+09,1.695232e-08,2017-09
94801941,575,323,453.0,1.236792e+09,1.056157e-08,2017-10
94801942,575,323,454.0,1.236792e+09,7.628094e-09,2017-11


### Fix Lat & Lon to Filter Out Area of Interest, China

In [68]:
def process_merra_data(df, lon_min=73, lon_max=135, lat_min=18, lat_max=53):

    # Convert latitude and longitude to degrees
    df['latitude'] = (df['lat'] - 180) / 2.0
    df['longitude'] = (df['lon'] * 0.625) - 180
   
    # Filter data for China's boundaries
    filtered_df = df[
        (df['lon'] >= lon_min) & (df['lon'] <= lon_max) &
        (df['lat'] >= lat_min) & (df['lat'] <= lat_max)
    ]

    return filtered_df


# Apply the function to your DataFrame
df_china = process_merra_data(df_2017)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['latitude'] = (df['lat'] - 180) / 2.0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['longitude'] = (df['lon'] * 0.625) - 180


In [69]:
df_china

Unnamed: 0,lon,lat,time,Area,EmisNO_Soil,updated time,latitude,longitude
66768420,73.565217,18.0,444.0,3.670185e+09,1.876576e-05,2017-01,-81.0,-134.021739
66768421,73.565217,18.0,445.0,3.670185e+09,2.732446e-05,2017-02,-81.0,-134.021739
66768422,73.565217,18.0,446.0,3.670185e+09,2.886158e-05,2017-03,-81.0,-134.021739
66768423,73.565217,18.0,447.0,3.670185e+09,4.648263e-05,2017-04,-81.0,-134.021739
66768424,73.565217,18.0,448.0,3.670185e+09,1.215730e-04,2017-05,-81.0,-134.021739
...,...,...,...,...,...,...,...,...
82932715,134.921739,53.0,451.0,2.329673e+09,7.851023e-08,2017-08,-63.5,-95.673913
82932716,134.921739,53.0,452.0,2.329673e+09,3.518085e-08,2017-09,-63.5,-95.673913
82932717,134.921739,53.0,453.0,2.329673e+09,5.605743e-09,2017-10,-63.5,-95.673913
82932718,134.921739,53.0,454.0,2.329673e+09,1.702514e-10,2017-11,-63.5,-95.673913


In [71]:
df_china.to_csv('/global/scratch/users/liuwenjin021011/data/Validation-MERRA-2.csv', index = False, header = True)