# Clean Script: SSP119 - RCP 1.9

In [10]:
# Reading .nc files from Colab/Thesis

# Importing libraries
import os
# Mount Google Drive (if you want to access files from Google Drive)
from google.colab import drive
drive.mount('/content/drive')
# Set the desired working directory
os.chdir('/content/drive/My Drive/Thesis')
# Verify the current working directory
print("Current working directory:", os.getcwd())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Current working directory: /content/drive/My Drive/Thesis


In [11]:
!ls

 first_tas_Amon_GFDL-ESM4_ssp119_r1i1p1f1_gr1_201501-210012.nc
 mexico.json
 script_clean_nc_files.ipynb
'Temperature fluctuations in Mexico: Scenario: SSP1 - RCP 2.6.csv'
 Untitled0.ipynb


In [12]:
# Installing netCDF4
!pip install netCDF4



In [13]:
# Importing libraries for data analysis
from netCDF4 import num2date
from netCDF4 import Dataset
import datetime
import pandas as pd
import numpy as np

In [14]:
# Importing libraries for netcdf_file
from scipy.io import netcdf_file
import warnings
warnings.filterwarnings('ignore')
import netCDF4 as nc

# Reading .nc file: 'NETCDF3_CLASSIC'
ncFileName = 'first_tas_Amon_GFDL-ESM4_ssp119_r1i1p1f1_gr1_201501-210012.nc'
fileFormat = 'NETCDF3_CLASSIC'
modeType   = 'r'
ssp1_df = nc.Dataset(ncFileName, mode=modeType, format=fileFormat)
# Printing
#ssp1_df

In [15]:
# Printing information of interest
print(f"• Title: {ssp1_df.title}\n• Frequency: {ssp1_df.frequency}\n• Institution: {ssp1_df.institution}\n• Table ID: {ssp1_df.table_id}\n• Institution ID: {ssp1_df.institution_id}\n• Institution: {ssp1_df.institution}\n• Experiment: {ssp1_df.experiment}\n• Experiment ID: {ssp1_df.experiment_id}\n• Activity ID: {ssp1_df.activity_id}\n• MIP Era: {ssp1_df.mip_era}\n• Variable ID: {ssp1_df.variable_id}\n• Further Info: {ssp1_df.further_info_url}")

• Title: NOAA GFDL GFDL-ESM4 model output prepared for CMIP6 low-end scenario reaching 1.9 W m-2, based on SSP1
• Frequency: mon
• Institution: National Oceanic and Atmospheric Administration, Geophysical Fluid Dynamics Laboratory, Princeton, NJ 08540, USA
• Table ID: Amon
• Institution ID: NOAA-GFDL
• Institution: National Oceanic and Atmospheric Administration, Geophysical Fluid Dynamics Laboratory, Princeton, NJ 08540, USA
• Experiment: low-end scenario reaching 1.9 W m-2, based on SSP1
• Experiment ID: ssp119
• Activity ID: ScenarioMIP
• MIP Era: CMIP6
• Variable ID: tas
• Further Info: https://furtherinfo.es-doc.org/CMIP6.NOAA-GFDL.GFDL-ESM4.ssp119.none.r1i1p1f1


In [16]:
# Printing dimensions
ssp1_df.dimensions

{'bnds': <class 'netCDF4._netCDF4.Dimension'>: name = 'bnds', size = 2,
 'lat': <class 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 180,
 'lon': <class 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 288,
 'time': <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'time', size = 1032}

In [17]:
# Printing variables
ssp1_df.variables

{'bnds': <class 'netCDF4._netCDF4.Variable'>
 float64 bnds(bnds)
     long_name: vertex number
 unlimited dimensions: 
 current shape = (2,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'height': <class 'netCDF4._netCDF4.Variable'>
 float64 height()
     long_name: height
     units: m
     missing_value: 1e+20
     _FillValue: 1e+20
     cell_methods: time: point scalar_axis: mean
     axis: Z
     positive: up
     standard_name: height
     description: ~2 m standard surface air temperature and surface humidity  height
 unlimited dimensions: 
 current shape = ()
 filling on,
 'lat': <class 'netCDF4._netCDF4.Variable'>
 float64 lat(lat)
     long_name: latitude
     units: degrees_north
     axis: Y
     bounds: lat_bnds
     standard_name: latitude
     cell_methods: time: point
 unlimited dimensions: 
 current shape = (180,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'lat_bnds': <class 'netCDF4._netCDF4.Variable'>
 float64 lat_bnds(lat, bnds)

In [18]:
# Printing keys

ssp1_df.variables.keys()

dict_keys(['bnds', 'height', 'lat', 'lat_bnds', 'lon', 'lon_bnds', 'tas', 'time', 'time_bnds'])

# Latitude

In [19]:
# Dimensions:     ('lat',)
# Shape:          current shape = (180,)
lat_new = ssp1_df.variables['lat'].units
lat_new_data = ssp1_df.variables['lat'][:]
lat_new
lat_new_data

masked_array(data=[-89.5, -88.5, -87.5, -86.5, -85.5, -84.5, -83.5, -82.5,
                   -81.5, -80.5, -79.5, -78.5, -77.5, -76.5, -75.5, -74.5,
                   -73.5, -72.5, -71.5, -70.5, -69.5, -68.5, -67.5, -66.5,
                   -65.5, -64.5, -63.5, -62.5, -61.5, -60.5, -59.5, -58.5,
                   -57.5, -56.5, -55.5, -54.5, -53.5, -52.5, -51.5, -50.5,
                   -49.5, -48.5, -47.5, -46.5, -45.5, -44.5, -43.5, -42.5,
                   -41.5, -40.5, -39.5, -38.5, -37.5, -36.5, -35.5, -34.5,
                   -33.5, -32.5, -31.5, -30.5, -29.5, -28.5, -27.5, -26.5,
                   -25.5, -24.5, -23.5, -22.5, -21.5, -20.5, -19.5, -18.5,
                   -17.5, -16.5, -15.5, -14.5, -13.5, -12.5, -11.5, -10.5,
                    -9.5,  -8.5,  -7.5,  -6.5,  -5.5,  -4.5,  -3.5,  -2.5,
                    -1.5,  -0.5,   0.5,   1.5,   2.5,   3.5,   4.5,   5.5,
                     6.5,   7.5,   8.5,   9.5,  10.5,  11.5,  12.5,  13.5,
                    14.5,

In [20]:
# Formatting Latitude values

# Convert to the format ranging from -90° to 90°
lat_values_formatted = np.where(lat_new_data > 90, lat_new_data - 360, lat_new_data)

# Print the first ten values
#print(lat_values_formatted[:10])
np.max(lat_values_formatted) # 89.5
np.min(lat_values_formatted) # -89.5

-89.5

# Longitude

In [21]:
# Dimensions:     ('lon',)
# Shape:          current shape = (288,)
lon_new = ssp1_df.variables['lon'].units
lon_new_data = ssp1_df.variables['lon'][:]
lon_new
lon_new_data

masked_array(data=[  0.625,   1.875,   3.125,   4.375,   5.625,   6.875,
                     8.125,   9.375,  10.625,  11.875,  13.125,  14.375,
                    15.625,  16.875,  18.125,  19.375,  20.625,  21.875,
                    23.125,  24.375,  25.625,  26.875,  28.125,  29.375,
                    30.625,  31.875,  33.125,  34.375,  35.625,  36.875,
                    38.125,  39.375,  40.625,  41.875,  43.125,  44.375,
                    45.625,  46.875,  48.125,  49.375,  50.625,  51.875,
                    53.125,  54.375,  55.625,  56.875,  58.125,  59.375,
                    60.625,  61.875,  63.125,  64.375,  65.625,  66.875,
                    68.125,  69.375,  70.625,  71.875,  73.125,  74.375,
                    75.625,  76.875,  78.125,  79.375,  80.625,  81.875,
                    83.125,  84.375,  85.625,  86.875,  88.125,  89.375,
                    90.625,  91.875,  93.125,  94.375,  95.625,  96.875,
                    98.125,  99.375, 100.625, 101.8

In [22]:
# Formatting Longitude values

# Convert to the format ranging from -180° to 180°
lon_values_formatted = np.where(lon_new_data > 180, lon_new_data - 360, lon_new_data)

# Print the first ten values
# print(lon_new_data[:10])
np.max(lon_values_formatted) # 179.375
np.min(lon_values_formatted) # -179.375

-179.375

# Time

In [23]:
# Dimensions:     ('time',)
# Shape:          current shape = (1032,)
time_new = ssp1_df.variables['time'].units
time_new_data = ssp1_df.variables['time'][:]
time_new
time_new_data

masked_array(data=[60240.5, 60270. , 60299.5, ..., 91538.5, 91569. ,
                   91599.5],
             mask=False,
       fill_value=1e+20)

In [24]:
# Checking time

time_new_data[:10]

masked_array(data=[60240.5, 60270. , 60299.5, 60330. , 60360.5, 60391. ,
                   60421.5, 60452.5, 60483. , 60513.5],
             mask=False,
       fill_value=1e+20)

# Temperature

In [25]:
# Dimensions:     ('time', 'lat', 'lon')
# Shape:          current shape = (1032, 180, 288)
tas_new = ssp1_df.variables['tas'].units
tas_new_data = ssp1_df.variables['tas'][:]
tas_new
tas_new_data

masked_array(
  data=[[[240.85387, 240.83304, 240.8122 , ..., 240.91638, 240.89554,
          240.87471],
         [241.50543, 241.46115, 241.4168 , ..., 241.63788, 241.5938 ,
          241.54965],
         [242.27605, 242.1947 , 242.11356, ..., 242.52104, 242.43921,
          242.35754],
         ...,
         [239.44476, 239.47845, 239.51222, ..., 239.34407, 239.37756,
          239.41113],
         [238.9793 , 239.0004 , 239.02142, ..., 238.91556, 238.93686,
          238.95811],
         [238.52213, 238.5314 , 238.54068, ..., 238.49428, 238.50356,
          238.51285]],

        [[230.4097 , 230.38121, 230.3527 , ..., 230.49521, 230.4667 ,
          230.4382 ],
         [231.6247 , 231.56201, 231.49916, ..., 231.81194, 231.74966,
          231.68724],
         [232.70442, 232.6117 , 232.51886, ..., 232.98201, 232.88959,
          232.79706],
         ...,
         [240.30452, 240.3312 , 240.3578 , ..., 240.22398, 240.2509 ,
          240.27774],
         [240.08385, 240.099  , 240.

# Temperature: converting from Kelvin to Celsius

In [31]:
# tas_new_data

# Convert temperature values from Kelvin to Celsius
tas_new_celsius = tas_new_data - 273.15

In [32]:
# Printing new tas max and min values

np.max(tas_new_celsius) # 44.43277
np.min(tas_new_celsius) # -73.75969

-73.75969

In [33]:
# Printing new tas celsius
tas_new_celsius

masked_array(
  data=[[[-32.296127 , -32.316956 , -32.3378   , ..., -32.233612 ,
          -32.254456 , -32.275284 ],
         [-31.644562 , -31.688843 , -31.7332   , ..., -31.512115 ,
          -31.556198 , -31.600342 ],
         [-30.873947 , -30.955292 , -31.036438 , ..., -30.628952 ,
          -30.710785 , -30.79245  ],
         ...,
         [-33.70523  , -33.67154  , -33.63777  , ..., -33.805923 ,
          -33.77243  , -33.73886  ],
         [-34.1707   , -34.149597 , -34.12857  , ..., -34.234436 ,
          -34.213135 , -34.19188  ],
         [-34.62787  , -34.61859  , -34.609314 , ..., -34.655716 ,
          -34.64644  , -34.637146 ]],

        [[-42.740295 , -42.768784 , -42.797287 , ..., -42.654785 ,
          -42.68329  , -42.711792 ],
         [-41.5253   , -41.587982 , -41.650833 , ..., -41.33806  ,
          -41.40033  , -41.462753 ],
         [-40.44557  , -40.5383   , -40.631134 , ..., -40.167984 ,
          -40.260406 , -40.352936 ],
         ...,
         [-32.845474

# Mexico: 23.6345° N, 102.5528° W
1. Getting the squared difference to get positive values based on its coordinates
2. Getting the argmin to get the indices that correspond to the country.
3. Open the square difference array to check for the indices.
4. Look into tas shape: current shape = (1032, 180, 288)
5. Look into tas dimensions: dimensions = ('time', 'lat', 'lon')
6. Take tas_new_data and input Country's values


In [26]:
# Mexico: Latitude and Longitude

lat_mex = 23.6345
lon_mex = 102.5528

# Squarred difference: lat
sq_dif_lat_mex = (lat_values_formatted - lat_mex)**2
sq_dif_lat_mex
# Squarred difference: lon
sq_dif_lon_mex = (lon_values_formatted - lon_mex)**2
sq_dif_lon_mex

array([1.03892764e+04, 1.01360194e+04, 9.88588741e+03, 9.63888041e+03,
       9.39499841e+03, 9.15424141e+03, 8.91660941e+03, 8.68210241e+03,
       8.45072041e+03, 8.22246341e+03, 7.99733141e+03, 7.77532441e+03,
       7.55644241e+03, 7.34068541e+03, 7.12805341e+03, 6.91854641e+03,
       6.71216441e+03, 6.50890741e+03, 6.30877541e+03, 6.11176841e+03,
       5.91788641e+03, 5.72712941e+03, 5.53949741e+03, 5.35499041e+03,
       5.17360841e+03, 4.99535141e+03, 4.82021941e+03, 4.64821241e+03,
       4.47933041e+03, 4.31357341e+03, 4.15094141e+03, 3.99143441e+03,
       3.83505241e+03, 3.68179541e+03, 3.53166341e+03, 3.38465641e+03,
       3.24077441e+03, 3.10001741e+03, 2.96238541e+03, 2.82787841e+03,
       2.69649641e+03, 2.56823941e+03, 2.44310741e+03, 2.32110041e+03,
       2.20221841e+03, 2.08646141e+03, 1.97382941e+03, 1.86432241e+03,
       1.75794041e+03, 1.65468341e+03, 1.55455141e+03, 1.45754441e+03,
       1.36366241e+03, 1.27290541e+03, 1.18527341e+03, 1.10076641e+03,
      

In [27]:
# Getting the indices from the squared difference
# This will get the closest matching point to the actual lat and lon

min_lat_mex = sq_dif_lat_mex.argmin() # 113
min_lat_mex
min_lon_mex = sq_dif_lon_mex.argmin() # 82
min_lon_mex

82

In [28]:
# Open the square difference array to check for the indices

sq_dif_lat_mex[113] # 0.018090249999999777

# Open the square difference array to check for the indices

sq_dif_lon_mex[82] # 0.32741283999999443

0.32741283999999443

In [29]:
# tas_new
  # current shape = (1032, 180, 288)
  # Dimensions  ('time', 'lat', 'lon')

In [34]:
# tas_new_celsius example in time 0 for Mexico (113, 82)
tas_new_celsius[0,113,82]

7.393738

# Extracting all values for Mexico

In [39]:
# # Defining the reference date
# reference_date = datetime.datetime(1850, 1, 1)

# # Creating an empty list to store the converted dates
# converted_dates = []

# # For loop: dates
# for dates in ssp1_df.variables['time']:
#     # Geting the actual value
#     dates = dates.item()
#     # Calculating date by adding the number of days to the reference date
#     date = reference_date + datetime.timedelta(days = dates)
#     # Extracting string: "YYYY-MM-DD"
#     date_str = date.strftime('%Y-%m-%d')
#     # Appending
#     converted_dates.append(date_str)

# # Converting to pandas Dataframe
# dates_df = pd.DataFrame({'Date': converted_dates})

# # Printing DataFrame
# dates_df.head()   # 2014-12-07
# # dates_df.tail() # 2100-10-16

Unnamed: 0,Date
0,2014-12-07
1,2015-01-06
2,2015-02-04
3,2015-03-07
4,2015-04-06


In [40]:
# # Creating column for storing temperature values
# dates_df['Temperature Celsius'] = 0
# mex_df = dates_df

# # Arranging temperatures into the dataframe
# dt = np.arange(0, ssp1_df.variables['time'].size)

# for time_index in dt:
#   temperature = tas_new_celsius[time_index, min_lat_mex, min_lon_mex]
#   mex_df.at[time_index, 'Temperature Celsius'] = temperature

# # Printing Dataframe
# mex_df.head()

Unnamed: 0,Date,Temperature Celsius
0,2014-12-07,7.393738
1,2015-01-06,8.372314
2,2015-02-04,12.221893
3,2015-03-07,15.784515
4,2015-04-06,17.13916


In [41]:
# # Exporting Dataframe to a CSV file
# mex_df.to_csv("Temperature fluctuations in Mexico: Scenario: SSP1 - RCP 2.6.csv")

In [42]:
# import numpy as np
# import pandas as pd
# import datetime

# def create_temperature_dataframe(lat, lon, tas_new_celsius, ssp1_df):
#     # Squarred difference: lat
#     sq_dif_lat = (lat_values_formatted - lat)**2
#     # Squarred difference: lon
#     sq_dif_lon = (lon_values_formatted - lon)**2

#     # Getting the indices from the squared difference
#     min_lat_idx = sq_dif_lat.argmin()
#     min_lon_idx = sq_dif_lon.argmin()

#     # Defining the reference date
#     reference_date = datetime.datetime(1850, 1, 1)

#     # Creating an empty list to store the converted dates
#     converted_dates = []

#     # For loop: dates
#     for dates in ssp1_df.variables['time']:
#         # Getting the actual value
#         dates = dates.item()
#         # Calculating date by adding the number of days to the reference date
#         date = reference_date + datetime.timedelta(days=dates)
#         # Extracting string: "YYYY-MM-DD"
#         date_str = date.strftime('%Y-%m-%d')
#         # Appending
#         converted_dates.append(date_str)

#     # Converting to pandas DataFrame
#     dates_df = pd.DataFrame({'Date': converted_dates})

#     # Creating column for storing temperature values
#     dates_df['Temperature Celsius'] = 0

#     # Arranging temperatures into the DataFrame
#     dt = np.arange(0, ssp1_df.variables['time'].size)

#     for time_index in dt:
#         temperature = tas_new_celsius[time_index, min_lat_idx, min_lon_idx]
#         dates_df.at[time_index, 'Temperature Celsius'] = temperature

#     return dates_df

# # Usage example:
# lat_mex = 23.6345
# lon_mex = 102.5528
# mex_df = create_temperature_dataframe(lat_mex, lon_mex, tas_new_celsius, ssp1_df)
# import numpy as np
# import pandas as pd
# import datetime

# def create_temperature_dataframe(lat, lon, tas_new_celsius, ssp1_df):
#     # Squarred difference: lat
#     sq_dif_lat = (lat_values_formatted - lat)**2
#     # Squarred difference: lon
#     sq_dif_lon = (lon_values_formatted - lon)**2

#     # Getting the indices from the squared difference
#     min_lat_idx = sq_dif_lat.argmin()
#     min_lon_idx = sq_dif_lon.argmin()

#     # Defining the reference date
#     reference_date = datetime.datetime(1850, 1, 1)

#     # Creating an empty list to store the converted dates
#     converted_dates = []

#     # For loop: dates
#     for dates in ssp1_df.variables['time']:
#         # Getting the actual value
#         dates = dates.item()
#         # Calculating date by adding the number of days to the reference date
#         date = reference_date + datetime.timedelta(days=dates)
#         # Extracting string: "YYYY-MM-DD"
#         date_str = date.strftime('%Y-%m-%d')
#         # Appending
#         converted_dates.append(date_str)

#     # Converting to pandas DataFrame
#     dates_df = pd.DataFrame({'Date': converted_dates})

#     # Creating column for storing temperature values
#     dates_df['Temperature Celsius'] = 0

#     # Arranging temperatures into the DataFrame
#     dt = np.arange(0, ssp1_df.variables['time'].size)

#     for time_index in dt:
#         temperature = tas_new_celsius[time_index, min_lat_idx, min_lon_idx]
#         dates_df.at[time_index, 'Temperature Celsius'] = temperature

#     return dates_df

# # Usage example:
# lat_mex = 23.6345
# lon_mex = 102.5528
# mex_df = create_temperature_dataframe(lat_mex, lon_mex, tas_new_celsius, ssp1_df)


In [43]:
# mex_df

Unnamed: 0,Date,Temperature Celsius
0,2014-12-07,7.393738
1,2015-01-06,8.372314
2,2015-02-04,12.221893
3,2015-03-07,15.784515
4,2015-04-06,17.139160
...,...,...
1027,2100-06-16,21.033386
1028,2100-07-17,18.717957
1029,2100-08-16,14.453491
1030,2100-09-16,13.986450


# Function to get all four Countries CSV files

In [47]:
import os
import numpy as np
import pandas as pd
import datetime

def get_temperature_data(lat, lon, tas_new_celsius, lat_values_formatted, lon_values_formatted, ssp1_df, territory_name):
    # Squarred difference: lat
    sq_dif_lat = (lat_values_formatted - lat)**2
    # Squarred difference: lon
    sq_dif_lon = (lon_values_formatted - lon)**2

    # Getting the indices from the squared difference
    min_lat_idx = sq_dif_lat.argmin()
    min_lon_idx = sq_dif_lon.argmin()

    # Defining the reference date
    reference_date = datetime.datetime(1850, 1, 1)

    # Creating an empty list to store the converted dates
    converted_dates = []

    # For loop: dates
    for dates in ssp1_df.variables['time']:
        # Geting the actual value
        dates = dates.item()
        # Calculating date by adding the number of days to the reference date
        date = reference_date + datetime.timedelta(days=dates)
        # Extracting string: "YYYY-MM-DD"
        date_str = date.strftime('%Y-%m-%d')
        # Appending
        converted_dates.append(date_str)

    # Converting to pandas Dataframe
    dates_df = pd.DataFrame({'Date': converted_dates})

    # Creating column for storing temperature values
    dates_df['Temperature Celsius'] = 0

    # Arranging temperatures into the dataframe
    dt = np.arange(0, ssp1_df.variables['time'].size)
    for time_index in dt:
        temperature = tas_new_celsius[time_index, min_lat_idx, min_lon_idx]
        dates_df.at[time_index, 'Temperature Celsius'] = temperature

    # Create folder if it doesn't exist
    folder_name = "CSVs: Scenario: SSP119 - RCP 1.9"
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    # Exporting Dataframe to a CSV file in the folder
    file_name = f"{territory_name}_Temperature_{folder_name}.csv"
    file_path = os.path.join(folder_name, file_name)
    dates_df.to_csv(file_path)

    return dates_df

# EL Salvador: 13.7942° N, 88.8965° W
el_salvador_df = get_temperature_data(13.7942, -88.8965, tas_new_celsius, lat_values_formatted, lon_values_formatted, ssp1_df, "El_Salvador")

# Guatemala: 15.7835° N, 90.2308° W
guatemala_df = get_temperature_data(15.7835, -90.2308, tas_new_celsius, lat_values_formatted, lon_values_formatted, ssp1_df, "Guatemala")

# Honduras: 15.2000° N, 86.2419° W
honduras_df = get_temperature_data(15.2, -86.2419, tas_new_celsius, lat_values_formatted, lon_values_formatted, ssp1_df, "Honduras")

# Mexico: 23.6345° N, 102.5528° W
mexico_df = get_temperature_data(23.6345, -102.5528, tas_new_celsius, lat_values_formatted, lon_values_formatted, ssp1_df, "Mexico")
