## Comparative analysis of Nanjing Climate in 1969, 1996, 2023

In [4]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')  # Set before importing pyplot for GUI-based display
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Open the single NetCDF file that holds the 2021 Nanjing data
file_path = '2021_nanjing.nc'
ds = xr.open_dataset(file_path)

In [5]:
# Work on a renamed copy so this cell stays re-runnable: `ds` keeps its original
# 'valid_time' coordinate, and executing the cell a second time does not fail
# because the name has already been changed.
ds_time = ds.rename({'valid_time': 'time'})

# Extract the temperature variable and convert from Kelvin to Celsius
temp_c = ds_time['t2m'] - 273.15

# Average over every grid point and time step within each calendar month
monthly_mean = temp_c.groupby('time.month').mean(dim=['time', 'latitude', 'longitude'])

# Annual mean across all time, latitude, longitude
annual_mean = temp_c.mean(dim=['time', 'latitude', 'longitude'])

# Output results. Each value is labelled with its own month coordinate rather
# than its position, so data that does not start in January (or that skips a
# month) is still labelled correctly.
print("Monthly Mean Temperatures (°C):")
for month, value in zip(monthly_mean['month'].values, monthly_mean.values):
    print(f"Month {int(month):02d}: {float(value):.2f}°C")

print(f"\nAnnual Mean Temperature (°C): {float(annual_mean):.2f}")

Monthly Mean Temperatures (°C):
Month 01: 4.18°C
Month 02: 9.71°C
Month 03: 11.72°C
Month 04: 16.04°C
Month 05: 22.10°C
Month 06: 26.41°C
Month 07: 28.05°C
Month 08: 27.36°C
Month 09: 25.81°C
Month 10: 18.35°C
Month 11: 11.94°C
Month 12: 6.16°C

Annual Mean Temperature (°C): 17.35


## Annual Temperature of Nanjing

In [6]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt

# Open the 2021 Nanjing dataset
file_path = '2021_nanjing.nc'  # Change path if needed
ds = xr.open_dataset(file_path)

# Bounding box around Nanjing: 32–33°N, 117–119°E.
# The latitude slice runs from 33 down to 32
# (assumes latitude is stored north-to-south — TODO confirm).
region = dict(latitude=slice(33.0, 32.0), longitude=slice(117.0, 119.0))
subset = ds['t2m'].sel(**region)

# Collapse the time axis to get one mean value per grid cell
mean_temp = subset.mean(dim='valid_time')

# 2-D field (Kelvin) and the matching coordinate grids for plotting
temp_values = mean_temp.values
lon, lat = np.meshgrid(mean_temp.longitude.values, mean_temp.latitude.values)

In [7]:
# Draw the time-averaged temperature field as a filled contour map
fig, ax = plt.subplots(figsize=(10, 6))
contour = ax.contourf(lon, lat, temp_values, cmap='coolwarm')
ax.set_title('Mean Temperature (K) - Nanjing Region (2021)', fontsize=14)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# Colour scale for the filled contours
cbar = fig.colorbar(contour, ax=ax)
cbar.set_label('Temperature (K)')

ax.grid(True)
plt.show()

# Mean Temperature Analysis of Nanjing Region (2021)

## Discussion and Analysis:

### 1. Temperature Variation Across Latitudes and Longitudes:
- The temperature in the Nanjing region shows clear gradients across both latitude and longitude.
- The map shows temperature increasing from west (left side of the map) to east (right side). Temperatures at higher latitudes (towards the top of the map) also tend to be cooler, with blues indicating lower temperatures.
- The transition from cooler (blue) to warmer (red) temperatures appears to follow the latitudinal and longitudinal shifts, where the lower latitudes and more easterly longitudes experience higher temperatures.

### 2. Hotspots and Coldspots:
- **Coldspots**: The cooler areas (depicted in blue) are concentrated towards the southwestern portion (near latitude 32.0, longitude 117.0), with temperatures dipping close to 289.75 K.
- **Hotspots**: The warmer regions (in red) are found near the northeastern corner (latitude 32.5, longitude 118.75), showing temperatures reaching 290.65 K.

### 3. Implications of Temperature Distribution:
- The temperature variation can provide insights into the climatic conditions across the region. Higher temperatures in the northeastern part of the map could be attributed to urban heat islands, geographical elevation, or human activities in the area that lead to a slight rise in temperature.
- Conversely, cooler temperatures in the southwestern regions may reflect geographical factors like proximity to water bodies, elevation, or less dense urbanization.

## Conclusion:
- The contour map effectively captures the spatial variations in temperature across the Nanjing region, providing valuable insights into the regional climate for 2021.
- Understanding temperature distribution is crucial for urban planning, agricultural predictions, and energy demand forecasting, particularly in areas where climate extremes may impact daily life.
- Future studies could explore seasonal variations and longer-term trends to better assess the region’s climate patterns and predict future shifts due to global warming or local environmental changes.

This analysis gives a clear picture of how temperature varies spatially within the Nanjing region and offers actionable insights for various sectors.

In [8]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import calendar

# Open the 2021 Nanjing NetCDF file used by the cells that follow
ds = xr.open_dataset('2021_nanjing.nc')

In [9]:
# Extract temperature variable (Kelvin)
temp_k = ds['t2m']

# Select the grid point nearest to 32°N, 118°E (the location used here for Nanjing)
temp_nanjing = temp_k.sel(latitude=32, longitude=118.0, method="nearest")

# Convert to Celsius
temp_c = temp_nanjing - 273.15

# Resample to monthly average (using 'ME' instead of deprecated 'M')
temp_monthly = temp_c.resample(valid_time='ME').mean()

# Convert to pandas Series for easier handling
temp_series = temp_monthly.to_series()

# Annual mean as a plain Python float so it formats and plots as a scalar
annual_mean = float(temp_c.mean())

# Create figure with two subplots
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), gridspec_kw={'height_ratios': [3, 1]})

# Monthly temperature plot (top panel)
month_names = [calendar.month_abbr[m] for m in temp_series.index.month]
bars = ax1.bar(month_names, temp_series.values, color='skyblue', edgecolor='navy')

# Add data labels. bar_label anchors the text at the end of each bar, so a
# month with a sub-zero mean gets its label below the bar instead of inside it.
ax1.bar_label(bars, fmt='%.1f°C')

ax1.set_title('Monthly Average Temperature in Nanjing (2021)', fontsize=14)
ax1.set_ylabel('Temperature (°C)')
ax1.grid(axis='y', linestyle='--', alpha=0.7)

# Annual mean temperature visualization (bottom panel)
ax2.barh(['Annual Mean'], [annual_mean], color='salmon', edgecolor='darkred')
ax2.text(annual_mean, 0, f' {annual_mean:.2f}°C',
         va='center', ha='left', fontsize=12, color='darkred')
# Horizontal range covers the monthly means with a 2 °C margin on each side
ax2.set_xlim([temp_series.min()-2, temp_series.max()+2])
ax2.set_xticks([])
ax2.spines['top'].set_visible(False)
ax2.spines['right'].set_visible(False)
ax2.spines['bottom'].set_visible(False)

# Add horizontal line showing annual mean to top plot
ax1.axhline(y=annual_mean, color='red', linestyle='--',
             label=f'Annual Mean: {annual_mean:.2f}°C')
ax1.legend(loc='upper right')

# Show plot
plt.tight_layout()
plt.show()


In [1]:
# Open one dataset per comparison year.
# NOTE(review): nc_file_1996 / nc_file_1969 / nc_file_2023 are not assigned in any
# cell visible here — confirm they are defined before this cell runs.
ds_1996 = xr.open_dataset(nc_file_1996)
ds_1969 = xr.open_dataset(nc_file_1969)
ds_2023 = xr.open_dataset(nc_file_2023)

# Function to extract Nanjing temperature data
def extract_nanjing_temp(ds, lat=32.06, lon=118.79):
    """Return the temperature series (°C) at the grid point nearest a location.

    Parameters
    ----------
    ds : dataset-like
        Must expose ``latitude`` and ``longitude`` coordinates and a ``t2m``
        variable (treated as Kelvin) that carries a ``valid_time`` dimension.
    lat, lon : float, optional
        Target coordinates in degrees. Default to Nanjing (32.06°N, 118.79°E).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``valid_time`` with a single ``temperature`` column in °C.
    """
    # Index of the grid cell closest to the target along each axis
    lat_idx = np.argmin(np.abs(ds.latitude.values - lat))
    lon_idx = np.argmin(np.abs(ds.longitude.values - lon))
    nanjing_temp = ds.t2m.isel(latitude=lat_idx, longitude=lon_idx)

    # Convert to DataFrame, keeping only the temperature column
    df = nanjing_temp.to_dataframe().reset_index().set_index('valid_time')
    # rename() returns a new frame, so the assignment below (and any columns
    # callers add later) never writes into a slice of the wider frame
    df = df[['t2m']].rename(columns={'t2m': 'temperature'})
    df['temperature'] = df['temperature'] - 273.15  # Kelvin to Celsius
    return df

# Build one frame per year (1996, 1969, 2023), tagging each with its year and a
# day-of-year column so the three series can share one calendar axis
df_1996, df_1969, df_2023 = (
    extract_nanjing_temp(dataset).assign(
        year=year_label,
        day_of_year=lambda frame: frame.index.dayofyear,
    )
    for year_label, dataset in ((1996, ds_1996), (1969, ds_1969), (2023, ds_2023))
)

# Overlay the three years on a single figure
plt.figure(figsize=(15, 6))
for frame, label, colour in (
    (df_1996, '1996', 'blue'),
    (df_1969, '1969', 'red'),
    (df_2023, '2023', 'green'),
):
    plt.plot(frame['day_of_year'], frame['temperature'],
             label=label, color=colour, alpha=0.7)

# Titles, axis labels, legend and grid
plt.title('Nanjing Temperature Comparison: 1969, 1996, and 2023')
plt.xlabel('Day of Year')
plt.ylabel('Temperature (°C)')
plt.legend()
plt.grid(True)

# Month abbreviations paired with the day-of-year on which each month begins
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
month_starts = [
    1, 32, 60, 91, 121, 152,
    182, 213, 244, 274, 305, 335,
]

# Put one tick at the start of each month
plt.xticks(month_starts, months, rotation=45)

([<matplotlib.axis.XTick at 0x1f9a53816d0>,
  <matplotlib.axis.XTick at 0x1f9a3f5eb10>,
  <matplotlib.axis.XTick at 0x1f9a55c4690>,
  <matplotlib.axis.XTick at 0x1f9a55c5d90>,
  <matplotlib.axis.XTick at 0x1f9a55cd190>,
  <matplotlib.axis.XTick at 0x1f9a55cfa10>,
  <matplotlib.axis.XTick at 0x1f9a55d6210>,
  <matplotlib.axis.XTick at 0x1f9a55d8b10>,
  <matplotlib.axis.XTick at 0x1f9a55d9850>,
  <matplotlib.axis.XTick at 0x1f9a55dbfd0>,
  <matplotlib.axis.XTick at 0x1f9a55de810>,
  <matplotlib.axis.XTick at 0x1f9a55e4fd0>],
 [Text(1, 0, 'Jan'),
  Text(32, 0, 'Feb'),
  Text(60, 0, 'Mar'),
  Text(91, 0, 'Apr'),
  Text(121, 0, 'May'),
  Text(152, 0, 'Jun'),
  Text(182, 0, 'Jul'),
  Text(213, 0, 'Aug'),
  Text(244, 0, 'Sep'),
  Text(274, 0, 'Oct'),
  Text(305, 0, 'Nov'),
  Text(335, 0, 'Dec')])

In [2]:
# Kept so the name stays available to any other cell that may use it;
# the locator is deliberately not applied to this figure.
from matplotlib.ticker import MaxNLocator

# Leave the month ticks configured with plt.xticks in the previous cell in place.
# Installing a new major locator here would move the tick positions while the
# month labels stay tied to the old ones, leaving the axis mislabelled or blank.

# Ensure the plot layout is tight and labels are displayed properly
plt.tight_layout()

# Show the plot using Matplotlib
plt.show()

In [None]:
# Now proceed with LSTM modeling for one of the years (1996 in this case)

# Preprocess the data: normalize using MinMaxScaler.
# The scaler is fitted on everything except the final 240 rows. Those rows are
# the targets held out for testing further down, so excluding them keeps the
# test period's extremes from leaking into the normalisation. The whole series
# is then transformed with that training-only fit.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df_1996[['temperature']].iloc[:-240])
scaled_data = scaler.transform(df_1996[['temperature']])

# Function to create sequences for time series prediction
def create_sequences(data, seq_length):
    """Slice a 2-D array into sliding windows and their next-step targets.

    Only column 0 of ``data`` is read. Window ``k`` holds rows ``k`` through
    ``k + seq_length - 1``; its target is the value in row ``k + seq_length``.

    Returns a pair of NumPy arrays ``(windows, targets)``.
    """
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length, 0] for start in range(n_windows)]
    targets = [data[start + seq_length, 0] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Window length: three consecutive readings are used to predict the next one
seq_length = 3
X, y = create_sequences(scaled_data, seq_length)

# Hold out the final 240 samples for testing
# (described as the last 10 days of hourly data)
train_size = len(X) - 240
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Turn every split into a float tensor
X_train, y_train, X_test, y_test = (
    torch.FloatTensor(split) for split in (X_train, y_train, X_test, y_test)
)

# Shuffled mini-batches of 16 windows for training
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Define the LSTM model (same as before)
class LSTM(nn.Module):
    """LSTM regressor: a batch-first LSTM stack followed by a linear head.

    ``forward`` takes a tensor shaped ``(batch, seq_len, input_size)`` and
    returns ``(batch, output_size)``, computed from the LSTM output at the
    final time step.
    """

    def __init__(self, input_size=1, hidden_size=50, num_layers=1, output_size=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Start every call from all-zero hidden and cell states on the input's device
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        hidden = torch.zeros(state_shape, device=x.device)
        cell = torch.zeros(state_shape, device=x.device)

        sequence_out, _ = self.lstm(x, (hidden, cell))
        # Only the last time step feeds the linear head
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Fix the random seed so weight initialisation and batch shuffling repeat on every run
torch.manual_seed(42)

# Initialize the model, loss function, and optimizer
model = LSTM(input_size=1, hidden_size=50, num_layers=1, output_size=1)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        # Add a trailing feature axis to the inputs and a column axis to the
        # targets so both match the model's (batch, 1) output
        outputs = model(batch_X.unsqueeze(2))
        loss = criterion(outputs, batch_y.unsqueeze(1))
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the mean
        running_loss += loss.item() * batch_X.size(0)
    # Report the mean loss over the whole epoch rather than the last mini-batch only
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')

# Run the trained network on both splits with gradients disabled
model.eval()
with torch.no_grad():
    train_predict, test_predict = (
        model(inputs.unsqueeze(2)).squeeze().numpy()
        for inputs in (X_train, X_test)
    )

# Map predictions and targets back from the scaled range to °C
train_predict, y_train, test_predict, y_test = (
    scaler.inverse_transform(values.reshape(-1, 1))
    for values in (train_predict, y_train.numpy(), test_predict, y_test.numpy())
)

# Root Mean Squared Error for each split
train_rmse = np.sqrt(np.mean(np.square(train_predict - y_train)))
test_rmse = np.sqrt(np.mean(np.square(test_predict - y_test)))
print(f'Train RMSE: {train_rmse:.2f}')
print(f'Test RMSE: {test_rmse:.2f}')

# Compare actual and predicted temperature over the held-out tail of 1996
fig, ax = plt.subplots(figsize=(15, 6))
test_index = df_1996.index[-240:]
ax.plot(test_index, y_test, label='Actual')
ax.plot(test_index, test_predict, label='Predicted')
ax.set_title('LSTM Model: Actual vs Predicted Temperature for Last 10 Days (1996)')
ax.set_xlabel('Date')
ax.set_ylabel('Temperature (°C)')
ax.legend()
ax.grid(True)
plt.show()