In [1]:
#Importing necessary libraries
import xarray as xr #helps work with NetCDF files
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
from scipy.stats import linregress

In [None]:
#Opening the NetCDF file holding the temperature values of Nepal from 1940 to 2024
#(the file is about 3GB, so it is not uploaded to github)
nc_path = r"temp_data_1940_2024.nc"
ds = xr.open_dataset(nc_path)
print("Dataset:\n")
print(ds)

#Listing the data variables stored in the dataset
variable_names = [var_name for var_name in ds.data_vars]
print("\nVariables in the dataset:", variable_names)

Dataset:

<xarray.Dataset> Size: 3GB
Dimensions:  (time: 745400, lat: 21, lon: 41)
Coordinates:
  * time     (time) datetime64[ns] 6MB 1940-01-01 ... 2024-12-31T23:00:00
  * lon      (lon) float64 328B 79.0 79.25 79.5 79.75 ... 88.25 88.5 88.75 89.0
  * lat      (lat) float64 168B 31.0 30.75 30.5 30.25 ... 26.75 26.5 26.25 26.0
Data variables:
    2t       (time, lat, lon) float32 3GB ...
Attributes:
    CDI:          Climate Data Interface version 2.4.0 (https://mpimet.mpg.de...
    Conventions:  CF-1.6
    institution:  European Centre for Medium-Range Weather Forecasts
    history:      Sat Aug 16 14:14:44 2025: cdo mergetime temp_data_1940.nc t...
    CDO:          Climate Data Operators version 2.4.0 (https://mpimet.mpg.de...

Variables in the dataset: ['2t']


In [3]:
# Dropping possibly conflicting dataset-level attributes (_FillValue and missing_value), if present
for conflicting_attr in ('_FillValue', 'missing_value'):
    ds.attrs.pop(conflicting_attr, None)

# One consistent sentinel (-99.9) for both _FillValue and missing_value of the '2t' variable
# NOTE(review): `encoding` is not referenced again in the visible cells - confirm whether it is still needed
fill_sentinel_settings = {'_FillValue': -99.9, 'missing_value': -99.9}
encoding = {'2t': fill_sentinel_settings}


In [4]:
# Working on a copy of the loaded dataset
dataset = ds.copy()

# Re-assigning the time coordinate as pandas datetimes
time_as_datetime = pd.to_datetime(dataset['time'].values)
dataset['time'] = time_as_datetime

# Pulling out just the temperature variable (stored as '2t', i.e. 2m Temperature)
time_data = dataset['2t']

time_data

In [5]:
#Masking the -99.9 sentinel: values equal to it are replaced by NaN
fill_sentinel = -99.9
time_data_masked = time_data.where(time_data != fill_sentinel)

#Averaging over both spatial dimensions, giving one mean temperature across the whole grid per time step
spatial_dims = ['lat', 'lon']
area_avg_time = time_data_masked.mean(dim=spatial_dims)

#Converting the averaged values to a DataFrame and dropping time steps without a valid average
df_area_avg = (
    area_avg_time
    .to_dataframe(name='area_avg_temp')
    .reset_index()
    .dropna(subset=['area_avg_temp'])
)
df_area_avg

Unnamed: 0,time,area_avg_temp
0,1940-01-01 00:00:00,267.596619
1,1940-01-01 01:00:00,267.288513
2,1940-01-01 02:00:00,268.300842
3,1940-01-01 03:00:00,272.103973
4,1940-01-01 04:00:00,275.582275
...,...,...
745395,2024-12-31 19:00:00,271.555908
745396,2024-12-31 20:00:00,270.853851
745397,2024-12-31 21:00:00,270.584167
745398,2024-12-31 22:00:00,270.584717


In [6]:
#Splitting the timestamp column into calendar year and month columns
timestamp_parts = df_area_avg['time'].dt
df_area_avg['year'] = timestamp_parts.year
df_area_avg['month'] = timestamp_parts.month

#Mean temperature for each month of each year (in K), with a Celsius column derived from it
df_monthly_temp = (
    df_area_avg
    .groupby(['year', 'month'])['area_avg_temp']
    .mean()
    .reset_index()
    .rename(columns={'area_avg_temp': 'monthly_temperature_K'})
    .assign(monthly_temperature_C=lambda monthly: monthly['monthly_temperature_K'] - 273.15)
)
df_monthly_temp

Unnamed: 0,year,month,monthly_temperature_K,monthly_temperature_C
0,1940,1,272.830383,-0.319611
1,1940,2,272.948334,-0.201660
2,1940,3,275.880951,2.730957
3,1940,4,282.839996,9.690002
4,1940,5,287.461823,14.311829
...,...,...,...,...
1015,2024,8,291.154724,18.004730
1016,2024,9,289.970917,16.820923
1017,2024,10,286.332153,13.182159
1018,2024,11,281.473572,8.323578


In [None]:
#Plotting a graph that shows the temperatures for each month from 1940 to 2024

#Starting by initializing plotly figure to add parts to
fig = go.Figure()

month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun','Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] # To replace the numbers 1-12

#Hover text shared by every yearly line; %{text} carries the year of the hovered trace
year_hovertemplate = (
    '<b>Year:</b> %{text}<br>' +
    '<b>Month:</b> %{x}<br>' +
    '<b>Temp:</b> %{y:.2f} °C<extra></extra>'
)


def add_year_trace(figure, year, line, opacity, name=None, showlegend=True, legendgroup=None):
    """Add one line to `figure` with the monthly temperatures (°C) of `year`.

    Parameters
    ----------
    figure : plotly.graph_objects.Figure
        Figure that receives the new trace.
    year : int
        Year whose rows are selected from `df_monthly_temp`.
    line : dict
        Plotly line styling (color, width, dash).
    opacity : float
        Opacity of the trace.
    name : str or None
        Legend label; None leaves the trace unnamed.
    showlegend : bool
        Whether the trace gets its own legend entry.
    legendgroup : str or None
        Traces sharing a legend group are shown/hidden together from the legend.
    """
    #For the given year, getting its monthly rows
    yearly_data = df_monthly_temp[df_monthly_temp['year'] == year]
    #Getting actual month names instead of numbers from 1-12
    months = [month_names[m - 1] for m in yearly_data['month']]
    figure.add_trace(go.Scatter(
        x = months, y = yearly_data['monthly_temperature_C'],
        mode = 'lines',
        line = line,
        opacity = opacity,
        name = name,
        showlegend = showlegend,
        legendgroup = legendgroup,
        hovertemplate = year_hovertemplate,
        text = [str(year)] * len(months)
    ))


#Lines for older years, all thin and grey (1940–2019)
prev_years = list(range(1940, 2020))
older_years_label = "1940 – 2019"
for i, year in enumerate(prev_years):
    #All these years look the same, so only the first one gets a legend entry.
    #They share one legend group so that clicking that single entry toggles every
    #grey line instead of only the 1940 line.
    is_first = i == 0
    add_year_trace(
        fig, year,
        line = dict(color = 'grey', width = 1.5),
        opacity = 0.3,
        name = older_years_label if is_first else None,
        showlegend = is_first,
        legendgroup = older_years_label,
    )

#Lines for more recent years, each in its own color (2020–2023, dashed thick lines)
new_years = [2020, 2021, 2022, 2023]
new_colors = {2020: "red", 2021: "blue", 2022: "green", 2023: "orange"}
for year in new_years:
    add_year_trace(
        fig, year,
        line = dict(dash = 'dash', width = 5, color = new_colors[year]),
        opacity = 0.7,
        name = str(year),
    )

#Line for most recent year 2024, making it bold and black
add_year_trace(
    fig, 2024,
    line = dict(dash = 'longdash', width = 8, color = 'black'),
    opacity = 0.9,
    name = str(2024),
)

#Overall layout for the graph
fig.update_layout(
    title="Monthly Temperature of Nepal (1940 to 2024)",
    title_font=dict(size=24, family="Calibri", color="black"),
    xaxis=dict(
        title="Month",
        title_font=dict(size=22, family="Calibri", color="black"),
        tickfont=dict(size=18, family="Calibri", color="black")
    ),
    yaxis=dict(
        title="Temperature (°C)",
        title_font=dict(size=22, family="Calibri", color="black"),
        tickfont=dict(size=18, family="Calibri", color="black")
    ),
    legend=dict(
        title="Year",
        title_font=dict(size=18, family="Calibri", color="black"),
        font=dict(size=16, family="Calibri", color="black")
    ),
    template="plotly_white",
    width=1600, height=800
)

fig.show()


In [9]:
# df_monthly_temp.to_csv(r'year_monthly_temp_1940_2024.csv', index = False)

In [10]:
#Building a first-of-the-month timestamp for every (year, month) row
df_monthly_temp['date'] = pd.to_datetime(
    df_monthly_temp[['year', 'month']].assign(day=1)
)

#Decade column: the first year of the decade each row falls in (e.g. 1987 -> 1980)
df_monthly_temp['decade'] = df_monthly_temp['date'].dt.year.floordiv(10).mul(10)

#Average temperature per decade, collected in a new table
decade_avg = (
    df_monthly_temp
    .groupby('decade')['monthly_temperature_C']
    .mean()
    .reset_index()
    .rename(columns={'monthly_temperature_C': 'avg_decade_temp_C'})
)

#Timestamp for 1 January of each decade's first year (the start of the decade, not its middle)
decade_start_labels = decade_avg['decade'].astype(str) + "-01-01"
decade_avg['decade_date'] = pd.to_datetime(decade_start_labels)

decade_avg

Unnamed: 0,decade,avg_decade_temp_C,decade_date
0,1940,9.130596,1940-01-01
1,1950,9.208232,1950-01-01
2,1960,9.603133,1960-01-01
3,1970,9.961705,1970-01-01
4,1980,10.021585,1980-01-01
5,1990,10.187354,1990-01-01
6,2000,10.510238,2000-01-01
7,2010,10.653485,2010-01-01
8,2020,10.957559,2020-01-01


In [None]:
#Plotting a graph that shows the temperatures for each month from 1940 to 2024 in a single time series along with decade averages

fig = go.Figure()

#Getting date and temperature to plot the time series
#(`years` holds the first-of-month timestamps from the 'date' column)
temp = df_monthly_temp['monthly_temperature_C']
years = df_monthly_temp['date']

#Plotting the temperatures from 1940 to 2024
fig.add_trace(go.Scatter(
    x = years, y = temp,
    mode = 'lines',
    line = dict( width = 1, color = 'black'),
    opacity = 0.9,
    name = 'Temperature',
    hovertemplate=
            '<b>Date:</b> %{x}<br>' +
            '<b>Temperature:</b> %{y:.2f} °C<extra></extra>'
))

#Plotting straight lines for each decade averages

#Setting different colors for each decade average
decade_colors = {1940: "#2d8dd2", 1950: "#2ca02c", 1960: "#9467bd", 1970: "#17becf", 1980:"#5b37ea", 1990:"#9e6155", 2000:"#ecca0a", 2010:"#91fe5f", 2020:'#d62728', }

#Time span actually covered by the data. Decade lines are clipped to it so that a
#partially covered decade is not drawn across years for which there is no data.
data_start = years.min()
data_end = years.max() + pd.offsets.MonthEnd(0)  #end of the last month present

#Plotting a straight line for each decade average temperature and making them each a different color
#itertuples keeps each column's own dtype; `decade` is still cast to int explicitly so the
#date strings and color lookup never depend on how the row was upcast.
for row in decade_avg.itertuples(index=False):
    decade = int(row.decade)
    avg_temp = row.avg_decade_temp_C
    start_year = max(pd.Timestamp(year=decade, month=1, day=1), data_start)      #Start of the decade
    end_year = min(pd.Timestamp(year=decade + 9, month=12, day=31), data_end)    #End of the decade (or of the data)
    fig.add_trace(go.Scatter(
        x = [start_year, end_year],
        y = [avg_temp] * 2,
        mode = 'lines',
        line = dict(width = 3, color = decade_colors[decade]),
        name = f"{decade}s avg",
        hovertemplate =
        f"<b>Decade:</b> {decade}s<br><b>Avg Temperature:</b> {avg_temp:.2f} °C<extra></extra>"
    ))

#Overall layout for the graph
fig.update_layout(
    title = "Temperature of Nepal (1940 to 2024)",
    title_font = dict(size = 24, family = "Calibri", color = "black"),
    xaxis = dict(
        title = "Year",
        title_font = dict(size = 22, family = "Calibri", color = "black"),
        tickfont = dict(size = 18, family = "Calibri", color = "black")
    ),
    yaxis = dict(
        title = "Temperature (°C)",
        title_font = dict(size = 22, family = "Calibri", color = "black"),
        tickfont = dict(size = 18, family = "Calibri", color = "black")
    ),
    legend = dict(
        title = "Legend",
        orientation = "h",
        yanchor = "bottom",
        y = 0.01,
        xanchor = "right",
        x = 0.999,
        title_font = dict(size = 18, family = "Calibri", color = "black"),
        font = dict(size = 16, family = "Calibri", color = "black"),
        bordercolor = "Black",
        borderwidth = 2
    ),
    template = "plotly_white",
    width = 1600, height = 500
)

fig.show()

In [12]:
#Getting average annual temperatures in °C, as the mean of each year's monthly means

df_annual_temp = (
    df_monthly_temp
    .groupby('year')['monthly_temperature_C']
    .mean()
    .reset_index()
    .rename(columns={'monthly_temperature_C': 'annual_temperature_C'})
)

df_annual_temp

Unnamed: 0,year,annual_temperature_C
0,1940,8.745097
1,1941,8.953671
2,1942,8.888148
3,1943,8.943669
4,1944,9.542346
...,...,...
80,2020,10.323499
81,2021,10.918447
82,2022,11.137062
83,2023,10.999980


In [13]:
#Regression inputs: year as the x variable, annual mean temperature (°C) as the y variable
temp = df_annual_temp['annual_temperature_C']
years = df_annual_temp['year']

#Fitting an ordinary least-squares line and naming each piece of the result
regression = linregress(years, temp)
slope, intercept = regression.slope, regression.intercept
r_value, p_value, std_err = regression.rvalue, regression.pvalue, regression.stderr

#Evaluating the fitted line at every year
trend_line = years * slope + intercept

In [None]:
#Plotting a graph that shows the annual temperatures from 1940 to 2024

fig = go.Figure()

#Year and annual mean temperature (°C) for the time series
temp = df_annual_temp['annual_temperature_C']
years = df_annual_temp['year']


def calibri_black(size):
    """Return a plotly font dict: black Calibri at the given size."""
    return dict(size = size, family = "Calibri", color = "black")


#Observed annual temperatures: red line with white, red-outlined markers
observed_hover = (
    '<b>Date:</b> %{x}<br>' +
    '<b>Temperature:</b> %{y:.2f} °C<extra></extra>'
)
observed_marker = dict(color = 'white', size = 11, line = dict(color = 'red', width = 2))
observed_trace = go.Scatter(
    x = years, y = temp,
    mode = 'lines+markers',
    line = dict(width = 4, color = 'red'),
    marker = observed_marker,
    opacity = 0.9,
    name = 'Temperature',
    hovertemplate = observed_hover
)
fig.add_trace(observed_trace)

#Fitted trend line: dashed black
trend_hover = (
    '<b>Date:</b> %{x}<br>'
    '<b>Predicted Temperature:</b> %{y:.2f} °C<extra></extra>'
)
trend_trace = go.Scatter(
    x = years, y = trend_line,
    mode = 'lines',
    line = dict(width = 3, color = 'black', dash = 'dash'),
    opacity = 0.7,
    name = 'trend line',
    hovertemplate = trend_hover
)
fig.add_trace(trend_trace)


#Overall layout for the graph
x_axis_layout = dict(
    title = "Year",
    title_font = calibri_black(22),
    tickfont = calibri_black(18)
)
y_axis_layout = dict(
    title = "Temperature (°C)",
    title_font = calibri_black(22),
    tickfont = calibri_black(18)
)
#Horizontal, boxed legend anchored at the bottom-right corner of the plot area
legend_layout = dict(
    orientation = "h",
    yanchor = "bottom",
    y = 0.01,
    xanchor = "right",
    x = 0.999,
    title_font = calibri_black(18),
    font = calibri_black(16),
    bordercolor = "Black",
    borderwidth = 2
)
fig.update_layout(
    title = "Annual Average Temperature of Nepal (1940 to 2024) with Trend Line",
    title_font = calibri_black(24),
    xaxis = x_axis_layout,
    yaxis = y_axis_layout,
    legend = legend_layout,
    template = "plotly_white",
    width = 1600, height = 600
)

#Text to be added for trend line
trend_line_text = "".join([
    "<b>Trend line variables:</b><br>",
    f"Slope: {slope:.5f} °C/year<br>",
    f"R²: {r_value**2:.3f}<br>",
    f"p-value: {p_value:.3e}<br>",
])

#Adding trend line variables as a box in the top-left of the figure (paper coordinates)
fig.add_annotation(
    text = trend_line_text,
    xref = "paper", yref = "paper",
    x = 0.01, y = 0.95,
    showarrow = False,
    bordercolor = "black",
    borderwidth = 1,
    borderpad = 5,
    bgcolor = "white",
    opacity = 0.9,
    font = dict(family = "Calibri", size = 14, color = "black")
)

fig.show()