# Climate Change Data Analysis Based On Historical Temperature Data
----------------------------------------------------------------------

Data from NASA Goddard Institute for Space Studies (GISS) 

Packages to be imported 

In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
import plotly.graph_objs as go
import seaborn as sns

 ## Hemispheric Temperature Change

In [2]:
df_temp_hemisphere = pd.read_csv("data/historical_temperature/Hemispheric Temperature Change/temp_hemisphere.csv")
df_temp_hemisphere.info()
df_temp_hemisphere.shape


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 15 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Year     144 non-null    int64  
 1   Glob     144 non-null    float64
 2   NHem     144 non-null    float64
 3   SHem     144 non-null    float64
 4   24N-90N  144 non-null    float64
 5   24S-24N  144 non-null    float64
 6   90S-24S  144 non-null    float64
 7   64N-90N  144 non-null    float64
 8   44N-64N  144 non-null    float64
 9   24N-44N  144 non-null    float64
 10  EQU-24N  144 non-null    float64
 11  24S-EQU  144 non-null    float64
 12  44S-24S  144 non-null    float64
 13  64S-44S  144 non-null    float64
 14  90S-64S  144 non-null    float64
dtypes: float64(14), int64(1)
memory usage: 17.0 KB


(144, 15)

In [3]:
df_temp_hemisphere.head(59)

Unnamed: 0,Year,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
0,1880,-0.17,-0.28,-0.05,-0.38,-0.13,-0.02,-0.8,-0.52,-0.26,-0.14,-0.11,-0.04,0.05,0.67
1,1881,-0.09,-0.18,0.0,-0.36,0.1,-0.07,-0.93,-0.5,-0.19,0.11,0.1,-0.06,-0.07,0.6
2,1882,-0.11,-0.21,-0.01,-0.31,-0.04,0.01,-1.41,-0.31,-0.13,-0.04,-0.05,0.01,0.04,0.63
3,1883,-0.17,-0.28,-0.07,-0.34,-0.17,-0.01,-0.18,-0.58,-0.24,-0.17,-0.16,-0.04,0.07,0.5
4,1884,-0.28,-0.42,-0.15,-0.61,-0.14,-0.14,-1.3,-0.66,-0.45,-0.12,-0.17,-0.19,-0.02,0.65
5,1885,-0.33,-0.44,-0.24,-0.66,-0.13,-0.27,-1.5,-0.7,-0.46,-0.06,-0.21,-0.33,-0.15,0.82
6,1886,-0.31,-0.43,-0.2,-0.57,-0.2,-0.21,-1.58,-0.51,-0.4,-0.21,-0.19,-0.2,-0.2,0.59
7,1887,-0.36,-0.44,-0.29,-0.53,-0.3,-0.28,-1.76,-0.53,-0.29,-0.29,-0.31,-0.28,-0.26,0.32
8,1888,-0.17,-0.23,-0.12,-0.42,0.1,-0.29,-1.3,-0.44,-0.24,0.07,0.13,-0.31,-0.23,0.16
9,1889,-0.1,-0.17,-0.04,-0.29,0.08,-0.16,-0.68,-0.23,-0.27,0.02,0.14,-0.15,-0.15,0.45


In [4]:
df_temp_hemisphere.tail(50)

Unnamed: 0,Year,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
94,1974,-0.07,-0.18,0.03,-0.14,-0.2,0.18,-0.19,-0.07,-0.18,-0.23,-0.17,0.22,-0.08,0.65
95,1975,-0.01,-0.05,0.02,0.12,-0.24,0.16,0.1,0.39,-0.05,-0.31,-0.17,0.13,0.14,0.29
96,1976,-0.1,-0.21,0.01,-0.26,-0.11,0.09,-0.08,-0.32,-0.28,-0.13,-0.1,0.1,0.22,-0.24
97,1977,0.18,0.12,0.24,0.14,0.16,0.25,0.16,0.24,0.06,0.1,0.22,0.22,0.31,0.28
98,1978,0.07,0.02,0.11,-0.03,0.08,0.16,-0.16,0.04,-0.04,0.1,0.05,0.21,0.13,0.04
99,1979,0.16,0.08,0.25,-0.03,0.26,0.23,-0.61,0.07,0.09,0.26,0.27,0.29,0.33,-0.2
100,1980,0.26,0.17,0.35,0.06,0.31,0.39,0.26,0.02,0.02,0.33,0.29,0.35,0.37,0.59
101,1981,0.32,0.37,0.27,0.48,0.18,0.36,1.1,0.78,0.08,0.22,0.14,0.28,0.38,0.61
102,1982,0.14,0.06,0.23,-0.08,0.27,0.18,-0.4,0.1,-0.08,0.25,0.29,0.18,0.32,-0.11
103,1983,0.31,0.25,0.38,0.22,0.43,0.24,0.19,0.69,-0.07,0.29,0.58,0.21,0.41,0.05


In [5]:
df_temp_hemisphere.columns

Index(['Year', 'Glob', 'NHem', 'SHem', '24N-90N', '24S-24N', '90S-24S',
       '64N-90N', '44N-64N', '24N-44N', 'EQU-24N', '24S-EQU', '44S-24S',
       '64S-44S', '90S-64S'],
      dtype='object')

## Temperature Change for Three Latitude Bands

In [6]:

df_temp_three_latitude_bands = pd.read_csv("data/historical_temperature/Temperature Change for Three Latitude Bands/temp_three_latitude_bands.csv")
df_temp_three_latitude_bands.info()
df_temp_three_latitude_bands.shape


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 15 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Year     144 non-null    int64  
 1   Glob     144 non-null    float64
 2   NHem     144 non-null    float64
 3   SHem     144 non-null    float64
 4   24N-90N  144 non-null    float64
 5   24S-24N  144 non-null    float64
 6   90S-24S  144 non-null    float64
 7   64N-90N  144 non-null    float64
 8   44N-64N  144 non-null    float64
 9   24N-44N  144 non-null    float64
 10  EQU-24N  144 non-null    float64
 11  24S-EQU  144 non-null    float64
 12  44S-24S  144 non-null    float64
 13  64S-44S  144 non-null    float64
 14  90S-64S  144 non-null    float64
dtypes: float64(14), int64(1)
memory usage: 17.0 KB


(144, 15)

In [7]:
df_temp_three_latitude_bands.head(59)

Unnamed: 0,Year,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
0,1880,-0.17,-0.28,-0.05,-0.38,-0.13,-0.02,-0.8,-0.52,-0.26,-0.14,-0.11,-0.04,0.05,0.67
1,1881,-0.09,-0.18,0.0,-0.36,0.1,-0.07,-0.93,-0.5,-0.19,0.11,0.1,-0.06,-0.07,0.6
2,1882,-0.11,-0.21,-0.01,-0.31,-0.04,0.01,-1.41,-0.31,-0.13,-0.04,-0.05,0.01,0.04,0.63
3,1883,-0.17,-0.28,-0.07,-0.34,-0.17,-0.01,-0.18,-0.58,-0.24,-0.17,-0.16,-0.04,0.07,0.5
4,1884,-0.28,-0.42,-0.15,-0.61,-0.14,-0.14,-1.3,-0.66,-0.45,-0.12,-0.17,-0.19,-0.02,0.65
5,1885,-0.33,-0.44,-0.24,-0.66,-0.13,-0.27,-1.5,-0.7,-0.46,-0.06,-0.21,-0.33,-0.15,0.82
6,1886,-0.31,-0.43,-0.2,-0.57,-0.2,-0.21,-1.58,-0.51,-0.4,-0.21,-0.19,-0.2,-0.2,0.59
7,1887,-0.36,-0.44,-0.29,-0.53,-0.3,-0.28,-1.76,-0.53,-0.29,-0.29,-0.31,-0.28,-0.26,0.32
8,1888,-0.17,-0.23,-0.12,-0.42,0.1,-0.29,-1.3,-0.44,-0.24,0.07,0.13,-0.31,-0.23,0.16
9,1889,-0.1,-0.17,-0.04,-0.29,0.08,-0.16,-0.68,-0.23,-0.27,0.02,0.14,-0.15,-0.15,0.45


In [8]:
df_temp_three_latitude_bands.tail(100)

Unnamed: 0,Year,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
44,1924,-0.27,-0.18,-0.36,-0.12,-0.26,-0.43,0.56,-0.25,-0.23,-0.26,-0.25,-0.39,-0.45,-0.54
45,1925,-0.22,-0.13,-0.30,-0.03,-0.24,-0.37,-0.02,0.18,-0.18,-0.27,-0.21,-0.24,-0.55,-0.87
46,1926,-0.11,0.02,-0.22,0.02,0.02,-0.40,0.58,0.16,-0.22,0.01,0.02,-0.35,-0.45,-0.64
47,1927,-0.22,-0.13,-0.30,-0.13,-0.16,-0.38,0.04,-0.22,-0.13,-0.13,-0.18,-0.30,-0.45,-1.39
48,1928,-0.20,-0.10,-0.30,-0.07,-0.15,-0.39,0.68,-0.17,-0.22,-0.14,-0.17,-0.29,-0.48,-2.13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,2019,0.98,1.20,0.75,1.42,0.90,0.64,2.71,1.43,0.99,0.90,0.90,0.75,0.39,0.83
140,2020,1.01,1.35,0.68,1.67,0.86,0.57,2.88,1.81,1.19,0.88,0.84,0.58,0.39,0.89
141,2021,0.85,1.14,0.56,1.42,0.66,0.53,2.05,1.35,1.26,0.72,0.60,0.72,0.32,0.30
142,2022,0.89,1.16,0.62,1.52,0.57,0.70,2.34,1.50,1.27,0.62,0.51,0.79,0.38,1.09


In [9]:
df_temp_three_latitude_bands.columns

Index(['Year', 'Glob', 'NHem', 'SHem', '24N-90N', '24S-24N', '90S-24S',
       '64N-90N', '44N-64N', '24N-44N', 'EQU-24N', '24S-EQU', '44S-24S',
       '64S-44S', '90S-64S'],
      dtype='object')

In [10]:
df_temp_hemisphere.columns

Index(['Year', 'Glob', 'NHem', 'SHem', '24N-90N', '24S-24N', '90S-24S',
       '64N-90N', '44N-64N', '24N-44N', 'EQU-24N', '24S-EQU', '44S-24S',
       '64S-44S', '90S-64S'],
      dtype='object')

#### Create a merged dataframe with df_temp_hemisphere and df_temp_three_latitude_bands on the column year

In [11]:



merged_temp_hemisphere_latitude_df = pd.merge(df_temp_hemisphere, df_temp_three_latitude_bands, on='Year')


In [12]:
merged_temp_hemisphere_latitude_df.shape
merged_temp_hemisphere_latitude_df.columns
merged_temp_hemisphere_latitude_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 29 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Year       144 non-null    int64  
 1   Glob_x     144 non-null    float64
 2   NHem_x     144 non-null    float64
 3   SHem_x     144 non-null    float64
 4   24N-90N_x  144 non-null    float64
 5   24S-24N_x  144 non-null    float64
 6   90S-24S_x  144 non-null    float64
 7   64N-90N_x  144 non-null    float64
 8   44N-64N_x  144 non-null    float64
 9   24N-44N_x  144 non-null    float64
 10  EQU-24N_x  144 non-null    float64
 11  24S-EQU_x  144 non-null    float64
 12  44S-24S_x  144 non-null    float64
 13  64S-44S_x  144 non-null    float64
 14  90S-64S_x  144 non-null    float64
 15  Glob_y     144 non-null    float64
 16  NHem_y     144 non-null    float64
 17  SHem_y     144 non-null    float64
 18  24N-90N_y  144 non-null    float64
 19  24S-24N_y  144 non-null    float64
 20  90S-24S_y 

In [13]:
merged_temp_hemisphere_latitude_df.head(10)

Unnamed: 0,Year,Glob_x,NHem_x,SHem_x,24N-90N_x,24S-24N_x,90S-24S_x,64N-90N_x,44N-64N_x,24N-44N_x,...,24S-24N_y,90S-24S_y,64N-90N_y,44N-64N_y,24N-44N_y,EQU-24N_y,24S-EQU_y,44S-24S_y,64S-44S_y,90S-64S_y
0,1880,-0.17,-0.28,-0.05,-0.38,-0.13,-0.02,-0.8,-0.52,-0.26,...,-0.13,-0.02,-0.8,-0.52,-0.26,-0.14,-0.11,-0.04,0.05,0.67
1,1881,-0.09,-0.18,0.0,-0.36,0.1,-0.07,-0.93,-0.5,-0.19,...,0.1,-0.07,-0.93,-0.5,-0.19,0.11,0.1,-0.06,-0.07,0.6
2,1882,-0.11,-0.21,-0.01,-0.31,-0.04,0.01,-1.41,-0.31,-0.13,...,-0.04,0.01,-1.41,-0.31,-0.13,-0.04,-0.05,0.01,0.04,0.63
3,1883,-0.17,-0.28,-0.07,-0.34,-0.17,-0.01,-0.18,-0.58,-0.24,...,-0.17,-0.01,-0.18,-0.58,-0.24,-0.17,-0.16,-0.04,0.07,0.5
4,1884,-0.28,-0.42,-0.15,-0.61,-0.14,-0.14,-1.3,-0.66,-0.45,...,-0.14,-0.14,-1.3,-0.66,-0.45,-0.12,-0.17,-0.19,-0.02,0.65
5,1885,-0.33,-0.44,-0.24,-0.66,-0.13,-0.27,-1.5,-0.7,-0.46,...,-0.13,-0.27,-1.5,-0.7,-0.46,-0.06,-0.21,-0.33,-0.15,0.82
6,1886,-0.31,-0.43,-0.2,-0.57,-0.2,-0.21,-1.58,-0.51,-0.4,...,-0.2,-0.21,-1.58,-0.51,-0.4,-0.21,-0.19,-0.2,-0.2,0.59
7,1887,-0.36,-0.44,-0.29,-0.53,-0.3,-0.28,-1.76,-0.53,-0.29,...,-0.3,-0.28,-1.76,-0.53,-0.29,-0.29,-0.31,-0.28,-0.26,0.32
8,1888,-0.17,-0.23,-0.12,-0.42,0.1,-0.29,-1.3,-0.44,-0.24,...,0.1,-0.29,-1.3,-0.44,-0.24,0.07,0.13,-0.31,-0.23,0.16
9,1889,-0.1,-0.17,-0.04,-0.29,0.08,-0.16,-0.68,-0.23,-0.27,...,0.08,-0.16,-0.68,-0.23,-0.27,0.02,0.14,-0.15,-0.15,0.45


Filtering the rows for the years from  1991 to 2021 as the other module of the project is analyzing the data for the years 1991 and 2021

In [14]:


# Filter rows for the years from 1991 to 2021
merged_temp_hemisphere_latitude_df = merged_temp_hemisphere_latitude_df[(merged_temp_hemisphere_latitude_df['Year'] >= 1991) & (merged_temp_hemisphere_latitude_df['Year'] <= 2021)]


In [15]:
#### Renaming the columns 

In [16]:
# Define a function to rename columns
def rename_columns(column_name):
    if column_name == 'Year':
        return column_name
    elif column_name.endswith('_x'):
        return column_name[:-2] + '_h'
    elif column_name.endswith('_y'):
        return column_name[:-2] + '_l'
    else:
        return column_name

# Replace column names with suffix '_x' with '_h' and suffix '_y' with '_l'
merged_temp_hemisphere_latitude_df = merged_temp_hemisphere_latitude_df.rename(columns=rename_columns)

# Display the dataframe
merged_temp_hemisphere_latitude_df.head(59)


Unnamed: 0,Year,Glob_h,NHem_h,SHem_h,24N-90N_h,24S-24N_h,90S-24S_h,64N-90N_h,44N-64N_h,24N-44N_h,...,24S-24N_l,90S-24S_l,64N-90N_l,44N-64N_l,24N-44N_l,EQU-24N_l,24S-EQU_l,44S-24S_l,64S-44S_l,90S-64S_l
111,1991,0.41,0.41,0.4,0.47,0.36,0.39,0.76,0.66,0.26,...,0.36,0.39,0.76,0.66,0.26,0.32,0.4,0.31,0.29,0.96
112,1992,0.22,0.14,0.31,0.1,0.28,0.27,-0.16,0.44,-0.03,...,0.28,0.27,-0.16,0.44,-0.03,0.2,0.37,0.2,0.33,0.4
113,1993,0.23,0.19,0.28,0.13,0.32,0.22,0.55,0.29,-0.11,...,0.32,0.22,0.55,0.29,-0.11,0.28,0.35,0.27,0.39,-0.31
114,1994,0.31,0.37,0.26,0.43,0.31,0.2,0.35,0.5,0.42,...,0.31,0.2,0.35,0.5,0.42,0.28,0.34,0.26,0.23,-0.04
115,1995,0.45,0.58,0.32,0.7,0.42,0.22,1.32,0.98,0.32,...,0.42,0.22,1.32,0.98,0.32,0.4,0.45,0.29,0.15,0.15
116,1996,0.33,0.26,0.4,0.23,0.32,0.46,0.77,0.17,0.1,...,0.32,0.46,0.77,0.17,0.1,0.31,0.33,0.33,0.28,1.35
117,1997,0.46,0.52,0.41,0.54,0.51,0.33,0.77,0.9,0.23,...,0.51,0.33,0.77,0.9,0.23,0.5,0.52,0.42,0.36,-0.03
118,1998,0.61,0.71,0.51,0.78,0.69,0.32,0.87,0.94,0.66,...,0.69,0.32,0.87,0.94,0.66,0.6,0.79,0.39,0.31,0.12
119,1999,0.38,0.48,0.28,0.7,0.2,0.3,0.41,0.8,0.74,...,0.2,0.3,0.41,0.8,0.74,0.16,0.25,0.46,0.19,0.0
120,2000,0.39,0.5,0.29,0.7,0.25,0.29,0.97,0.8,0.55,...,0.25,0.29,0.97,0.8,0.55,0.21,0.28,0.42,0.1,0.26


<!-- Locally Weighted Scatterplot Smoothing (LOWESS) is a non-parametric regression method used to create a smooth curve through a set of data points, without assuming any specific functional form for the relationship between the variables.

In the context of the Northern Extratropics, LOWESS smoothing involves fitting a smooth curve to the annual mean temperature data points for that region. The curve is fit by iteratively re-weighted least squares, where each data point is given a weight based on its distance from the point where the smoothing is being evaluated.

So, when we talk about LOWESS smoothing for the Northern Extratropics, we are essentially fitting a smooth curve to the annual mean temperature data for that region, capturing the overall trend while reducing noise and variability. This smoothed curve can help visualize the underlying pattern in the temperature data over time. -->

#### Calculating the data for visualization for temperature change across Northern Hemisphere and Southern Hemisphere

In [17]:



# Calculate NH Annual Mean
NH_annual_mean = merged_temp_hemisphere_latitude_df[['Year', 'NHem_h']].groupby('Year').mean()

# Calculate SH Annual Mean
SH_annual_mean = merged_temp_hemisphere_latitude_df[['Year', 'SHem_h']].groupby('Year').mean()

# Apply Lowess smoothing to NH Annual Mean
nh_smoothed = sm.nonparametric.lowess(NH_annual_mean['NHem_h'], NH_annual_mean.index, frac=0.1)

# Apply Lowess smoothing to SH Annual Mean
sh_smoothed = sm.nonparametric.lowess(SH_annual_mean['SHem_h'], SH_annual_mean.index, frac=0.1)



#### Create  a line plot showing NH and SH Annual Mean temperatures along with their Lowess smoothing across Northern Hemisphere and Southern Hemisphere (The raw data points are also shown for reference)

In [27]:


# create traces for NH and SH Annual Mean along with their smoothed versions using go.Scatter
# For each trace, specify the x-coordinates as the years (NH_annual_mean.index and SH_annual_mean.index) and y-coordinates as the corresponding temperatures.
# Set the mode to 'lines' for the smoothed traces and 'markers' for the raw data.
# Each trace is added to the data list in the go.Figure constructor.
trace_nh = go.Scatter(x=NH_annual_mean.index, y=nh_smoothed[:, 1], mode='lines', name='NH Annual Mean (Smoothed)')
trace_sh = go.Scatter(x=SH_annual_mean.index, y=sh_smoothed[:, 1], mode='lines', name='SH Annual Mean (Smoothed)')
trace_nh_raw = go.Scatter(x=NH_annual_mean.index, y=NH_annual_mean['NHem_h'], mode='markers', name='NH Annual Mean (Raw)', marker=dict(color='blue'))
trace_sh_raw = go.Scatter(x=SH_annual_mean.index, y=SH_annual_mean['SHem_h'], mode='markers', name='SH Annual Mean (Raw)', marker=dict(color='red'))

# Create layout
layout = go.Layout(title='Northern Hemisphere  and Southern Hemisphere Annual Mean Temperature with Lowess Smoothing',
                   xaxis=dict(title='Year'),
                   yaxis=dict(title='Temperature (°C)'))

# Create figure
fig = go.Figure(data=[trace_nh, trace_sh, trace_nh_raw, trace_sh_raw], layout=layout)

# Show plot
fig.show()


#### Calculating  data for visualization of the temperature change across three latitude bands(Northern Extratropics,Tropics,Southern Extratropics)

In [19]:


# Calculate Annual Mean for Northern Extratropics (24N-90N), Southern Extratropics (90S-24S), and Tropics (24S-24N)
merged_temp_hemisphere_latitude_df['Annual_Mean_Northern_Extratropics'] = merged_temp_hemisphere_latitude_df[['24N-90N_l']].mean(axis=1)
merged_temp_hemisphere_latitude_df['Annual_Mean_Southern_Extratropics'] = merged_temp_hemisphere_latitude_df[['90S-24S_l']].mean(axis=1)
merged_temp_hemisphere_latitude_df['Annual_Mean_Tropics'] = merged_temp_hemisphere_latitude_df[['24S-24N_l']].mean(axis=1)

# Group by Year and calculate mean for each region
annual_mean_northern_extratropics = merged_temp_hemisphere_latitude_df.groupby('Year')['Annual_Mean_Northern_Extratropics'].mean()
annual_mean_southern_extratropics = merged_temp_hemisphere_latitude_df.groupby('Year')['Annual_Mean_Southern_Extratropics'].mean()
annual_mean_tropics = merged_temp_hemisphere_latitude_df.groupby('Year')['Annual_Mean_Tropics'].mean()

# Apply Lowess smoothing
northern_extratropics_smoothed = sm.nonparametric.lowess(annual_mean_northern_extratropics, annual_mean_northern_extratropics.index, frac=0.1)
southern_extratropics_smoothed = sm.nonparametric.lowess(annual_mean_southern_extratropics, annual_mean_southern_extratropics.index, frac=0.1)
tropics_smoothed = sm.nonparametric.lowess(annual_mean_tropics, annual_mean_tropics.index, frac=0.1)




#### Create  a line plot showing NH and SH Annual Mean temperatures along with their Lowess smoothing (The raw data points are also shown for reference) across three latitude bands (Northern Extratropics,Tropics,Southern Extratropics)

In [28]:


# Create traces for raw data and smoothed data
trace_northern = go.Scatter(x=annual_mean_northern_extratropics.index, y=annual_mean_northern_extratropics, mode='markers', name='Northern Extratropics (Raw)', marker=dict(color='blue'))
trace_southern = go.Scatter(x=annual_mean_southern_extratropics.index, y=annual_mean_southern_extratropics, mode='markers', name='Southern Extratropics (Raw)', marker=dict(color='red'))
trace_tropics = go.Scatter(x=annual_mean_tropics.index, y=annual_mean_tropics, mode='markers', name='Tropics (Raw)', marker=dict(color='green'))

trace_northern_smoothed = go.Scatter(x=northern_extratropics_smoothed[:, 0], y=northern_extratropics_smoothed[:, 1], mode='lines', name='Northern Extratropics (Smoothed)', line=dict(color='blue'))
trace_southern_smoothed = go.Scatter(x=southern_extratropics_smoothed[:, 0], y=southern_extratropics_smoothed[:, 1], mode='lines', name='Southern Extratropics (Smoothed)', line=dict(color='red'))
trace_tropics_smoothed = go.Scatter(x=tropics_smoothed[:, 0], y=tropics_smoothed[:, 1], mode='lines', name='Tropics (Smoothed)', line=dict(color='green'))

# Combine traces
data = [trace_northern, trace_southern, trace_tropics, trace_northern_smoothed, trace_southern_smoothed, trace_tropics_smoothed]

# Layout
layout = go.Layout(title='Annual Mean Temperatures Across Three Latitude Bands',
                   xaxis=dict(title='Year'),
                   yaxis=dict(title='Temperature (°C)'))

# Create figure
fig = go.Figure(data=data, layout=layout)

# Show plot
fig.show()


## Monthly Mean Global Surface Temperature

In [21]:
df_temp_global_monthly_mean = pd.read_csv("data/historical_temperature/Monthly Mean Global Surface Temperature/temp_global_monthly_mean.csv")
df_temp_global_monthly_mean.info()
df_temp_global_monthly_mean.shape
df_temp_global_monthly_mean.columns

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1729 entries, ('Year+Month', 'Station', 'Land+Ocean', 'Land_Only') to ('2023.96', '1.62', '1.37', '1.95')
Data columns (total 1 columns):
 #   Column                                   Non-Null Count  Dtype 
---  ------                                   --------------  ----- 
 0   Monthly Mean Global Surface Temperature  1729 non-null   object
dtypes: object(1)
memory usage: 131.1+ KB


Index(['Monthly Mean Global Surface Temperature'], dtype='object')

In [22]:
df_temp_global_monthly_mean.head(10)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Monthly Mean Global Surface Temperature
Year+Month,Station,Land+Ocean,Land_Only,Open_Ocean
1880.04,-0.29,-0.19,-0.82,-0.03
1880.13,-0.48,-0.24,-1.28,0.02
1880.21,-0.35,-0.09,-0.72,0.07
1880.29,-0.54,-0.16,-1.19,0.08
1880.38,-0.25,-0.10,-0.25,-0.05
1880.46,-0.42,-0.21,-0.23,-0.17
1880.54,-0.35,-0.18,-0.22,-0.18
1880.63,0.23,-0.10,-0.07,-0.11
1880.71,-0.39,-0.14,-0.60,-0.06


In [23]:
#### Create a Plotly line plot to show the Monthly Mean Global Surface Temperature 

In [24]:
# create a trace using go.Scatter with years_months_numeric as the x-axis data and
# land_ocean_temp_numeric as the y-axis data. The mode is set to 'lines' to create a line plot,
# and we specify the marker color as blue.
years_months_numeric = []
years_months = df_temp_global_monthly_mean.index
land_ocean_temp = df_temp_global_monthly_mean['Monthly Mean Global Surface Temperature']

for year_month in years_months:
    if isinstance(year_month, tuple):
        # Assuming the first element of the tuple is the year-month value
        if isinstance(year_month[0], str) and year_month[0].replace('.', '', 1).isdigit():
            year_month_value = float(year_month[0])
            years_months_numeric.append(year_month_value)
        else:
            print(f"Ignoring non-numeric value: {year_month[0]}")
    else:
        print("Ignoring non-tuple value")

print(years_months_numeric)


land_ocean_temp_numeric = []

for temp in land_ocean_temp:
    if isinstance(temp, str):
        # Check if the string represents a numeric value (including negative numbers)
        if temp.lstrip('-').replace('.', '', 1).isdigit():
            land_ocean_temp_numeric.append(float(temp))
        else:
            print(f"Ignoring non-numeric value: {temp}")
    else:
        print("Ignoring non-string value")

# Filter data for the years 1991 to 2021
filtered_years = []
filtered_temperatures = []

for year, temp in zip(years_months_numeric, land_ocean_temp_numeric):
    if 1991 <= year <= 2021:
        filtered_years.append(year)
        filtered_temperatures.append(temp)


# Create a trace for the line plot
trace = go.Scatter(x=filtered_years, y=filtered_temperatures, mode='lines', marker=dict(color='blue'), name='Temperature')

# Create layout for the plot
layout = go.Layout(
    title='Monthly Mean Global Surface Temperature (Land+Ocean) from year 1991 to 2021',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Temperature (°C)'),
    hovermode='closest',
    template='plotly_white'
)

# Create figure object
fig = go.Figure(data=[trace], layout=layout)

# Show plot
fig.show()


Ignoring non-numeric value: Year+Month
[1880.04, 1880.13, 1880.21, 1880.29, 1880.38, 1880.46, 1880.54, 1880.63, 1880.71, 1880.79, 1880.88, 1880.96, 1881.04, 1881.13, 1881.21, 1881.29, 1881.38, 1881.46, 1881.54, 1881.63, 1881.71, 1881.79, 1881.88, 1881.96, 1882.04, 1882.13, 1882.21, 1882.29, 1882.38, 1882.46, 1882.54, 1882.63, 1882.71, 1882.79, 1882.88, 1882.96, 1883.04, 1883.13, 1883.21, 1883.29, 1883.38, 1883.46, 1883.54, 1883.63, 1883.71, 1883.79, 1883.88, 1883.96, 1884.04, 1884.13, 1884.21, 1884.29, 1884.38, 1884.46, 1884.54, 1884.63, 1884.71, 1884.79, 1884.88, 1884.96, 1885.04, 1885.13, 1885.21, 1885.29, 1885.38, 1885.46, 1885.54, 1885.63, 1885.71, 1885.79, 1885.88, 1885.96, 1886.04, 1886.13, 1886.21, 1886.29, 1886.38, 1886.46, 1886.54, 1886.63, 1886.71, 1886.79, 1886.88, 1886.96, 1887.04, 1887.13, 1887.21, 1887.29, 1887.38, 1887.46, 1887.54, 1887.63, 1887.71, 1887.79, 1887.88, 1887.96, 1888.04, 1888.13, 1888.21, 1888.29, 1888.38, 1888.46, 1888.54, 1888.63, 1888.71, 1888.79, 1888.8

In [25]:
# Save lists to a data file 
df = pd.DataFrame({'years': filtered_years, 'temperatures': filtered_temperatures})
df.to_csv('filtered_temperature_data.csv', index=False)
