# Climate Change Data Analysis Based On Historical Temperature Data
----------------------------------------------------------------------

Data from NASA Goddard Institute for Space Studies (GISS) 

Packages to be imported 

In [69]:
# Import the pandas library and alias it as pd for ease of use
import pandas as pd

# Import the statsmodels.api module and alias it as sm for ease of use
import statsmodels.api as sm

# Import the numpy library and alias it as np for ease of use
import numpy as np

# Import the graph_objs module from the plotly library to create interactive graphs
import plotly.graph_objs as go

# Import the seaborn library for statistical data visualization
import seaborn as sns

# Import the os module to perform operating system related tasks
import os




 ## Hemispheric Temperature Change

In [70]:
# Get the current working directory
current_directory = os.getcwd()

# Get the parent directory (directory before the current directory)
parent_directory = os.path.dirname(current_directory)


In [71]:
# Read the CSV file into a pandas DataFrame
df_temp_hemisphere = pd.read_csv(parent_directory+"\\data/historical_temperature/Hemispheric Temperature Change/temp_hemisphere.csv")
df_temp_hemisphere.info()
# Display the shape of the DataFrame (number of rows and columns)
df_temp_hemisphere.shape


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 15 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Year     144 non-null    int64  
 1   Glob     144 non-null    float64
 2   NHem     144 non-null    float64
 3   SHem     144 non-null    float64
 4   24N-90N  144 non-null    float64
 5   24S-24N  144 non-null    float64
 6   90S-24S  144 non-null    float64
 7   64N-90N  144 non-null    float64
 8   44N-64N  144 non-null    float64
 9   24N-44N  144 non-null    float64
 10  EQU-24N  144 non-null    float64
 11  24S-EQU  144 non-null    float64
 12  44S-24S  144 non-null    float64
 13  64S-44S  144 non-null    float64
 14  90S-64S  144 non-null    float64
dtypes: float64(14), int64(1)
memory usage: 17.0 KB


(144, 15)

In [72]:
# Display the first five rows
df_temp_hemisphere.head(5)

Unnamed: 0,Year,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
0,1880,-0.17,-0.28,-0.05,-0.38,-0.13,-0.02,-0.8,-0.52,-0.26,-0.14,-0.11,-0.04,0.05,0.67
1,1881,-0.09,-0.18,0.0,-0.36,0.1,-0.07,-0.93,-0.5,-0.19,0.11,0.1,-0.06,-0.07,0.6
2,1882,-0.11,-0.21,-0.01,-0.31,-0.04,0.01,-1.41,-0.31,-0.13,-0.04,-0.05,0.01,0.04,0.63
3,1883,-0.17,-0.28,-0.07,-0.34,-0.17,-0.01,-0.18,-0.58,-0.24,-0.17,-0.16,-0.04,0.07,0.5
4,1884,-0.28,-0.42,-0.15,-0.61,-0.14,-0.14,-1.3,-0.66,-0.45,-0.12,-0.17,-0.19,-0.02,0.65


In [73]:
# Display the last five rows
df_temp_hemisphere.tail(5)

Unnamed: 0,Year,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
139,2019,0.98,1.2,0.75,1.42,0.9,0.64,2.71,1.43,0.99,0.9,0.9,0.75,0.39,0.83
140,2020,1.01,1.35,0.68,1.67,0.86,0.57,2.88,1.81,1.19,0.88,0.84,0.58,0.39,0.89
141,2021,0.85,1.14,0.56,1.42,0.66,0.53,2.05,1.35,1.26,0.72,0.6,0.72,0.32,0.3
142,2022,0.89,1.16,0.62,1.52,0.57,0.7,2.34,1.5,1.27,0.62,0.51,0.79,0.38,1.09
143,2023,1.17,1.49,0.85,1.78,1.05,0.72,2.58,1.87,1.47,1.07,1.04,0.91,0.44,0.63


In [74]:
# Display the columns
df_temp_hemisphere.columns

Index(['Year', 'Glob', 'NHem', 'SHem', '24N-90N', '24S-24N', '90S-24S',
       '64N-90N', '44N-64N', '24N-44N', 'EQU-24N', '24S-EQU', '44S-24S',
       '64S-44S', '90S-64S'],
      dtype='object')

## Temperature Change for Three Latitude Bands

In [75]:
# Read the CSV file into a pandas DataFrame
df_temp_three_latitude_bands = pd.read_csv(parent_directory+"\\data/historical_temperature/Temperature Change for Three Latitude Bands/temp_three_latitude_bands.csv")
# Display the shape of the DataFrame (number of rows and columns)
df_temp_three_latitude_bands.shape


(144, 15)

In [76]:
# Display the first five rows
df_temp_three_latitude_bands.head(5)

Unnamed: 0,Year,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
0,1880,-0.17,-0.28,-0.05,-0.38,-0.13,-0.02,-0.8,-0.52,-0.26,-0.14,-0.11,-0.04,0.05,0.67
1,1881,-0.09,-0.18,0.0,-0.36,0.1,-0.07,-0.93,-0.5,-0.19,0.11,0.1,-0.06,-0.07,0.6
2,1882,-0.11,-0.21,-0.01,-0.31,-0.04,0.01,-1.41,-0.31,-0.13,-0.04,-0.05,0.01,0.04,0.63
3,1883,-0.17,-0.28,-0.07,-0.34,-0.17,-0.01,-0.18,-0.58,-0.24,-0.17,-0.16,-0.04,0.07,0.5
4,1884,-0.28,-0.42,-0.15,-0.61,-0.14,-0.14,-1.3,-0.66,-0.45,-0.12,-0.17,-0.19,-0.02,0.65


In [77]:
# Display the last five rows
df_temp_three_latitude_bands.tail(5)

Unnamed: 0,Year,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
139,2019,0.98,1.2,0.75,1.42,0.9,0.64,2.71,1.43,0.99,0.9,0.9,0.75,0.39,0.83
140,2020,1.01,1.35,0.68,1.67,0.86,0.57,2.88,1.81,1.19,0.88,0.84,0.58,0.39,0.89
141,2021,0.85,1.14,0.56,1.42,0.66,0.53,2.05,1.35,1.26,0.72,0.6,0.72,0.32,0.3
142,2022,0.89,1.16,0.62,1.52,0.57,0.7,2.34,1.5,1.27,0.62,0.51,0.79,0.38,1.09
143,2023,1.17,1.49,0.85,1.78,1.05,0.72,2.58,1.87,1.47,1.07,1.04,0.91,0.44,0.63


In [78]:
df_temp_three_latitude_bands.columns

Index(['Year', 'Glob', 'NHem', 'SHem', '24N-90N', '24S-24N', '90S-24S',
       '64N-90N', '44N-64N', '24N-44N', 'EQU-24N', '24S-EQU', '44S-24S',
       '64S-44S', '90S-64S'],
      dtype='object')

In [79]:
df_temp_hemisphere.columns

Index(['Year', 'Glob', 'NHem', 'SHem', '24N-90N', '24S-24N', '90S-24S',
       '64N-90N', '44N-64N', '24N-44N', 'EQU-24N', '24S-EQU', '44S-24S',
       '64S-44S', '90S-64S'],
      dtype='object')

#### Create a merged dataframe with df_temp_hemisphere and df_temp_three_latitude_bands on the column year

In [80]:



merged_temp_hemisphere_latitude_df = pd.merge(df_temp_hemisphere, df_temp_three_latitude_bands, on='Year')


Filtering the rows for the years from  1991 to 2021 as the other module of the project is analyzing the data for the years 1991 and 2021

In [81]:


# Filter rows for the years from 1991 to 2021
merged_temp_hemisphere_latitude_df = merged_temp_hemisphere_latitude_df[(merged_temp_hemisphere_latitude_df['Year'] >= 1991) & (merged_temp_hemisphere_latitude_df['Year'] <= 2021)]


In [82]:
# Renaming the columns 
# Define a function to rename columns
def rename_columns(column_name):
    if column_name == 'Year':
        return column_name
    elif column_name.endswith('_x'):
        return column_name[:-2] + '_h'
    elif column_name.endswith('_y'):
        return column_name[:-2] + '_l'
    else:
        return column_name

# Replace column names with suffix '_x' with '_h' and suffix '_y' with '_l'
merged_temp_hemisphere_latitude_df = merged_temp_hemisphere_latitude_df.rename(columns=rename_columns)



<!-- Locally Weighted Scatterplot Smoothing (LOWESS) is a non-parametric regression method used to create a smooth curve through a set of data points, without assuming any specific functional form for the relationship between the variables.

In the context of the Northern Extratropics, LOWESS smoothing involves fitting a smooth curve to the annual mean temperature data points for that region. The curve is fit by iteratively re-weighted least squares, where each data point is given a weight based on its distance from the point where the smoothing is being evaluated.

So, when we talk about LOWESS smoothing for the Northern Extratropics, we are essentially fitting a smooth curve to the annual mean temperature data for that region, capturing the overall trend while reducing noise and variability. This smoothed curve can help visualize the underlying pattern in the temperature data over time. -->

#### Calculating the data for visualization for temperature change across Northern Hemisphere and Southern Hemisphere

In [83]:



# Calculate NH Annual Mean
NH_annual_mean = merged_temp_hemisphere_latitude_df[['Year', 'NHem_h']].groupby('Year').mean()

# Calculate SH Annual Mean
SH_annual_mean = merged_temp_hemisphere_latitude_df[['Year', 'SHem_h']].groupby('Year').mean()

# Apply Lowess smoothing to NH Annual Mean
nh_smoothed = sm.nonparametric.lowess(NH_annual_mean['NHem_h'], NH_annual_mean.index, frac=0.1)

# Apply Lowess smoothing to SH Annual Mean
sh_smoothed = sm.nonparametric.lowess(SH_annual_mean['SHem_h'], SH_annual_mean.index, frac=0.1)



#### Create  a line plot showing NH and SH Annual Mean temperatures along with their Lowess smoothing across Northern Hemisphere and Southern Hemisphere (The raw data points are also shown for reference)

In [84]:


# create traces for NH and SH Annual Mean along with their smoothed versions using go.Scatter
# For each trace, specify the x-coordinates as the years (NH_annual_mean.index and SH_annual_mean.index) and y-coordinates as the corresponding temperatures.
# Set the mode to 'lines' for the smoothed traces and 'markers' for the raw data.
# Each trace is added to the data list in the go.Figure constructor.
trace_nh = go.Scatter(x=NH_annual_mean.index, y=nh_smoothed[:, 1], mode='lines', name='NH Annual Mean (Smoothed)')
trace_sh = go.Scatter(x=SH_annual_mean.index, y=sh_smoothed[:, 1], mode='lines', name='SH Annual Mean (Smoothed)')
trace_nh_raw = go.Scatter(x=NH_annual_mean.index, y=NH_annual_mean['NHem_h'], mode='markers', name='NH Annual Mean (Raw)', marker=dict(color='blue'))
trace_sh_raw = go.Scatter(x=SH_annual_mean.index, y=SH_annual_mean['SHem_h'], mode='markers', name='SH Annual Mean (Raw)', marker=dict(color='red'))

# Create layout
layout = go.Layout(title='Northern Hemisphere  and Southern Hemisphere Annual Mean Temperature with Lowess Smoothing',
                   xaxis=dict(title='Year'),
                   yaxis=dict(title='Temperature (°C)'))

# Create figure
fig = go.Figure(data=[trace_nh, trace_sh, trace_nh_raw, trace_sh_raw], layout=layout)

# Show plot
fig.show()


#### Calculating  data for visualization of the temperature change across three latitude bands(Northern Extratropics,Tropics,Southern Extratropics)

In [85]:


# Calculate Annual Mean for Northern Extratropics (24N-90N), Southern Extratropics (90S-24S), and Tropics (24S-24N)
merged_temp_hemisphere_latitude_df['Annual_Mean_Northern_Extratropics'] = merged_temp_hemisphere_latitude_df[['24N-90N_l']].mean(axis=1)
merged_temp_hemisphere_latitude_df['Annual_Mean_Southern_Extratropics'] = merged_temp_hemisphere_latitude_df[['90S-24S_l']].mean(axis=1)
merged_temp_hemisphere_latitude_df['Annual_Mean_Tropics'] = merged_temp_hemisphere_latitude_df[['24S-24N_l']].mean(axis=1)

# Group by Year and calculate mean for each region
annual_mean_northern_extratropics = merged_temp_hemisphere_latitude_df.groupby('Year')['Annual_Mean_Northern_Extratropics'].mean()
annual_mean_southern_extratropics = merged_temp_hemisphere_latitude_df.groupby('Year')['Annual_Mean_Southern_Extratropics'].mean()
annual_mean_tropics = merged_temp_hemisphere_latitude_df.groupby('Year')['Annual_Mean_Tropics'].mean()

# Apply Lowess smoothing
northern_extratropics_smoothed = sm.nonparametric.lowess(annual_mean_northern_extratropics, annual_mean_northern_extratropics.index, frac=0.1)
southern_extratropics_smoothed = sm.nonparametric.lowess(annual_mean_southern_extratropics, annual_mean_southern_extratropics.index, frac=0.1)
tropics_smoothed = sm.nonparametric.lowess(annual_mean_tropics, annual_mean_tropics.index, frac=0.1)




#### Create  a line plot showing NH and SH Annual Mean temperatures along with their Lowess smoothing (The raw data points are also shown for reference) across three latitude bands (Northern Extratropics,Tropics,Southern Extratropics)

In [86]:


# Create traces for raw data and smoothed data
trace_northern = go.Scatter(x=annual_mean_northern_extratropics.index, y=annual_mean_northern_extratropics, mode='markers', name='Northern Extratropics (Raw)', marker=dict(color='blue'))
trace_southern = go.Scatter(x=annual_mean_southern_extratropics.index, y=annual_mean_southern_extratropics, mode='markers', name='Southern Extratropics (Raw)', marker=dict(color='red'))
trace_tropics = go.Scatter(x=annual_mean_tropics.index, y=annual_mean_tropics, mode='markers', name='Tropics (Raw)', marker=dict(color='green'))

trace_northern_smoothed = go.Scatter(x=northern_extratropics_smoothed[:, 0], y=northern_extratropics_smoothed[:, 1], mode='lines', name='Northern Extratropics (Smoothed)', line=dict(color='blue'))
trace_southern_smoothed = go.Scatter(x=southern_extratropics_smoothed[:, 0], y=southern_extratropics_smoothed[:, 1], mode='lines', name='Southern Extratropics (Smoothed)', line=dict(color='red'))
trace_tropics_smoothed = go.Scatter(x=tropics_smoothed[:, 0], y=tropics_smoothed[:, 1], mode='lines', name='Tropics (Smoothed)', line=dict(color='green'))

# Combine traces
data = [trace_northern, trace_southern, trace_tropics, trace_northern_smoothed, trace_southern_smoothed, trace_tropics_smoothed]

# Layout
layout = go.Layout(title='Annual Mean Temperatures Across Three Latitude Bands',
                   xaxis=dict(title='Year'),
                   yaxis=dict(title='Temperature (°C)'))

# Create figure
fig = go.Figure(data=data, layout=layout)

# Show plot
fig.show()


## Monthly Mean Global Surface Temperature

In [87]:
# Read the CSV file into a pandas DataFrame
df_temp_global_monthly_mean = pd.read_csv(parent_directory+"\\data/historical_temperature/Monthly Mean Global Surface Temperature/temp_global_monthly_mean.csv")
df_temp_global_monthly_mean.info()
df_temp_global_monthly_mean.shape
df_temp_global_monthly_mean.columns

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1729 entries, ('Year+Month', 'Station', 'Land+Ocean', 'Land_Only') to ('2023.96', '1.62', '1.37', '1.95')
Data columns (total 1 columns):
 #   Column                                   Non-Null Count  Dtype 
---  ------                                   --------------  ----- 
 0   Monthly Mean Global Surface Temperature  1729 non-null   object
dtypes: object(1)
memory usage: 131.1+ KB


Index(['Monthly Mean Global Surface Temperature'], dtype='object')

#### Create a Plotly line plot to show the Monthly Mean Global Surface Temperature

In [88]:
# create a trace using go.Scatter with years_months_numeric as the x-axis data and
# land_ocean_temp_numeric as the y-axis data. The mode is set to 'lines' to create a line plot,
# and we specify the marker color as blue.
years_months_numeric = []
years_months = df_temp_global_monthly_mean.index
land_ocean_temp = df_temp_global_monthly_mean['Monthly Mean Global Surface Temperature']

for year_month in years_months:
    if isinstance(year_month, tuple):
        # Assuming the first element of the tuple is the year-month value
        if isinstance(year_month[0], str) and year_month[0].replace('.', '', 1).isdigit():
            year_month_value = float(year_month[0])
            years_months_numeric.append(year_month_value)
        else:
            print(f"Ignoring non-numeric value: {year_month[0]}")
    else:
        print("Ignoring non-tuple value")

# print(years_months_numeric)


land_ocean_temp_numeric = []

for temp in land_ocean_temp:
    if isinstance(temp, str):
        # Check if the string represents a numeric value (including negative numbers)
        if temp.lstrip('-').replace('.', '', 1).isdigit():
            land_ocean_temp_numeric.append(float(temp))
        else:
            print(f"Ignoring non-numeric value: {temp}")
    else:
        print("Ignoring non-string value")

# Filter data for the years 1991 to 2021
filtered_years = []
filtered_temperatures = []

for year, temp in zip(years_months_numeric, land_ocean_temp_numeric):
    if 1991 <= year <= 2021:
        filtered_years.append(year)
        filtered_temperatures.append(temp)


# Create a trace for the line plot
trace = go.Scatter(x=filtered_years, y=filtered_temperatures, mode='lines', marker=dict(color='blue'), name='Temperature')

# Create layout for the plot
layout = go.Layout(
    title='Monthly Mean Global Surface Temperature (Land+Ocean) from year 1991 to 2021',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Temperature (°C)'),
    hovermode='closest',
    template='plotly_white'
)

# Create figure object
fig = go.Figure(data=[trace], layout=layout)

# Show plot
fig.show()


Ignoring non-numeric value: Year+Month
Ignoring non-numeric value: Open_Ocean
