In [1]:
#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
matplotlib.use('Qt5Agg')
sns.set_style("whitegrid")
import calendar

# DATA PREPROCESSING

### MSTA

In [2]:
MSTA= pd.read_excel('Data_36544086.xlsx', sheet_name='MSTA',
                                header=0, 
                                usecols=['Time', 'Anomaly (deg C)'],
                                parse_dates=True).squeeze()

In [3]:
MSTA['Time'] = pd.to_datetime(MSTA['Time'])
MSTA = MSTA.set_index('Time')

In [4]:
MSTA

Unnamed: 0_level_0,Anomaly (deg C)
Time,Unnamed: 1_level_1
1850-01-01,-0.674564
1850-02-01,-0.333416
1850-03-01,-0.591323
1850-04-01,-0.588721
1850-05-01,-0.508817
...,...
2024-08-01,1.239584
2024-09-01,1.144937
2024-10-01,1.199982
2024-11-01,1.225049


In [5]:
# Extract data from the year 1950 onwards
MSTA_1950_onwards = MSTA[MSTA.index >= '1950-01-01']
MSTA_1950_onwards

Unnamed: 0_level_0,Anomaly (deg C)
Time,Unnamed: 1_level_1
1950-01-01,-0.300044
1950-02-01,-0.370036
1950-03-01,-0.216438
1950-04-01,-0.243616
1950-05-01,-0.137298
...,...
2024-08-01,1.239584
2024-09-01,1.144937
2024-10-01,1.199982
2024-11-01,1.225049


### Calendar Adjustment

In [6]:
#Calendar adjustment
# Create empty column for days in the month
def calendar_adjustment(df):
    df["Days"] = np.nan
    # Fill empty columns with the days per calendar month
    for date in df.index:
        df.loc[date, "Days"] = calendar.monthrange(date.year, date.month)[1]
    # Perform calendar adjustment
    df["Adjusted Data"] = (df.iloc[:, 0] * 365.25 / (12 * df["Days"])).round(2)


calendar_adjustment(MSTA_1950_onwards)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Days"] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Adjusted Data"] = (df.iloc[:, 0] * 365.25 / (12 * df["Days"])).round(2)


In [7]:
MSTA_1950_onwards= MSTA_1950_onwards.drop(columns=['Days', 'Anomaly (deg C)'])

In [8]:
def rename_columns(df, df_name):
    df = df.rename(columns={'Adjusted Data': df_name})
    return df

In [9]:
MSTA_1950_onwards= rename_columns(MSTA_1950_onwards, 'Anomaly (adjusted deg C)')#


In [10]:
# Add frequency to the index
def frequency_add(i):
    i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)

frequency_add(MSTA_1950_onwards)

# ACF/PACF plots

In [11]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [12]:
# ACF and PACF plots
fig, ax = plt.subplots(2, 1, figsize=(10, 7))

# ACF plot 
plot_acf(MSTA_1950_onwards, title='ACF of MSTA (1950 onwards)', lags=100, ax=ax[0])

# PACF plot 
plot_pacf(MSTA_1950_onwards, title='PACF of MSTA_1950_onwards ', lags=100, ax=ax[1])
plt.title('PACF of MSTA (1950 onwards)', fontsize=20)
plt.tight_layout()
plt.show()
