In [432]:
# Programing for data analytics project 2

# Question

• Analyse CO2 vs Temperature Anomaly from 800kyrs – present.
• Examine one other (paleo/modern) features (e.g. CH4 or polar ice-coverage)
• Examine Irish context:
o Climate change signals: (see Maynooth study: The emergence of a climate change
signal in long-term Irish meteorological observations - ScienceDirect)
• Fuse and analyse data from various data sources and format fused data set as a pandas
dataframe and export to csv and json formats
• For all of the above variables, analyse the data, the trends and the relationships between
them (temporal leads/lags/frequency analysis).
• Predict global temperature anomaly over next few decades (synthesise data) and compare to
published climate models if atmospheric CO2 trends continue
• Comment on accelerated warming based on very latest features (e.g. temperature/polar-icecoverage)
Use a Jupyter notebook for your analysis and track your progress using GitHub.
Use an academic referencing style

# Import Required Libraries:

In [433]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


load data from c02 luthi

In [434]:
import pandas as pd

# Load CO2 data from LUTHI
CO2_LUTHI_path = 'C:\\Users\\fifoa\\OneDrive\\Desktop\\ATU\\PFDA-PROJECT-2\\CO2 LUTHI.xls'
CO2_LUTHI_sheet1 = pd.read_excel(CO2_LUTHI_path, sheet_name='1.  new CO2 data')
CO2_LUTHI_sheet2 = pd.read_excel(CO2_LUTHI_path, sheet_name='2.  Vostok-TD-Dome C')

# Extract relevant columns

In [435]:

monnin_luthi = CO2_LUTHI_sheet2.iloc[6:189, 1:3]
pepin_luthi = CO2_LUTHI_sheet2.iloc[19:353, 5:7]
siegenthaler_1_luthi = CO2_LUTHI_sheet1.iloc[6:26, 16:18]
siegenthaler_2_luthi = CO2_LUTHI_sheet2.iloc[6:328, 12:14]
luthi_luthi = CO2_LUTHI_sheet1.iloc[16:253, 1:3]

# Rename columns

In [436]:

monnin_luthi.rename(columns={'Unnamed: 1': 'yr_bp', 'Unnamed: 2': 'co2_ppmv'}, inplace=True)
pepin_luthi.rename(columns={'Unnamed: 5': 'yr_bp', 'Unnamed: 6': 'co2_ppmv'}, inplace=True)
siegenthaler_1_luthi.rename(columns={'Unnamed: 16': 'yr_bp', 'Unnamed: 17': 'co2_ppmv'}, inplace=True)
siegenthaler_2_luthi.rename(columns={'Unnamed: 12': 'yr_bp', 'Unnamed: 13': 'co2_ppmv'}, inplace=True)
luthi_luthi.rename(columns={'Unnamed: 1': 'yr_bp', 'Unnamed: 2': 'co2_ppmv'}, inplace=True)

# Load CO2 data from IPCC

In [437]:

CO2_IPCC_path = 'C:\\Users\\fifoa\\OneDrive\\Desktop\\ATU\\PFDA-PROJECT-2\\CO2 IPCC.xlsx'
CO2_IPCC_data = pd.read_excel(CO2_IPCC_path, sheet_name='all records')

# Extract relevant columns

In [438]:

rubino = CO2_IPCC_data.iloc[90:, [83, 86]]
macfarling = CO2_IPCC_data.iloc[137:, 68:70]
monnin = CO2_IPCC_data.iloc[25:120, 2:4]
marcott = CO2_IPCC_data.iloc[31:321, 98:100]
ahn = CO2_IPCC_data.iloc[7:202, 89:91]
bereiter = CO2_IPCC_data.iloc[28:106, 34:36]
bereiter_2 = CO2_IPCC_data.iloc[60:154, 39:41]
schneider = CO2_IPCC_data.iloc[6:, 65:67]
petit = CO2_IPCC_data.iloc[124:348, 7:9]
siegenthaler = CO2_IPCC_data.iloc[6:26, 20:22]
siegenthaler_2 = CO2_IPCC_data.iloc[6:226, 15:17]
bereiter_3 = CO2_IPCC_data.iloc[37:, 102:104]

# Rename columns

In [439]:

rubino.rename(columns={'Unnamed: 83': 'yr_bp', 'Unnamed: 86': 'co2_ppmv'}, inplace=True)
macfarling.rename(columns={'Law Dome (0-2 kyr BP)': 'yr_bp', 'Unnamed: 69': 'co2_ppmv'}, inplace=True)
monnin.rename(columns={'Unnamed: 2': 'yr_bp', 'Unnamed: 3': 'co2_ppmv'}, inplace=True)
marcott.rename(columns={'Unnamed: 98': 'yr_bp', 'Unnamed: 99': 'co2_ppmv'}, inplace=True)
ahn.rename(columns={'Unnamed: 89': 'yr_bp', 'Unnamed: 90': 'co2_ppmv'}, inplace=True)
bereiter.rename(columns={'Unnamed: 34': 'yr_bp', 'Unnamed: 35': 'co2_ppmv'}, inplace=True)
bereiter_2.rename(columns={'Unnamed: 39': 'yr_bp', 'Unnamed: 40': 'co2_ppmv'}, inplace=True)
schneider.rename(columns={'Unnamed: 65': 'yr_bp', 'Unnamed: 66': 'co2_ppmv'}, inplace=True)
petit.rename(columns={'Unnamed: 7': 'yr_bp', 'Unnamed: 8': 'co2_ppmv'}, inplace=True)
siegenthaler.rename(columns={'Unnamed: 20': 'yr_bp', 'Unnamed: 21': 'co2_ppmv'}, inplace=True)
siegenthaler_2.rename(columns={'Unnamed: 15': 'yr_bp', 'Unnamed: 16': 'co2_ppmv'}, inplace=True)
bereiter_3.rename(columns={'Unnamed: 102': 'yr_bp', 'Unnamed: 103': 'co2_ppmv'}, inplace=True)

# Merge datasets

In [440]:
merged_data = pd.concat([monnin_luthi, pepin_luthi, siegenthaler_1_luthi, siegenthaler_2_luthi, luthi_luthi,
                         rubino, macfarling, monnin, marcott, ahn, bereiter, bereiter_2, schneider, petit,
                         siegenthaler, siegenthaler_2, bereiter_3], ignore_index=True)

# Explore the merged dataset

In [441]:
# Explore the merged dataset
print(merged_data.head())

  yr_bp co2_ppmv
0   137    280.4
1   268    274.9
2   279    277.9
3   395    279.1
4   404    281.9


# CLENAING out non-DataFrame (EMPTY DATA FRAMES) objects from the merged_data,THIS WILL SKIP EMPTY DATAFRAMES

In [442]:
# List of DataFrames in the 'merged_data' variable
merged_data = [monnin_luthi, pepin_luthi, siegenthaler_1_luthi, siegenthaler_2_luthi, luthi_luthi,
               rubino, macfarling, monnin, marcott, ahn, bereiter, bereiter_2, schneider, petit,
               siegenthaler, siegenthaler_2, bereiter_3]

# Filter out non-DataFrame objects
merged_data = [df for df in merged_data if isinstance(df, pd.DataFrame)]


Generating a column to compute the number of years before 2023 

In [443]:
import pandas as pd

def process_dataframe(df):
    # Print the original column names
    print(f"Original Column Names for DataFrame: {df.columns}")

    # Check if the DataFrame is not empty
    if not df.empty:
        # Rename columns
        df.rename(columns={'Unnamed: 1': 'yr_bp', 'Unnamed: 2': 'co2_ppmv'}, inplace=True)

        # Print the column names after renaming
        print(f"Column Names After Renaming for DataFrame: {df.columns}")

        # Create a column that calculates the number of years before 2023
        df['years_before_2023'] = 2023 - df['yr_bp']

        # Create a column that calculates the year
        df['calculated_year'] = 2023 + df['years_before_2023']

        # Drop rows with null values
        df.dropna(inplace=True)
    else:
        print("DataFrame is empty. Skipping processing.")



runing  loop to apply the process_dataframe function to each DataFrame and print the head of one of the processed DataFrames:

In [444]:
# Apply the processing function to all DataFrames in the merged dataset
for df in merged_data:
    process_dataframe(df)

# Example: Print the head of one of the processed DataFrames
print(monnin_luthi.head())


Original Column Names for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Column Names After Renaming for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Original Column Names for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Column Names After Renaming for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Original Column Names for DataFrame: Index([], dtype='object')
DataFrame is empty. Skipping processing.
Original Column Names for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Column Names After Renaming for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Original Column Names for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Column Names After Renaming for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Original Column Names for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Column Names After Renaming for DataFrame: Index(['yr_bp', 'co2_ppmv'], dtype='object')
Original Column Names for DataFrame: Index(['y

# CO2 Data 1979 - 2023 
Data is the most recent uptill date  data is gotten from https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_gl.txt accesed 01/01/2024

Loading in the file

In [445]:
CO2_naoo_path = 'https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_gl.txt'

# print colunms columns

In [446]:
import pandas as pd

# Define the URL
CO2_naoo_path = 'https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_gl.txt'

# Read the data into a DataFrame
co2_data = pd.read_csv(CO2_naoo_path, delim_whitespace=True, skiprows=72, header=None, names=['Year', 'Month', 'Interpolated', 'Trend', '#Days'])

# Print the first 5 rows of the DataFrame
print(co2_data.head())





             Year   Month  Interpolated   Trend  #Days
1981 9   1981.708  337.98          0.11  340.36   0.08
     10  1981.792  339.07          0.09  340.42   0.08
     11  1981.875  340.18          0.11  340.50   0.07
     12  1981.958  340.75          0.09  340.52   0.07
1982 1   1982.042  341.37          0.08  340.74   0.06


# Read the data into a DataFrame

In [447]:
import pandas as pd
co2_data = pd.read_csv(CO2_naoo_path, delim_whitespace=True, skiprows=72, header=None, names=['Year', 'Month', 'Interpolated', 'Trend', '#Days'])


rename columns 

In [448]:
# Rename columns
co2_data.rename(columns={'Year': 'year', 'Interpolated': 'co2_ppmv', 'Trend': 'unc'}, inplace=True)

# Print the DataFrame with renamed columns
print(co2_data[['year', 'Month', 'co2_ppmv', 'unc', '#Days']].head())


             year   Month  co2_ppmv     unc  #Days
1981 9   1981.708  337.98      0.11  340.36   0.08
     10  1981.792  339.07      0.09  340.42   0.08
     11  1981.875  340.18      0.11  340.50   0.07
     12  1981.958  340.75      0.09  340.52   0.07
1982 1   1982.042  341.37      0.08  340.74   0.06
