# Programming for Data Analytics – Project 2

# Question

• Analyse CO2 vs Temperature Anomaly from 800kyrs – present.
• Examine one other (paleo/modern) feature (e.g. CH4 or polar ice coverage)
• Examine Irish context:
o Climate change signals: (see Maynooth study: The emergence of a climate change
signal in long-term Irish meteorological observations - ScienceDirect)
• Fuse and analyse data from various data sources and format fused data set as a pandas
dataframe and export to csv and json formats
• For all of the above variables, analyse the data, the trends and the relationships between
them (temporal leads/lags/frequency analysis).
• Predict global temperature anomaly over next few decades (synthesise data) and compare to
published climate models if atmospheric CO2 trends continue
• Comment on accelerated warming based on the very latest features (e.g. temperature/polar ice coverage)
Use a Jupyter notebook for your analysis and track your progress using GitHub.
Use an academic referencing style

# Import Required Libraries:

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression
from scipy import signal
from datetime import date


# import data

In [None]:
from pathlib import Path

import pandas as pd

# Single place to point the notebook at the data folder. Every CSV below is
# resolved relative to it, so relocating the project means editing one line
# instead of six. The default is the location the notebook was written against.
DATA_DIR = Path(r'C:\Users\fifoa\OneDrive\Desktop\ATU\PFDA-PROJECT-2')

# Load CO2 IPCC data
CO2_IPCC_data = pd.read_csv(DATA_DIR / 'CO2IPCC.csv')

# Load CO2 Luthi data
CO2_Luthi_data = pd.read_csv(DATA_DIR / 'CO2Luthi.csv')

# Load Irish climate data
irish_climate_data = pd.read_csv(DATA_DIR / 'irish climate.csv')

# Load Polar Ice data
POLAR_ICE_data = pd.read_csv(DATA_DIR / 'POLAR ICE.csv')

# Load Temp Jouzel data
TEMP_JOUZEL_data = pd.read_csv(DATA_DIR / 'TEMPJOUZEL.csv')

# Load Temp NOAA data
TEMP_noaa_data = pd.read_csv(DATA_DIR / 'TEMPnoaa.csv')


# Check Data Samples:

In [None]:
# Preview the head of every loaded dataset, one after another.
# Headings are kept verbatim (including the leading newline on all but the
# first) so the printed output is unchanged.
sample_previews = (
    ("CO2 IPCC Data Sample:", CO2_IPCC_data),
    ("\nCO2 Luthi Data Sample:", CO2_Luthi_data),
    ("\nIrish Climate Data Sample:", irish_climate_data),
    ("\nPolar Ice Data Sample:", POLAR_ICE_data),
    ("\nTemp Jouzel Data Sample:", TEMP_JOUZEL_data),
    ("\nTemp NOAA Data Sample:", TEMP_noaa_data),
)

for heading, frame in sample_previews:
    print(heading)
    print(frame.head())



In [None]:
# Fuse all datasets into one wide frame with successive outer joins.
# NOTE(review): this assumes every dataset has a 'timestamp' column to join
# on — confirm against the actual CSV headers before relying on the result.
#
# Each entry is (frame to join, suffixes applied to column names that clash
# between the running result and that frame). Order matters: CO2 first, then
# temperature, then the Irish series, then polar ice.
merge_plan = (
    (CO2_Luthi_data, ('_IPCC', '_Luthi')),
    (TEMP_JOUZEL_data, ('_CO2', '_Jouzel')),
    (TEMP_noaa_data, ('_Jouzel', '_NOAA')),
    (irish_climate_data, ('_Temperature', '_Irish')),
    (POLAR_ICE_data, ('_Irish', '_Polar')),
)

merged_data = CO2_IPCC_data
for right_frame, suffix_pair in merge_plan:
    merged_data = pd.merge(
        merged_data,
        right_frame,
        on='timestamp',
        how='outer',
        suffixes=suffix_pair,
    )

# Display the first few rows of the merged DataFrame
merged_data.head()


# IDENTIFYING NON NUMERIC DATA FOR POLAR ICE DATA

In [None]:
# Find the polar-ice columns whose dtype is not numeric; the result is kept in
# `non_numeric_columns` for the clean-up step that follows.
non_numeric_columns = POLAR_ICE_data.select_dtypes(exclude=['number']).columns
print("Columns with non-numeric values:", non_numeric_columns)

# Show the distinct raw values of each such column so the offending entries
# can be inspected by eye.
for col_name in non_numeric_columns:
    distinct_values = POLAR_ICE_data[col_name].unique()
    print(f"Unique values in {col_name}: {distinct_values}")


In [None]:
# Imported here so the cell runs on a fresh top-to-bottom execution.
from scipy.stats import zscore

# Convert the non-numeric columns identified earlier to numbers; any value
# that cannot be parsed becomes NaN.
POLAR_ICE_data[non_numeric_columns] = POLAR_ICE_data[non_numeric_columns].apply(
    pd.to_numeric, errors='coerce'
)

# Column-wise z-scores. nan_policy='omit' matters: with the default
# ('propagate') a single NaN turns every z-score in that column into NaN,
# which would silently switch off outlier detection for exactly the columns
# that were just coerced.
z_scores = zscore(POLAR_ICE_data, nan_policy='omit')

# Drop every row holding at least one value more than 3 standard deviations
# from its column mean. NaN z-scores compare False, so a missing cell on its
# own never causes a row to be dropped.
outliers = (z_scores > 3) | (z_scores < -3)
POLAR_ICE_data = POLAR_ICE_data[~outliers.any(axis=1)]


In [None]:
# Show the dtype of every polar-ice column to verify the numeric conversion.
polar_ice_dtypes = POLAR_ICE_data.dtypes
print(polar_ice_dtypes)


In [None]:
from scipy.stats import zscore

# Remove polar-ice rows containing a value more than 3 standard deviations
# from its column mean.
# nan_policy='omit' keeps NaN cells (left behind by the earlier numeric
# coercion) out of the mean/std; with the default policy one NaN would make
# the whole column's z-scores NaN and no outlier in it would ever be flagged.
# NOTE(review): the preceding cell applies the same filter to this frame; if
# both cells run, rows are trimmed twice against recomputed statistics —
# confirm that is intended.
z_scores = zscore(POLAR_ICE_data, nan_policy='omit')
outliers = (z_scores > 3) | (z_scores < -3)
POLAR_ICE_data = POLAR_ICE_data[~outliers.any(axis=1)]

In [None]:
from scipy.stats import zscore

# Remove Jouzel temperature rows containing a value more than 3 standard
# deviations from its column mean.
# nan_policy='omit' ignores missing cells when computing mean/std; under the
# default policy a single NaN would make every z-score in that column NaN and
# outliers in it would go undetected. With no NaNs present the result is the
# same as the default.
# NOTE(review): assumes every column of TEMP_JOUZEL_data is numeric — confirm.
z_scores = zscore(TEMP_JOUZEL_data, nan_policy='omit')
outliers = (z_scores > 3) | (z_scores < -3)
TEMP_JOUZEL_data = TEMP_JOUZEL_data[~outliers.any(axis=1)]

# Identifying columns with non-numeric values in the NOAA temperature data

In [None]:
# Find the NOAA temperature columns whose dtype is not numeric; the result is
# kept in `non_numeric_columns` for the clean-up step that follows.
non_numeric_columns = TEMP_noaa_data.select_dtypes(exclude=['number']).columns
print("Columns with non-numeric values:", non_numeric_columns)

# Show the distinct raw values of each such column so the offending entries
# can be inspected by eye.
for col_name in non_numeric_columns:
    distinct_values = TEMP_noaa_data[col_name].unique()
    print(f"Unique values in {col_name}: {distinct_values}")


In [None]:
# Imported in-cell, matching the other z-score cells, so this cell does not
# depend on an earlier one having run.
from scipy.stats import zscore

# Convert the non-numeric columns identified earlier to numbers; any value
# that cannot be parsed becomes NaN.
TEMP_noaa_data[non_numeric_columns] = TEMP_noaa_data[non_numeric_columns].apply(
    pd.to_numeric, errors='coerce'
)

# Column-wise z-scores. nan_policy='omit' matters: with the default
# ('propagate') a single NaN turns every z-score in that column into NaN,
# which would silently switch off outlier detection for exactly the columns
# that were just coerced.
z_scores = zscore(TEMP_noaa_data, nan_policy='omit')

# Drop every row holding at least one value more than 3 standard deviations
# from its column mean. NaN z-scores compare False, so a missing cell on its
# own never causes a row to be dropped.
outliers = (z_scores > 3) | (z_scores < -3)
TEMP_noaa_data = TEMP_noaa_data[~outliers.any(axis=1)]


In [None]:
# Show the dtype of every NOAA temperature column to verify the numeric
# conversion.
noaa_dtypes = TEMP_noaa_data.dtypes
print(noaa_dtypes)


In [None]:
from scipy.stats import zscore

# Remove NOAA temperature rows containing a value more than 3 standard
# deviations from its column mean.
# nan_policy='omit' keeps NaN cells (left behind by the earlier numeric
# coercion) out of the mean/std; with the default policy one NaN would make
# the whole column's z-scores NaN and no outlier in it would ever be flagged.
# NOTE(review): the preceding cell applies the same filter to this frame; if
# both cells run, rows are trimmed twice against recomputed statistics —
# confirm that is intended.
z_scores = zscore(TEMP_noaa_data, nan_policy='omit')
outliers = (z_scores > 3) | (z_scores < -3)
TEMP_noaa_data = TEMP_noaa_data[~outliers.any(axis=1)]

In [None]:
# List the column labels of each dataset so the available fields can be
# compared side by side. Headings are reproduced verbatim, including the
# leading newline on all but the first.
column_listings = (
    ("Columns in CO2 IPCC Data:", CO2_IPCC_data),
    ("\nColumns in CO2 Luthi Data:", CO2_Luthi_data),
    ("\nColumns in Polar Ice Data:", POLAR_ICE_data),
    ("\nColumns in Temp NOAA Data:", TEMP_noaa_data),
    ("\nColumns in Temp Jouzel Data:", TEMP_JOUZEL_data),
)

for title, frame in column_listings:
    print(title)
    print(frame.columns)


In [None]:
# Restrict every dataset to the analysis window "800 kyr – present": keep
# records whose age is AT MOST 800,000 (`>=` would keep only records older
# than the window, i.e. the opposite of what the project brief asks for).
# NOTE(review): assumes each dataset has an 'age' column expressed in years
# before present — confirm against each dataset's actual columns.
MAX_AGE_YEARS = 800_000

# .copy() gives each filtered frame its own data, so adding or overwriting
# columns on it later does not raise SettingWithCopyWarning.

# Filter CO2 IPCC data
CO2_IPCC_data = CO2_IPCC_data[CO2_IPCC_data['age'] <= MAX_AGE_YEARS].copy()

# Filter CO2 Luthi data
CO2_Luthi_data = CO2_Luthi_data[CO2_Luthi_data['age'] <= MAX_AGE_YEARS].copy()

# Filter Polar Ice data
POLAR_ICE_data = POLAR_ICE_data[POLAR_ICE_data['age'] <= MAX_AGE_YEARS].copy()

# Filter Temp NOAA data
TEMP_noaa_data = TEMP_noaa_data[TEMP_noaa_data['age'] <= MAX_AGE_YEARS].copy()

# Filter Temp Jouzel data
TEMP_JOUZEL_data = TEMP_JOUZEL_data[TEMP_JOUZEL_data['age'] <= MAX_AGE_YEARS].copy()


# Data formatting

In [None]:
# Parse the 'Date' column into datetimes and make it the index.
# A new frame is built with assign()/set_index() and rebound to the same name
# rather than assigning a column on the existing frame and using inplace=True:
# the frame was produced by boolean filtering, where in-place column
# assignment raises SettingWithCopyWarning, and rebinding leaves the original
# untouched if date parsing fails.
# NOTE(review): assumes CO2_IPCC_data has a 'Date' column — confirm.
CO2_IPCC_data = (
    CO2_IPCC_data
    .assign(Date=pd.to_datetime(CO2_IPCC_data['Date']))
    .set_index('Date')
)
