# Global Temperature Preprocessing

We process NASA’s **Global Land–Ocean Temperature Index** to compute **absolute global temperatures**.

- NASA anomalies are reported relative to the **1951–1980 mean (14.13 °C)**, so absolute temperature = anomaly + 14.13 °C.

In [1]:
# Load libraries

import pandas as pd
from pathlib import Path
import warnings

In [2]:
# Reference temperature in °C that is added to each anomaly value.
BASELINE_TEMP_C = 14.13

# Two directory levels above the current working directory.
BASE_DIR = Path().resolve().parents[1]

# Location of the raw global temperature file.
temp_path = BASE_DIR.joinpath("data", "raw", "Global_TAVG_annual.txt")

In [5]:
# Read the whitespace-delimited annual temperature file.
# - Text from a "%" onward is treated as a comment and ignored.
# - The file is read without a header row, so the five column names are
#   supplied explicitly.
# - `sep=r"\s+"` is the documented replacement for `delim_whitespace=True`,
#   which is deprecated since pandas 2.2. With the deprecated option gone, the
#   blanket `warnings.filterwarnings("ignore")` is no longer needed; it also
#   hid every other warning for the rest of the kernel session.
df_temp = pd.read_csv(
    temp_path,
    sep=r"\s+",
    comment="%",
    header=None,
    names=["year", "anomaly_c", "uncertainty_c", "five_year_avg", "five_year_unc"],
)

In [4]:
# Convert anomalies to absolute temperatures:
#   absolute temp = anomaly + 1951–1980 mean (BASELINE_TEMP_C)
df_temp["global_temp_c"] = df_temp["anomaly_c"].add(BASELINE_TEMP_C)

In [None]:
# Keep only the necessary columns (the two five-year columns are dropped).
df_temp = df_temp[["year", "anomaly_c", "uncertainty_c", "global_temp_c"]]

# Save the final dataset. The target folder is created first (a no-op when it
# already exists) so the write does not fail with an OSError when
# data/pre_processed is missing.
output_path = BASE_DIR / "data" / "pre_processed" / "global_temp_data.csv"
output_path.parent.mkdir(parents=True, exist_ok=True)
df_temp.to_csv(output_path, index=False)

In [27]:
# Sanity check: report the frame's dimensions, then show the first few
# distinct (year, global_temp_c) rows.
year_temp_preview = df_temp[["year", "global_temp_c"]].drop_duplicates().head()

print("Global Temperature dataset shape:", df_temp.shape)
print(year_temp_preview)

Global Temperature dataset shape: (175, 4)
   year  global_temp_c
0  1850         13.764
1  1851         13.791
2  1852         13.851
3  1853         13.945
4  1854         13.878
