Climate Explorer


1. Data collection and cleaning.



Downloading data online and cleaning it with Python.

Saving the data as CSV files so that it can be easily used by other teammates.

(1) Converting global temperature data from monthly to yearly.

In [None]:
import pandas as pd
import numpy as np

# 1. Read the monthly temperature anomaly table (NASA GISS, per the original note).
#    skiprows=1 drops the first line of the file, so the line after it is used as the header.
url = r"../../csv_file/globe/original_tempera_difference.csv"
data = pd.read_csv(url, skiprows=1)

# 2. Monthly column names (January-December)
month_columns = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# 3. Data preprocessing: convert every monthly column to float.
#    errors="coerce" turns any non-numeric cell into NaN. That covers the '***' missing-value
#    marker and also variants (padded, longer, ...) that an exact-match replace would leave in
#    place and that would then make astype(float) raise.
data[month_columns] = data[month_columns].apply(pd.to_numeric, errors="coerce").astype(float)

# 4. Calculate monthly absolute temperature (anomaly value + 13.9)
month_abs_columns = [f'{month}_Abs' for month in month_columns]  # Monthly absolute temperature column names
data[month_abs_columns] = data[month_columns] + 13.9  # Assigned column by column, in month order

# 5. Calculate annual absolute temperature statistics.
#    NaN months are skipped, so a year with missing months is summarised from the months it has.
data['Annual_Mean_Absolute'] = data[month_abs_columns].mean(axis=1)      # Annual mean
data['Annual_Median_Absolute'] = data[month_abs_columns].median(axis=1)  # Annual median
data['Annual_Std_Absolute'] = data[month_abs_columns].std(axis=1)        # Annual sample standard deviation

# 6. Keep only the year and the annual statistics, rounded to two decimal places
result = data[['Year', 'Annual_Mean_Absolute', 'Annual_Median_Absolute', 'Annual_Std_Absolute']].copy()
result = result.round(2)

# 7. Save results to CSV file
result.to_csv('../../csv_file/globe/annual_temperature_data.csv', index=False, float_format='%.2f')

# 8. Display first 10 rows of results
print("Processed annual absolute temperature data (absolute temperature statistics only):")
print(result.head(10))

(2) Collecting global CO2 data and converting it from daily to monthly.

In [None]:
import pandas as pd

# 1. Read CSV file (comma-separated by default, no need to specify sep)
input_file = r"../../csv_file/globe/original_co2.csv"  # Replace with your actual file path
df = pd.read_csv(input_file, encoding="utf-8")  # Replace with "gbk" if an error occurs

# 2. Group by "year+month" and average the smoothed and trend columns.
#    The built-in "mean" aggregation runs vectorised instead of calling a Python lambda once
#    per group; the two result columns are then rounded to 2 decimal places in a single step.
monthly_df = (
    df.groupby(["year", "month"], as_index=False)  # Match column names in CSV header
    .agg(
        smoothed_monthly=("smoothed", "mean"),
        trend_monthly=("trend", "mean"),
    )
    .round({"smoothed_monthly": 2, "trend_monthly": 2})
)

# 3. Save processed monthly data to new CSV
output_file = "../../csv_file/globe/final_co2_monthly.csv"
monthly_df.to_csv(output_file, index=False, encoding="utf-8")

print(f"Monthly average data saved to: {output_file}")
print("Preview of processed data:")
print(monthly_df.head())

(3) Collecting hemispheric temperature data and comparing the Northern and Southern Hemispheres year by year.

In [None]:
import pandas as pd
import numpy as np

# ---------------------- 1. Load the NASA GISS monthly tables for both hemispheres (public links) ----------------------
nh_url = "https://data.giss.nasa.gov/gistemp/tabledata_v3/NH.Ts+dSST.csv"  # Northern Hemisphere
sh_url = "https://data.giss.nasa.gov/gistemp/tabledata_v3/SH.Ts+dSST.csv"  # Southern Hemisphere

# Both files are parsed the same way: the first line is skipped and the next one becomes the header.
nh_data, sh_data = (pd.read_csv(source, skiprows=1) for source in (nh_url, sh_url))

# ---------------------- 2. Shared preprocessing settings ----------------------
# Monthly column names, in calendar order, used when processing either hemisphere.
month_columns = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

def process_hemisphere_data(data, months=None, baseline=13.9):
    """Summarise one hemisphere's monthly anomalies as annual absolute-temperature statistics.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing a 'Year' column and one column per month. The input is not modified.
    months : list of str, optional
        Names of the monthly columns. Defaults to the module-level ``month_columns``.
    baseline : float, optional
        Offset added to every anomaly to obtain an absolute temperature (default 13.9).

    Returns
    -------
    pandas.DataFrame
        A new, independent frame (same index as ``data``) with the columns 'Year',
        'Annual_Mean_Abs', 'Annual_Median_Abs' and 'Annual_Std_Abs', the statistics rounded
        to two decimal places. Callers can add columns to it without a SettingWithCopyWarning.

    Raises
    ------
    KeyError
        If 'Year' or any of the monthly columns is missing from ``data``.
    """
    months = list(month_columns if months is None else months)

    # errors="coerce" maps every non-numeric cell (e.g. the '***' missing marker) to NaN.
    anomalies = data[months].apply(pd.to_numeric, errors="coerce").astype(float)
    absolute = anomalies + baseline

    # Row-wise statistics skip NaN months, so incomplete years use the months that are present.
    # Building a fresh DataFrame (rather than slicing columns out of a larger one) is what
    # makes the result independent of any parent frame.
    return pd.DataFrame({
        'Year': data['Year'],
        'Annual_Mean_Abs': absolute.mean(axis=1).round(2),
        'Annual_Median_Abs': absolute.median(axis=1).round(2),
        'Annual_Std_Abs': absolute.std(axis=1).round(2),
    })

# Process Northern and Southern Hemisphere data
nh_processed = process_hemisphere_data(nh_data)
sh_processed = process_hemisphere_data(sh_data)

# ---------------------- 3. Merge into comparison data (fixed order: Northern first, then Southern) ----------------------
# Add hemisphere identifier. assign() returns a new frame instead of writing into the frame
# returned by process_hemisphere_data, which avoids pandas' SettingWithCopyWarning when that
# frame is a column selection of a larger one.
nh_processed = nh_processed.assign(Hemisphere='Northern')
sh_processed = sh_processed.assign(Hemisphere='Southern')

# Stack both hemispheres into one long table
comparison_data = pd.concat([nh_processed, sh_processed], axis=0)
# Set Hemisphere as categorical type with specified order to ensure Northern comes first when sorting
comparison_data['Hemisphere'] = pd.Categorical(comparison_data['Hemisphere'],
                                               categories=['Northern', 'Southern'],
                                               ordered=True)
# Sort by Year first, then by Hemisphere (ensuring Northern comes before Southern for the same year)
comparison_data = comparison_data.sort_values(['Year', 'Hemisphere']).reset_index(drop=True)

# ---------------------- 4. Save as comparison CSV ----------------------
# NOTE(review): this file name contains a space after "north_" and is written to the current
# working directory rather than ../../csv_file/ like the other outputs. Left unchanged because
# other code may already read this exact path; confirm before renaming.
comparison_data.to_csv('south_and_north_ hemisphere_comparison.csv', index=False, float_format='%.2f')

# ---------------------- 5. Display sample results ----------------------
print("Annual temperature change comparison between Northern and Southern Hemispheres (sorted by year + hemisphere, first 10 rows):")
print(comparison_data.head(10))

(4) Collecting and cleaning temperature data from different nations and regions.

In [None]:
import pandas as pd

# 1. Read CSV file
df = pd.read_csv("../../csv_file/region/GlobalLandTemperaturesByCountry.csv")
country = ""  # Set this to a name exactly as it appears in the "Country" column
target_country = country
temperature_col = "AverageTemperature"

# 2. Filter specified country + Remove missing values in temperature column.
#    .copy() makes the selection an independent frame, so the column assignments below do not
#    trigger pandas' SettingWithCopyWarning.
df_filtered = df[df["Country"] == target_country].dropna(subset=[temperature_col]).copy()

# Fail loudly instead of silently writing an empty "<country>_tempera.csv" when the country
# name is unset or misspelled.
if df_filtered.empty:
    raise ValueError(
        f"No rows with a non-missing {temperature_col} found for country {target_country!r}; "
        "set `country` to a value from the 'Country' column."
    )

# 3. Process date: Extract year
df_filtered["dt"] = pd.to_datetime(df_filtered["dt"])
df_filtered["year"] = df_filtered["dt"].dt.year

# 4. Group by year and calculate mean, median and sample standard deviation
yearly_stats = df_filtered.groupby("year")[temperature_col].agg(
    mean="mean",
    median="median",
    standard_deviation="std"
).reset_index()

# 5. Keep two decimal places
yearly_stats = yearly_stats.round(2)

# Output results
print("Annual temperature statistics for the specified country:")
print(yearly_stats)

# Save as new CSV
yearly_stats.to_csv(f"../../csv_file/region/{country}_tempera.csv", index=False)

2. Visualizing the data.

(1)

(2)

(3)

3. Analysing the data with AI.

(1)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# 1. Load data
# NOTE(review): this cell reads from "../csv_files/", while the data-cleaning cells use
# "../../csv_file/..."; confirm which directory layout is the intended one.
df = pd.read_csv("../csv_files/GlobalLandTemperaturesByCountry.csv")

# Keep necessary columns and drop incomplete rows.
# .copy() makes the result an independent frame, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning.
df = df[['dt', 'AverageTemperature', 'Country']].dropna().copy()

# Convert date
df['dt'] = pd.to_datetime(df['dt'])
df['Year'] = df['dt'].dt.year

# 2. Aggregate to country-level means (one row per country, averaged over all dates)
country_temp = (
    df.groupby('Country')['AverageTemperature']
      .mean()
      .reset_index()
)

# 3. Simulate regional CO2
# These values are random draws, not measurements: a ~400 ppm baseline plus Gaussian noise
# (standard deviation 15), seeded so the cell is reproducible.
np.random.seed(42)
country_temp['CO2'] = 400 + np.random.normal(0, 15, size=len(country_temp))

# 4. Feature matrix
X = country_temp[['AverageTemperature', 'CO2']]

# Standardize for K-Means so both features contribute on the same scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 5. Apply K-Means clustering
# n_init is set explicitly because scikit-learn's default changed between releases (10 -> 'auto');
# pinning it keeps the clustering the same across versions and silences the FutureWarning
# emitted by the releases in between.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
country_temp['Cluster'] = kmeans.fit_predict(X_scaled)

# 6. Visualization: one point per country, coloured by cluster label
plt.figure(figsize=(8,6))
plt.scatter(
    country_temp['AverageTemperature'],
    country_temp['CO2'],
    c=country_temp['Cluster']
)
plt.xlabel("Average Temperature (°C)")
plt.ylabel("Simulated CO₂ (ppm)")
plt.title("Climate Clusters Based on Temperature and CO₂")
plt.grid(True)
plt.show()

# 7. Display output
print(country_temp.head())

(2)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load data
# NOTE(review): these paths point at "../csv_files/", while the data-cleaning cells write to
# "../../csv_file/globe/" and "../../csv_file/region/"; confirm the files are copied here.
annual_temp = pd.read_csv("../csv_files/annual_temperature_data.csv")
co2 = pd.read_csv("../csv_files/final_co2_monthly.csv")
global_temp = pd.read_csv("../csv_files/GlobalLandTemperaturesByCountry.csv")

# 2. Preprocess global temperature data
# .copy() makes the cleaned selection an independent frame, so the column assignments below do
# not trigger pandas' SettingWithCopyWarning.
global_temp = global_temp[['dt', 'AverageTemperature', 'Country']].dropna().copy()
global_temp['dt'] = pd.to_datetime(global_temp['dt'])
global_temp['Year'] = global_temp['dt'].dt.year

# Country-level annual mean temperature
country_annual_temp = (
    global_temp.groupby(['Country', 'Year'])['AverageTemperature']
    .mean()
    .reset_index()
)

# 3. Preprocess CO2 data: average the monthly values into one CO2 value per year
co2_annual = (
    co2.groupby('year')['smoothed_monthly']
    .mean()
    .reset_index()
    .rename(columns={'year': 'Year', 'smoothed_monthly': 'CO2'})
)

# 4. Merge datasets
# Left join: every country-year row is kept; years without a CO2 value get NaN.
merged = country_annual_temp.merge(co2_annual, on='Year', how='left')

# Merge with final annual temperature data if Year exists
if 'Year' in annual_temp.columns:
    merged = merged.merge(
        annual_temp,
        on='Year',
        how='left',
        suffixes=('_Country', '_Global')  # Only applied if both frames share a non-key column name
    )

# 5. Select numeric features
# 'number' selects every numeric dtype. A literal ['float64', 'int64'] filter silently drops
# numeric columns stored at another width (e.g. int32 or float32), which would remove them
# from the correlation matrix without any error.
numeric_df = merged.select_dtypes(include='number')

print("Numeric features included in correlation analysis:")
print(numeric_df.columns)

# 6. Compute correlation (pairwise, ignoring rows where either value is NaN)
correlation_matrix = numeric_df.corr()

print("\nCorrelation Matrix:")
print(correlation_matrix)

# 7. Visualization
plt.figure(figsize=(11, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    square=True
)
plt.title("Correlation Analysis: Temperature, CO₂, and Time")
plt.show()

(3)