In [2]:
import pandas as pd

# Load DataFrames
# Earthquake table (tab-separated). Only rows with a negative 'Year' are kept,
# and the sign is dropped so the series holds positive integers.
earthquakesD = pd.read_csv('datasets/Earthquakes 2024-06-20.tsv', sep='\t')
earthquakesYears = earthquakesD.loc[earthquakesD['Year'] < 0, 'Year'].abs().astype(int)

# Eruption table: same treatment, applied to 'start_year'.
eruptionD = pd.read_csv('datasets/eruptions.csv')
eruptionYears = eruptionD.loc[eruptionD['start_year'] < 0, 'start_year'].abs().astype(int)

# 20,000-year temperature table, reduced to the two columns used below.
df = pd.read_csv('datasets/Temperature dataset 20000 years.csv')
# The anomaly header in the CSV ends with a trailing space. The rename mapping
# must use that exact header: rename() silently ignores keys that do not match
# a column, which previously left the column un-renamed.
filtered_df = df[['Age (yr BP)', 'Temperature Anomaly (°C) ']].rename(
    columns={
        'Age (yr BP)': 'Age',
        'Temperature Anomaly (°C) ': 'Temperature Anomaly',
    }
)

# The 800,000-year table is copied into the preprocess folder unchanged.
temp800D_df = pd.read_csv('datasets/Temperature dataset 800,000 years.csv')

temp800D_df.to_csv('preprocess/pre_Temperature_dataset_800000_years.csv', index=False)

# Define bins for the ages
# Consecutive 'Age' values act as bin edges, so n ages produce n - 1 bins.
# The edges must be unique and increasing for the binning below to succeed.
# NOTE(review): the event series hold |year| for negative years (presumably
# years BCE), while 'Age' was read from a column labelled 'yr BP' - confirm the
# two scales are meant to be compared without an offset.
bins = filtered_df['Age']

# Bin earthquake years
# The result is indexed by right-closed intervals (previous age, age].
earthquake_counts = earthquakesYears.value_counts(bins=bins).sort_index()

# Bin eruption years
eruption_counts = eruptionYears.value_counts(bins=bins).sort_index()


def _counts_per_age(ages, binned_counts):
    """Return one event count per age, crediting each bin to its upper edge.

    ages: Series of bin edges (the 'Age' column).
    binned_counts: Series indexed by intervals, as produced by
        value_counts(bins=ages).

    Each age is looked up in the interval that contains it; ages that match no
    interval get 0. value_counts(bins=...) widens the first interval so that it
    also contains the lowest edge, which would make the lowest age repeat the
    count already credited to the second age. That row is therefore set to 0:
    no bin ends at the lowest edge.
    """
    per_age = ages.map(binned_counts).fillna(0).astype(int)
    return per_age.mask(ages == ages.min(), 0)


# Add these counts to the filtered_df DataFrame
filtered_df['Earthquake Counts'] = _counts_per_age(filtered_df['Age'], earthquake_counts)
filtered_df['Eruption Counts'] = _counts_per_age(filtered_df['Age'], eruption_counts)

filtered_df.to_csv('preprocess/pre_Temperature_dataset_20000_years.csv', index=False)

In [None]:
# Read the emissions and electricity tables.
df_emissions = pd.read_csv('datasets/emissions.csv')
df_electricity = pd.read_csv('datasets/electricity.csv')

# Keep only the (Entity, Code, Year) combinations present in both tables, then
# append the row-wise sum of the three fossil sources (missing values are
# skipped by sum()).
fossil_sources = ['Coal', 'Gas', 'Oil']
merged_df = (
    df_emissions
    .merge(df_electricity, how='inner', on=['Entity', 'Code', 'Year'])
    .assign(**{'Total Fossil Fuel Usage': lambda frame: frame[fossil_sources].sum(axis=1)})
)

# Restrict to rows with a positive fossil total from the year 2000 onward.
has_fossil_usage = merged_df['Total Fossil Fuel Usage'].gt(0)
from_2000_onward = merged_df['Year'].ge(2000)
filtered_df = merged_df[has_fossil_usage & from_2000_onward]

filtered_df.to_csv('preprocess/pre_fossil_fuel_emissions.csv', index=False)

# Pull the single off-diagonal entry out of the 2x2 correlation matrix.
usage_col = 'Total Fossil Fuel Usage'
emissions_col = 'Annual CO₂ emissions (per capita)'
corr_matrix = filtered_df[[usage_col, emissions_col]].corr()
correlation = corr_matrix.loc[usage_col, emissions_col]
print(f'Correlation between Total Fossil Fuel Usage and CO₂ Emissions: {correlation:.2f}')