In [1]:
import pandas as pd

# Read the four raw CSV inputs, in this order, into one DataFrame each.
gas, rad, sealvl, temp = [
    pd.read_csv(csv_name)
    for csv_name in ('gas.csv', 'radiation.csv', 'sealevel.csv', 'temp.csv')
]

In [5]:
import pandas as pd

# Start from the file contents again so this cell does not depend on earlier edits to `gas`
gas = pd.read_csv('gas.csv')

# Keep the country label plus one column per year, 1989 through 2017 inclusive
first_year, last_year = 1989, 2017
cols_to_keep = ['Country', *map(str, range(first_year, last_year + 1))]
gas = gas.loc[:, cols_to_keep]

# Wide -> long: one row per (Country, year) pair, values in 'gas_emission'
gas_long = gas.melt(
    id_vars='Country',
    var_name='year',
    value_name='gas_emission',
)


In [10]:
# Leave only the per-year value columns; the 'Country' label is not part of the average
gas_numeric = gas.drop(columns='Country')

# Column-wise mean -> one value per year column. The year labels become the
# 'year' column (converted from strings to integers) and the means become
# 'gas_emission'.
gas_avg = (
    gas_numeric
    .mean(axis=0)
    .rename_axis('year')
    .reset_index(name='gas_emission')
    .astype({'year': int})
)


In [12]:
# Divide the averaged emissions by 1e9, round to one decimal, and give the
# column its unit-bearing label.
# The guard makes the cell safe to re-run: once the column has been renamed
# there is nothing left to scale, so a second run is a no-op instead of a
# KeyError (and the values are never divided twice).
if 'gas_emission' in gas_avg.columns:
    gas_avg['gas_emission'] = (gas_avg['gas_emission'] / 1e9).round(1)

    # Rename the column (non-inplace so the step can be chained or re-run)
    gas_avg = gas_avg.rename(
        columns={'gas_emission': 'gas_emission (billion metric tons)'}
    )

In [13]:
# Display the yearly emission table as this cell's output
gas_avg

Unnamed: 0,year,gas_emission (billion metric tons)
0,1989,8.7
1,1990,8.9
2,1991,9.2
3,1992,9.4
4,1993,9.7
5,1994,9.9
6,1995,10.2
7,1996,10.5
8,1997,10.7
9,1998,11.0


In [29]:
# Save the yearly emission table without the DataFrame index column.
# NOTE(review): 'Gas.csv' differs from the 'gas.csv' input read earlier only by
# letter case; on a case-insensitive filesystem this write would presumably
# replace the raw input file — confirm before re-running the notebook.
gas_avg.to_csv('Gas.csv', index=False)

In [17]:
# Step 1: Filter years between 1989 and 2017 (both ends included)
in_common_range = (rad['Year'] >= 1989) & (rad['Year'] <= 2017)
rad = rad[in_common_range]

# Step 2: Drop Month and Day. errors='ignore' keeps the cell re-runnable:
# `rad` is overwritten here, so on a second run the columns are already gone
# and a plain drop would raise KeyError.
rad = rad.drop(columns=['Month', 'Day'], errors='ignore')

# Step 3: Average 'Estimated DGSR' within each year
# Step 4: Rename the columns (non-inplace, so the whole step is one expression)
rad_avg = (
    rad.groupby('Year')['Estimated DGSR']
    .mean()
    .reset_index()
    .rename(columns={
        'Year': 'year',
        'Estimated DGSR': 'Radiation amount (kWh/m²/day)',
    })
)


In [18]:
# Display the yearly radiation averages as this cell's output
rad_avg

Unnamed: 0,year,Radiation amount (kWh/m²/day)
0,1989,9.166878
1,1990,11.534777
2,1991,11.894883
3,1992,11.287646
4,1993,12.12966
5,1994,11.987603
6,1995,11.860036
7,1996,11.47035
8,1997,11.443455
9,1998,11.980871


In [19]:
# Save the yearly radiation averages to 'rad.csv' without the DataFrame index column
rad_avg.to_csv('rad.csv',index=False)

In [21]:
# Step 1: Keep only years from 1989 to 2017 (our common range)
in_common_range = (sealvl['year'] >= 1989) & (sealvl['year'] <= 2017)
sealvl = sealvl[in_common_range]

# Step 2: Drop the 'date' column. errors='ignore' keeps the cell re-runnable:
# `sealvl` is overwritten, so on a second run 'date' is already gone and a
# plain drop would raise KeyError.
# Step 3: Rename the sea level column (non-inplace; a no-op once renamed)
sealvl = (
    sealvl
    .drop(columns=['date'], errors='ignore')
    .rename(columns={'mmfrom1993-2008average': 'Sea level (mm)'})
)

In [22]:
# Display the filtered sea-level table as this cell's output
sealvl

Unnamed: 0,year,Sea level (mm)
109,1989,-25.147396
110,1990,-22.847396
111,1991,-22.814062
112,1992,-20.247396
113,1993,-22.780729
114,1994,-17.980729
115,1995,-15.580729
116,1996,-11.947396
117,1997,-2.647396
118,1998,-10.814063


In [23]:
# Save the filtered sea-level table. index=False leaves out the row labels
# carried over from the unfiltered file (they would otherwise be written as an
# unnamed first column), matching the Gas.csv and rad.csv exports.
sealvl.to_csv('slv.csv', index=False)

In [25]:
# Drop 'Code' and 'year' columns. errors='ignore' keeps the cell re-runnable:
# `temp` is overwritten, so on a second run the columns are already gone and a
# plain drop would raise KeyError.
temp = temp.drop(columns=['Code', 'year'], errors='ignore')

# Average both temperature columns over all rows that share the same 'Day',
# then rename them to the labels used by the later cells.
# NOTE(review): in the displayed result the second column repeats a single
# value for every date within a year, so it looks like a yearly average rather
# than a maximum (and the first like a monthly average rather than a minimum)
# — confirm against the data source. The 'Min'/'Max' labels are kept because
# later cells select the columns by these exact names.
temp_avg = (
    temp.groupby('Day')
    .agg({
        'Average surface temperature': 'mean',
        'Average surface temperature.1': 'mean',
    })
    .reset_index()
    .rename(columns={
        'Average surface temperature': 'Avg Min Temperature (°C)',
        'Average surface temperature.1': 'Avg Max Temperature (°C)',
    })
)

In [27]:
# Display the per-date temperature averages as this cell's output
temp_avg

Unnamed: 0,Day,Avg Min Temperature (°C),Avg Max Temperature (°C)
0,1940-01-15,11.458131,17.241399
1,1940-02-15,12.956817,17.241399
2,1940-03-15,14.661579,17.241399
3,1940-04-15,17.548147,17.241399
4,1940-05-15,19.489901,17.241399
...,...,...,...
1015,2024-08-15,23.351773,19.594957
1016,2024-09-15,22.003617,19.594957
1017,2024-10-15,19.911748,19.594957
1018,2024-11-15,17.186867,19.594957


In [28]:
# Save the per-date temperature averages. index=False leaves out the 0..N row
# counter (otherwise written as an unnamed first column), matching the Gas.csv
# and rad.csv exports.
# NOTE(review): 'Temp.csv' differs from the 'temp.csv' input read earlier only
# by letter case; on a case-insensitive filesystem this write would presumably
# replace the raw input file — confirm before re-running the notebook.
temp_avg.to_csv('Temp.csv', index=False)

In [36]:
# Parse 'Day' once, store the parsed values back, and derive the calendar year from them
day_as_datetime = pd.to_datetime(temp_avg['Day'])
temp_avg['Day'] = day_as_datetime
temp_avg['year'] = day_as_datetime.dt.year

# Mean of each temperature column within a year; 'year' comes back as a regular column
temperature_cols = ['Avg Min Temperature (°C)', 'Avg Max Temperature (°C)']
temp_yearly = (
    temp_avg.groupby('year')[temperature_cols]
    .agg('mean')
    .reset_index()
)

# Optional: two decimals for a tidier printout
temp_yearly = temp_yearly.round(decimals=2)

print(temp_yearly.head())

   year  Avg Min Temperature (°C)  Avg Max Temperature (°C)
0  1940                     17.24                     17.24
1  1941                     17.56                     17.56
2  1942                     17.40                     17.40
3  1943                     17.47                     17.47
4  1944                     17.61                     17.61


In [38]:
# Parse 'Day' once, store the parsed values back, and derive the calendar year from them
day_as_datetime = pd.to_datetime(temp_avg['Day'])
temp_avg['Day'] = day_as_datetime
temp_avg['year'] = day_as_datetime.dt.year

In [40]:
# Yearly mean of both temperature columns (unrounded), with 'year' as a regular column
yearly_temperature_cols = ['Avg Min Temperature (°C)', 'Avg Max Temperature (°C)']
temp_yearly = temp_avg.groupby('year')[yearly_temperature_cols].mean().reset_index()

In [43]:
# Display the yearly temperature table as this cell's output.
# NOTE(review): the saved output shows a single 'temperature (°C)' column, so
# this cell appears to have been executed after the drop/rename cell that
# follows it in the file — confirm the intended cell order.
temp_yearly

Unnamed: 0,year,temperature (°C)
0,1940,17.241399
1,1941,17.555396
2,1942,17.397509
3,1943,17.471349
4,1944,17.611624
...,...,...
80,2020,19.114152
81,2021,18.953754
82,2022,18.981230
83,2023,19.395925


In [42]:
# The two yearly columns hold identical values, so keep one of them and
# rename it to 'temperature (°C)'.
# errors='ignore' makes the cell safe to re-run: after the first run the
# dropped column no longer exists (a plain drop would raise KeyError) and the
# rename finds nothing left to rename, so a second run is a no-op.
temp_yearly = (
    temp_yearly
    .drop(columns=['Avg Max Temperature (°C)'], errors='ignore')
    .rename(columns={'Avg Min Temperature (°C)': 'temperature (°C)'})
)

In [44]:
# Save the yearly temperature table. index=False leaves out the 0..N row
# counter (otherwise written as an unnamed first column), matching the Gas.csv
# and rad.csv exports.
temp_yearly.to_csv('temp_year.csv', index=False)

In [46]:
# Join the four yearly tables on 'year', one after another. Inner joins (the
# merge default, spelled out here) keep only years present in every table.
master = (
    gas_avg
    .merge(rad_avg, on='year', how='inner')
    .merge(sealvl, on='year', how='inner')
    .merge(temp_yearly, on='year', how='inner')
)

print(master.head())


   year  gas_emission (billion metric tons)  Radiation amount (kWh/m²/day)  \
0  1989                                 8.7                       9.166878   
1  1990                                 8.9                      11.534777   
2  1991                                 9.2                      11.894883   
3  1992                                 9.4                      11.287646   
4  1993                                 9.7                      12.129660   

   Sea level (mm)  temperature (°C)  
0      -25.147396         18.101305  
1      -22.847396         18.375573  
2      -22.814062         18.031093  
3      -20.247396         17.944039  
4      -22.780729         17.964859  


In [47]:
# Save the merged table. index=False leaves out the 0..N row counter
# (otherwise written as an unnamed first column), matching the Gas.csv and
# rad.csv exports.
master.to_csv('final.csv', index=False)