In [27]:
import pandas as pd

In [28]:
# Sea ice extent dataset, reduced to one mean value per year and hemisphere.
# Steps, in order:
#   1. strip stray whitespace from the column labels,
#   2. drop the columns that are not needed downstream,
#   3. group by 'Year' and 'hemisphere' and average the remaining columns.
# NOTE(review): the first and last years in the file may be only partially
# covered, which would skew their yearly means - confirm against the raw data.
seaice_df = (
    pd.read_csv('Data/seaice.csv')
    .rename(columns=str.strip)
    .drop(columns=['Month', 'Day', 'Source Data', 'Missing'])
    .groupby(['Year', 'hemisphere'])
    .mean()
    .reset_index()
)

seaice_df

Unnamed: 0,Year,hemisphere,Extent
0,1978,north,12.487000
1,1978,south,13.466676
2,1979,north,12.319560
3,1979,south,11.719137
4,1980,north,12.334148
...,...,...,...
79,2017,south,10.748926
80,2018,north,10.326964
81,2018,south,10.937660
82,2019,north,13.610510


In [29]:
# Annual global mean surface temperature change dataset.
# Preprocessing: keep only 'Year' and the 'J-D' column (presumably the
# January-December annual value - confirm against the data source) and
# expose the latter under the name 'Temp'.
#
# The rename is done without inplace=True on purpose: selecting a subset of
# columns returns a new frame derived from the original, and renaming that
# frame in place can trigger pandas' SettingWithCopyWarning. Chaining the
# non-in-place rename yields the same result without the warning.
global_temps_df = (
    pd.read_csv('Data/global_temps.csv')[['Year', 'J-D']]
    .rename(columns={'J-D': 'Temp'})
)

global_temps_df

Unnamed: 0,Year,Temp
0,1880,-0.17
1,1881,-0.09
2,1882,-0.11
3,1883,-0.18
4,1884,-0.29
...,...,...
139,2019,0.98
140,2020,1.02
141,2021,0.85
142,2022,0.90


In [30]:
# Annual atmospheric greenhouse gas dataset (AGGI table).
# Preprocessing: normalise the awkward column labels first, then keep only
# the columns used downstream.
aggi_df = (
    pd.read_csv('Data/AGGI_Table.csv')
    .rename(columns={'Total.1': 'CO2-eq', 'CFC*': 'CFC', 'HFCs*': 'HFCs', 'change **': 'change'})
    .loc[:, ['Year', 'CO2', 'CFC', 'HCFCs', 'HFCs']]
)

# Preview: first five and last five rows stacked together.
pd.concat([aggi_df.head(5), aggi_df.tail(5)])

Unnamed: 0,Year,CO2,CFC,HCFCs,HFCs
0,1979,1.025,0.175,0.008,0.001
1,1980,1.058,0.185,0.009,0.001
2,1981,1.076,0.195,0.01,0.001
3,1982,1.088,0.205,0.011,0.001
4,1983,1.114,0.215,0.012,0.001
39,2018,2.046,0.284,0.06,0.039
40,2019,2.079,0.282,0.061,0.041
41,2020,2.11,0.279,0.061,0.044
42,2021,2.14,0.276,0.061,0.046
43,2022,2.17,0.274,0.061,0.049


In [31]:
# Annual Antarctic ozone hole dataset.
# Preprocessing: only the year and the hole area are kept.
ozoneHole_df = pd.read_csv('Data/OzoneHole_Data.csv').loc[:, ['Year', 'Hole Area']]

# Preview: first five and last five rows stacked together.
pd.concat([ozoneHole_df.head(5), ozoneHole_df.tail(5)])

Unnamed: 0,Year,Hole Area
0,1979,0.1
1,1980,1.4
2,1981,0.6
3,1982,4.8
4,1983,7.9
37,2017,17.4
38,2018,22.9
39,2019,9.3
40,2020,23.5
41,2021,23.3


In [32]:
# Merge all datasets on 'Year'.
#
# merge() defaults to an inner join, so only years present in every one of
# the four datasets survive; a year missing from any single source is
# dropped for both hemispheres.
#
# seaice_df holds one row per (Year, hemisphere), so it is the "many" side
# of each join. validate='many_to_one' makes pandas raise MergeError if a
# right-hand table ever contains duplicate 'Year' values, instead of
# silently multiplying rows in the merged result.
merged_df = (
    seaice_df
    .merge(global_temps_df, on='Year', validate='many_to_one')
    .merge(aggi_df, on='Year', validate='many_to_one')
    .merge(ozoneHole_df, on='Year', validate='many_to_one')
)
merged_df

Unnamed: 0,Year,hemisphere,Extent,Temp,CO2,CFC,HCFCs,HFCs,Hole Area
0,1979,north,12.319560,0.16,1.025,0.175,0.008,0.001,0.1
1,1979,south,11.719137,0.16,1.025,0.175,0.008,0.001,0.1
2,1980,north,12.334148,0.26,1.058,0.185,0.009,0.001,1.4
3,1980,south,11.236290,0.26,1.058,0.185,0.009,0.001,1.4
4,1981,north,12.135486,0.32,1.076,0.195,0.010,0.001,0.6
...,...,...,...,...,...,...,...,...,...
75,2017,south,10.748926,0.92,2.014,0.286,0.060,0.037,17.4
76,2018,north,10.326964,0.85,2.046,0.284,0.060,0.039,22.9
77,2018,south,10.937660,0.85,2.046,0.284,0.060,0.039,22.9
78,2019,north,13.610510,0.98,2.079,0.282,0.061,0.041,9.3


In [33]:
# Blank out the ozone-hole value on every row that is not for the southern
# hemisphere; the ozone hole data only applies to the 'south' rows.
# Series.mask replaces the entries where the condition is True with NaN.
merged_df['Hole Area'] = merged_df['Hole Area'].mask(merged_df['hemisphere'] != 'south')
merged_df

Unnamed: 0,Year,hemisphere,Extent,Temp,CO2,CFC,HCFCs,HFCs,Hole Area
0,1979,north,12.319560,0.16,1.025,0.175,0.008,0.001,
1,1979,south,11.719137,0.16,1.025,0.175,0.008,0.001,0.1
2,1980,north,12.334148,0.26,1.058,0.185,0.009,0.001,
3,1980,south,11.236290,0.26,1.058,0.185,0.009,0.001,1.4
4,1981,north,12.135486,0.32,1.076,0.195,0.010,0.001,
...,...,...,...,...,...,...,...,...,...
75,2017,south,10.748926,0.92,2.014,0.286,0.060,0.037,17.4
76,2018,north,10.326964,0.85,2.046,0.284,0.060,0.039,
77,2018,south,10.937660,0.85,2.046,0.284,0.060,0.039,22.9
78,2019,north,13.610510,0.98,2.079,0.282,0.061,0.041,


In [34]:
# Persist the frame that combines all datasets; the row index is not written.
merged_df.to_csv(path_or_buf='Data/merged_df.csv', index=False)