In [1]:
import pandas as pd

# Customer table: three customers, keyed by "id".
# Column order follows the keyword order below.
df1 = pd.DataFrame(dict(
    id=[1, 2, 3],
    customer_id=[1, 2, 3],
    customer_name=["Robert", "Peter", "Dave"],
))

df1

Unnamed: 0,id,customer_id,customer_name
0,1,1,Robert
1,2,2,Peter
2,3,3,Dave


In [2]:
# Order table: three orders, keyed by "id" (ids 1, 2 and 4).
# Column order follows the keyword order below.
df2 = pd.DataFrame(dict(
    id=[1, 2, 4],
    order_id=[100, 200, 300],
    order_date=["2025-02-01", "2025-02-02", "2025-02-03"],
))

df2

Unnamed: 0,id,order_id,order_date
0,1,100,2025-02-01
1,2,200,2025-02-02
2,4,300,2025-02-03


In [None]:
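# Two ways to combine separate DataFrames into one
#
# 1) concat(): simply glues the DataFrames together without matching rows on a key column
#              -> stacks top-to-bottom (axis=0, the default) or side by side (axis=1)
# 2) merge():  joins the DataFrames on a specific key (a shared column or the index)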

In [3]:
# Stack the two frames top-to-bottom (axis=0 is concat's default); columns missing on one side become NaN.
pd.concat([df1, df2], axis=0)

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1.0,Robert,,
1,2,2.0,Peter,,
2,3,3.0,Dave,,
0,1,,,100.0,2025-02-01
1,2,,,200.0,2025-02-02
2,4,,,300.0,2025-02-03


In [6]:
# Place the two frames side by side; both "id" columns are kept.
doc = pd.concat([df1, df2], axis="columns")
doc

Unnamed: 0,id,customer_id,customer_name,id.1,order_id,order_date
0,1,1,Robert,1,100,2025-02-01
1,2,2,Peter,2,200,2025-02-02
2,3,3,Dave,4,300,2025-02-03


In [7]:
# With no `on=` / `how=` given, merge joins on the shared column ("id")
# and keeps only the matching rows -- similar to an inner join.
df1.merge(df2)

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1,Robert,100,2025-02-01
1,2,2,Peter,200,2025-02-02


In [8]:
# Inner join on "id": only ids present in both frames survive.
df1.merge(df2, how="inner", on="id")

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1,Robert,100,2025-02-01
1,2,2,Peter,200,2025-02-02


In [9]:
# Outer join on "id": ids from either frame are kept, gaps filled with NaN.
df1.merge(df2, how="outer", on="id")

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1.0,Robert,100.0,2025-02-01
1,2,2.0,Peter,200.0,2025-02-02
2,3,3.0,Dave,,
3,4,,,300.0,2025-02-03


In [10]:
# Right join on "id": every row of df2 is kept, df1 columns are NaN where no match exists.
df1.merge(df2, how="right", on="id")

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1.0,Robert,100,2025-02-01
1,2,2.0,Peter,200,2025-02-02
2,4,,,300,2025-02-03


In [11]:
# Left join on "id": every row of df1 is kept, df2 columns are NaN where no match exists.
df1.merge(df2, how="left", on="id")

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1,Robert,100.0,2025-02-01
1,2,2,Peter,200.0,2025-02-02
2,3,3,Dave,,


In [12]:
# Use "id" as the row index so the frames can be merged on their index.
# Guarded so that re-running this cell is a no-op instead of raising KeyError
# (after the first run "id" is the index and no longer a column).
if df1.index.name != "id":
    df1 = df1.set_index("id")
df1

Unnamed: 0_level_0,customer_id,customer_name
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,Robert
2,2,Peter
3,3,Dave


In [14]:
# Use "id" as the row index so the frames can be merged on their index.
# Guarded so that re-running this cell is a no-op instead of raising KeyError
# (after the first run "id" is the index and no longer a column).
if df2.index.name != "id":
    df2 = df2.set_index("id")
df2

Unnamed: 0_level_0,order_id,order_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,100,2025-02-01
2,200,2025-02-02
4,300,2025-02-03


In [15]:
# Join on the row index of both frames; with no `how=` only index labels present in both are kept.
df1.merge(df2, left_index=True, right_index=True)

Unnamed: 0_level_0,customer_id,customer_name,order_id,order_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,Robert,100,2025-02-01
2,2,Peter,200,2025-02-02


In [16]:
# Outer join on the row index: index labels from either frame are kept.
df1.merge(df2, left_index=True, right_index=True, how="outer")

Unnamed: 0_level_0,customer_id,customer_name,order_id,order_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.0,Robert,100.0,2025-02-01
2,2.0,Peter,200.0,2025-02-02
3,3.0,Dave,,
4,,,300.0,2025-02-03


In [17]:
# Right join on the row index: every index label of df2 is kept.
df1.merge(df2, left_index=True, right_index=True, how="right")

Unnamed: 0_level_0,customer_id,customer_name,order_id,order_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.0,Robert,100,2025-02-01
2,2.0,Peter,200,2025-02-02
4,,,300,2025-02-03


In [19]:
# Score table: one row per student (row labels are the names), one column per subject.
df = pd.DataFrame(
    {"영어": [60, 70], "수학": [100, 50]},
    index=["Dave", "David"],
)
df

Unnamed: 0,영어,수학
Dave,60,100
David,70,50


In [20]:
def func(df_data):
    """Print the type, index and values of ``df_data``, then return it unchanged.

    Handy as an argument to ``DataFrame.apply`` to inspect what each call receives.
    """
    print(type(df_data))
    # Same order as the printed report: index first, then the raw values.
    for attr_name in ("index", "values"):
        print(getattr(df_data, attr_name))
    return df_data

In [21]:
# axis="index" (same as axis=0): func is called once per column.
df_func = df.apply(func, axis="index")

<class 'pandas.core.series.Series'>
Index(['Dave', 'David'], dtype='object')
[60 70]
<class 'pandas.core.series.Series'>
Index(['Dave', 'David'], dtype='object')
[100  50]


In [22]:
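# apply() calls the function once per line of the DataFrame: with axis=0 each column is passed in turn; with axis=1 it is applied row by row instead.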

Unnamed: 0,영어,수학
Dave,60,100
David,70,50


In [23]:
# Sample records for the groupby demo: gender, name, math score, Korean score.
# "Dave" appears twice, so grouping by name has something to aggregate.
df = pd.DataFrame.from_dict({
    "성별": ["남", "남", "남"],
    "이름": ["David", "Dave", "Dave"],
    "수학": [100, 50, 80],
    "국어": [80, 70, 50],
})

df

Unnamed: 0,성별,이름,수학,국어
0,남,David,100,80
1,남,Dave,50,70
2,남,Dave,80,50


In [25]:
# Keep only the grouping key and the numeric score columns, so the
# non-numeric "성별" column does not take part in the aggregation.
selected_columns = ["이름", "수학", "국어"]

df = df[selected_columns]

# Per-name average of each score column.
# (Swap .mean() for .sum() to get per-name totals instead.)
df.groupby("이름").mean()

Unnamed: 0_level_0,수학,국어
이름,Unnamed: 1_level_1,Unnamed: 2_level_1
Dave,65.0,60.0
David,100.0,80.0


FileNotFoundError: [WinError 3] 지정된 경로를 찾을 수 없습니다: 'COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/'