In [2]:
import pandas as pd

In [3]:
# Handling missing data.
#
# Build a small four-row example frame (one row per student) that the
# following cells use to demonstrate NaN handling.
data = {
    "name": ["kim", "lee", "choi", "park"],
    "score": [100, 95, 90, 85],
    "grade": ["A", "A", "B", "B"],
}

df = pd.DataFrame(data=data)
df

Unnamed: 0,name,score,grade
0,kim,100,A
1,lee,95,A
2,choi,90,B
3,park,85,B


In [4]:
import numpy as np

In [5]:
# When a numeric value is not known yet it can be stored as NaN.
# Assigning the scalar np.nan broadcasts it to every row of the new column.
df["point"] = np.nan
df

Unnamed: 0,name,score,grade,point
0,kim,100,A,
1,lee,95,A,
2,choi,90,B,
3,park,85,B,


In [6]:
# Check for NaN values: returns a boolean frame of the same shape, True
# wherever a cell is missing. (isna is the same method as isnull.)
df.isna()

Unnamed: 0,name,score,grade,point
0,False,False,False,True
1,False,False,False,True
2,False,False,False,True
3,False,False,False,True


In [7]:
# Replace NaN values with 0.
# fillna returns a new frame; rebinding `df` (instead of inplace=True)
# leaves later cells with the same filled data while keeping the call
# chainable and free of in-place mutation.
df = df.fillna(value=0)
df

Unnamed: 0,name,score,grade,point
0,kim,100,A,0.0
1,lee,95,A,0.0
2,choi,90,B,0.0
3,park,85,B,0.0


In [8]:
# Add a second column in which only the last row is missing, so that the
# next cell has observed values to compute a fill statistic from.
df["point2"] = [50, 70, 100, np.nan]
df

Unnamed: 0,name,score,grade,point,point2
0,kim,100,A,0.0,50.0
1,lee,95,A,0.0,70.0
2,choi,90,B,0.0,100.0
3,park,85,B,0.0,


In [9]:
# Replace NaN with a statistic of the other values (the mean here; the
# maximum could be used the same way via Series.max()).
# The fill value is passed as a {column: value} dict so that only 'point2'
# is filled; a bare scalar would also overwrite NaNs in every other column
# with point2's mean. mean() skips NaN by default, so the average is taken
# over the observed entries only.
# The result is only displayed, not assigned: df keeps its NaN.
df.fillna(value={'point2': df['point2'].mean()})

Unnamed: 0,name,score,grade,point,point2
0,kim,100,A,0.0,50.0
1,lee,95,A,0.0,70.0
2,choi,90,B,0.0,100.0
3,park,85,B,0.0,73.333333


In [23]:
# Remove the rows that contain NaN.
df['point2'] = [50, 70, 100, np.nan]
# dropna has to be *called*: a bare `df.dropna` only evaluates to the bound
# method and drops nothing. The call returns a new frame without the NaN
# rows; df itself is not modified because the result is not assigned.
df.dropna()

In [24]:
# Swap rows and columns: the former column names become the index and the
# former row labels become the columns.
# NOTE: this rebinds `df`, so re-running the cell flips the frame back.
df = df.transpose()
df

Unnamed: 0,0,1,2,3
name,kim,lee,choi,park
score,100,95,90,85
grade,A,A,B,B
point,0.0,0.0,0.0,0.0
point2,50.0,70.0,100.0,


In [18]:
# Two batches of person records; every record is a dict with the keys
# 'name' and 'job'. They are turned into DataFrames in the next cell.
d1 = [
    {'name': person, 'job': job}
    for person, job in (
        ('john', 'teacher'),
        ('Nate', 'student'),
        ('Fred', 'developer'),
    )
]
d2 = [
    {'name': person, 'job': job}
    for person, job in (
        ('ED', 'dentist'),
        ('Jack', 'farmer'),
        ('Ted', 'designer'),
    )
]

In [19]:
# Build one frame per batch of records. For df2 the column order is given
# explicitly; for df1 it is taken from the record keys.
df1 = pd.DataFrame(data=d1)
df2 = pd.DataFrame(data=d2, columns=["name", "job"])

In [20]:
# Display the frame built from d1.
df1

Unnamed: 0,name,job
0,john,teacher
1,Nate,student
2,Fred,developer


In [21]:
# Display the frame built from d2.
df2

Unnamed: 0,name,job
0,ED,dentist
1,Jack,farmer
2,Ted,designer


In [26]:
# Collect both frames in a list; pd.concat takes such a list as its input.
frames = [df1, df2]
# print() shows the plain-text repr of the list and of each frame in it.
print(frames)

[   name        job
0  john    teacher
1  Nate    student
2  Fred  developer,    name       job
0    ED   dentist
1  Jack    farmer
2   Ted  designer]


In [27]:
# Stack the frames on top of each other (axis=0, the default): the rows of
# df2 are appended below the rows of df1. ignore_index=True discards the
# original row labels and renumbers the combined rows from 0.
result = pd.concat(frames, axis=0, ignore_index=True)
result

Unnamed: 0,name,job
0,john,teacher
1,Nate,student
2,Fred,developer
3,ED,dentist
4,Jack,farmer
5,Ted,designer


In [28]:
# Place the frames side by side (axis="columns", i.e. axis=1): rows are
# aligned on the index and df2's columns are added to the right of df1's.
# Along this axis ignore_index=True replaces the column labels with
# 0, 1, 2, ... instead of renumbering the rows.
result = pd.concat(frames, axis="columns", ignore_index=True)
result

Unnamed: 0,0,1,2,3
0,john,teacher,ED,dentist
1,Nate,student,Jack,farmer
2,Fred,developer,Ted,designer


In [29]:
# Append df2's rows below df1's.
# DataFrame.append used to do this, but it was deprecated (it emits a
# FutureWarning) and is removed in pandas 2.0, so pd.concat is used; with
# ignore_index=True it produces the same renumbered result.
result = pd.concat([df1, df2], ignore_index=True)
result

  result = df1.append(df2, ignore_index=True)


Unnamed: 0,name,job
0,john,teacher
1,Nate,student
2,Fred,developer
3,ED,dentist
4,Jack,farmer
5,Ted,designer


In [30]:
# Customer master table: one row per customer (customer number, name).
m_df = pd.DataFrame(
    {
        "고객번호": [1001, 1002, 1003, 1004, 1005],
        "이름": ["AAA", "BBB", "CCC", "DDD", "EEE"],
    },
    columns=["고객번호", "이름"],
)

m_df

Unnamed: 0,고객번호,이름
0,1001,AAA
1,1002,BBB
2,1003,CCC
3,1004,DDD
4,1005,EEE


In [31]:
# Purchase table: one row per purchase (customer number, amount).
# Customer 1001 appears twice and 1006 has no entry in the master table,
# which is what makes the join variants below differ.
b_df = pd.DataFrame(
    {
        "고객번호": [1001, 1001, 1003, 1004, 1006],
        "금액": [1000, 2000, 1500, 500, 700],
    },
    columns=["고객번호", "금액"],
)

b_df

Unnamed: 0,고객번호,금액
0,1001,1000
1,1001,2000
2,1003,1500
3,1004,500
4,1006,700


In [33]:
# Inner join: keep only the customer numbers present in both frames.
# No `on` is given, so pandas joins on the columns the two frames share.
inner_result = m_df.merge(b_df, how="inner")
inner_result

Unnamed: 0,고객번호,이름,금액
0,1001,AAA,1000
1,1001,AAA,2000
2,1003,CCC,1500
3,1004,DDD,500


In [35]:
# Left join: keep every row of m_df; customers without a matching purchase
# get NaN in the amount column.
l_result = m_df.merge(b_df, how="left")
l_result

Unnamed: 0,고객번호,이름,금액
0,1001,AAA,1000.0
1,1001,AAA,2000.0
2,1002,BBB,
3,1003,CCC,1500.0
4,1004,DDD,500.0
5,1005,EEE,


In [36]:
# Right join: keep every row of b_df; purchases whose customer number is
# missing from m_df get NaN in the name column.
r_result = m_df.merge(b_df, how="right")
r_result

Unnamed: 0,고객번호,이름,금액
0,1001,AAA,1000
1,1001,AAA,2000
2,1003,CCC,1500
3,1004,DDD,500
4,1006,,700


In [37]:
# Outer join: keep the rows of both frames; whichever side has no match is
# filled with NaN.
outer_result = m_df.merge(b_df, how="outer")
outer_result

Unnamed: 0,고객번호,이름,금액
0,1001,AAA,1000.0
1,1001,AAA,2000.0
2,1002,BBB,
3,1003,CCC,1500.0
4,1004,DDD,500.0
5,1005,EEE,
6,1006,,700.0
