## Create & Update Rows and Columns

In [1]:
import pandas as pd

In [2]:
# Sample records: one dict per friend.
friend_dict_list = [
    {"name": "John", "age": 15, "job": "student"},
    {"name": "Jenny", "age": 30, "job": "developer"},
    {"name": "Nate", "age": 30, "job": "teacher"},
]

# `columns` selects the columns to keep and the order they appear in.
df = pd.DataFrame(friend_dict_list, columns=["name", "age", "job"])
df

Unnamed: 0,name,age,job
0,John,15,student
1,Jenny,30,developer
2,Nate,30,teacher


In [3]:
# Create a new column: assigning a scalar gives every row that same value.
df['salary'] = 0
df

Unnamed: 0,name,age,job,salary
0,John,15,student,0
1,Jenny,30,developer,0
2,Nate,30,teacher,0


In [4]:
import numpy as np

In [5]:
# Overwrite 'salary' with a label per row: 'no' for students, 'yes' for everyone else.
df['salary'] = np.where(df['job'] == 'student', 'no', 'yes')
df

Unnamed: 0,name,age,job,salary
0,John,15,student,no
1,Jenny,30,developer,yes
2,Nate,30,teacher,yes


In [6]:
# Exam scores: one dict per student.
friend_dict_list = [
    {"name": "John", "midterm": 95, "final": 85},
    {"name": "Jenny", "midterm": 85, "final": 80},
    {"name": "Nate", "midterm": 30, "final": 10},
]

df = pd.DataFrame(friend_dict_list, columns=["name", "midterm", "final"])
df

Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,30,10


In [8]:
# Row-wise sum of the two exam columns.
# Attribute access (df.midterm + df.final) reads the same columns.
df['total'] = df['midterm'] + df['final']
df

Unnamed: 0,name,midterm,final,total
0,John,95,85,180
1,Jenny,85,80,165
2,Nate,30,10,40


In [9]:
# Average of the two exams: half of the total.
df['average'] = df['total'] / 2
df

Unnamed: 0,name,midterm,final,total,average
0,John,95,85,180,90.0
1,Jenny,85,80,165,82.5
2,Nate,30,10,40,20.0


In [11]:
# Turn each average into a letter grade: >= 90 -> 'A', >= 80 -> 'B', otherwise 'F'.
grade = [
    'A' if average >= 90 else 'B' if average >= 80 else 'F'
    for average in df['average']
]

df['grade'] = grade
df

Unnamed: 0,name,midterm,final,total,average,grade
0,John,95,85,180,90.0,A
1,Jenny,85,80,165,82.5,B
2,Nate,30,10,40,20.0,F


In [12]:
def pass_or_fail(row):
    """Return 'Fail' when the given grade is 'F', and 'Pass' for any other value."""
    return 'Fail' if row == 'F' else 'Pass'

In [13]:
# .apply calls the given function once for every value in the column.
df['grade'] = df['grade'].apply(pass_or_fail)
df

Unnamed: 0,name,midterm,final,total,average,grade
0,John,95,85,180,90.0,Pass
1,Jenny,85,80,165,82.5,Pass
2,Nate,30,10,40,20.0,Fail


In [26]:
# Two date strings in 'YYYY-MM-DD' form.
data_list = [
    {"yyyy_mm_dd": "2000-03-04"},
    {"yyyy_mm_dd": "2020-01-07"},
]

df = pd.DataFrame(data_list, columns=["yyyy_mm_dd"])
df

Unnamed: 0,yyyy_mm_dd
0,2000-03-04
1,2020-01-07


In [28]:
def extract_year(row):
    """Return the part of a 'YYYY-MM-DD' style string before its first '-'."""
    year, _, _ = row.partition('-')
    return year

In [29]:
# Once the column exists it can be read either as df['year'] or as df.year.
df['year'] = df['yyyy_mm_dd'].apply(extract_year)
df

Unnamed: 0,yyyy_mm_dd,year
0,2000-03-04,2000
1,2020-01-07,2020


In [30]:
# Exam scores again: the base frame for the row-append example.
friend_dict_list = [
    {"name": "John", "midterm": 95, "final": 85},
    {"name": "Jenny", "midterm": 85, "final": 80},
    {"name": "Nate", "midterm": 30, "final": 10},
]

df = pd.DataFrame(friend_dict_list, columns=["name", "midterm", "final"])
df

Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,30,10


In [31]:
# A one-row frame built from a list of row values.
df2 = pd.DataFrame([["Ben", 50, 50]], columns=["name", "midterm", "final"])
df2

Unnamed: 0,name,midterm,final
0,Ben,50,50


In [33]:
# ignore_index=True renumbers the combined rows 0..n-1;
# ignore_index=False keeps the index labels each frame already had.
# DataFrame.append was deprecated and later removed from pandas, so the rows
# are appended with pd.concat, which produces the same result.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,30,10
3,Ben,50,50


## Group By

In [34]:
# Students with their major and sex; used for the group-by examples.
student_list = [
    {"name": "John", "major": "Computer Science", "sex": "male"},
    {"name": "Nate", "major": "Computer Science", "sex": "male"},
    {"name": "Abraham", "major": "Physics", "sex": "male"},
    {"name": "Brian", "major": "Psychology", "sex": "male"},
    {"name": "Janny", "major": "Economics", "sex": "female"},
    {"name": "Yuna", "major": "Economics", "sex": "female"},
    {"name": "Jeniffer", "major": "Computer Science", "sex": "female"},
    {"name": "Edward", "major": "Computer Science", "sex": "male"},
    {"name": "Zara", "major": "Psychology", "sex": "female"},
    {"name": "Wendy", "major": "Economics", "sex": "female"},
    {"name": "Sera", "major": "Psychology", "sex": "female"},
]
df = pd.DataFrame(student_list, columns=["name", "major", "sex"])
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [35]:
# Group the rows by the value of their 'major' column.
groupby_major = df.groupby(by='major')

In [36]:
# Mapping of each group key (major) to the index labels of the rows in that group.
groupby_major.groups

{'Computer Science': [0, 1, 6, 7], 'Economics': [4, 5, 9], 'Physics': [2], 'Psychology': [3, 8, 10]}

In [37]:
# Iterating a GroupBy yields (group key, sub-DataFrame) pairs.
for major, members in groupby_major:
    print(f"{major} : {len(members)}")
    print(members)
    print()

Computer Science : 4
       name             major     sex
0      John  Computer Science    male
1      Nate  Computer Science    male
6  Jeniffer  Computer Science  female
7    Edward  Computer Science    male

Economics : 3
    name      major     sex
4  Janny  Economics  female
5   Yuna  Economics  female
9  Wendy  Economics  female

Physics : 1
      name    major   sex
2  Abraham  Physics  male

Psychology : 3
     name       major     sex
3   Brian  Psychology    male
8    Zara  Psychology  female
10   Sera  Psychology  female



In [41]:
# Group sizes as a two-column frame: the group key ('major') and its row count ('count').
df_major_cnt = groupby_major.size().reset_index(name='count')
df_major_cnt

Unnamed: 0,major,count
0,Computer Science,4
1,Economics,3
2,Physics,1
3,Psychology,3


In [43]:
# Group the rows by the value of their 'sex' column.
groupby_sex = df.groupby(by='sex')

In [45]:
# Print each group's key, its size and its rows.
for sex, members in groupby_sex:
    print(f"{sex} : {len(members)}")
    print(members)
    print()

female : 6
        name             major     sex
4      Janny         Economics  female
5       Yuna         Economics  female
6   Jeniffer  Computer Science  female
8       Zara        Psychology  female
9      Wendy         Economics  female
10      Sera        Psychology  female

male : 5
      name             major   sex
0     John  Computer Science  male
1     Nate  Computer Science  male
2  Abraham           Physics  male
3    Brian        Psychology  male
7   Edward  Computer Science  male



## Remove duplicate rows in a DataFrame

In [46]:
# Same students as before, plus a last row that repeats the first row exactly.
student_list = [
    {"name": "John", "major": "Computer Science", "sex": "male"},
    {"name": "Nate", "major": "Computer Science", "sex": "male"},
    {"name": "Abraham", "major": "Physics", "sex": "male"},
    {"name": "Brian", "major": "Psychology", "sex": "male"},
    {"name": "Janny", "major": "Economics", "sex": "female"},
    {"name": "Yuna", "major": "Economics", "sex": "female"},
    {"name": "Jeniffer", "major": "Computer Science", "sex": "female"},
    {"name": "Edward", "major": "Computer Science", "sex": "male"},
    {"name": "Zara", "major": "Psychology", "sex": "female"},
    {"name": "Wendy", "major": "Economics", "sex": "female"},
    {"name": "Sera", "major": "Psychology", "sex": "female"},
    {"name": "John", "major": "Computer Science", "sex": "male"},
]
df = pd.DataFrame(student_list, columns=["name", "major", "sex"])
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [47]:
# Flag rows that repeat an earlier row in every column; the first occurrence stays False.
df.duplicated()

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11     True
dtype: bool

In [48]:
# Show the frame without fully duplicated rows (the result is displayed, not assigned back to df).
df.drop_duplicates()

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [52]:
# The last row repeats the name 'John' but has a different major, so it is not a full duplicate.
student_list = [
    {"name": "John", "major": "Computer Science", "sex": "male"},
    {"name": "Nate", "major": "Computer Science", "sex": "male"},
    {"name": "Abraham", "major": "Physics", "sex": "male"},
    {"name": "Brian", "major": "Psychology", "sex": "male"},
    {"name": "Janny", "major": "Economics", "sex": "female"},
    {"name": "Yuna", "major": "Economics", "sex": "female"},
    {"name": "Jeniffer", "major": "Computer Science", "sex": "female"},
    {"name": "Edward", "major": "Computer Science", "sex": "male"},
    {"name": "Zara", "major": "Psychology", "sex": "female"},
    {"name": "Wendy", "major": "Economics", "sex": "female"},
    {"name": "Sera", "major": "Psychology", "sex": "female"},
    {"name": "John", "major": "Economics", "sex": "male"},
]
df = pd.DataFrame(student_list, columns=["name", "major", "sex"])
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [53]:
# .duplicated() is True only when every column of a row matches an earlier row;
# .drop_duplicates() follows the same rule.
df.duplicated()

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
dtype: bool

In [55]:
# Look for duplicates considering only the values of the listed columns.
df.duplicated(subset=['name'])

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11     True
dtype: bool

In [60]:
# `keep` chooses which of the duplicated rows survives; the default is 'first'.
df.drop_duplicates(subset=['name'], keep='first')

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [61]:
# keep='last' retains the last row of each set of duplicated names instead of the first.
df.drop_duplicates(subset=['name'], keep='last')

Unnamed: 0,name,major,sex
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female
10,Sera,Psychology,female


## Find and Replace NaN Values

In [62]:
# The last two rows each contain one missing value (None).
student_list = [
    {"name": "John", "major": "Computer Science", "sex": "male"},
    {"name": "Nate", "major": "Computer Science", "sex": "male"},
    {"name": "Abraham", "major": "Physics", "sex": "male"},
    {"name": "Brian", "major": "Psychology", "sex": "male"},
    {"name": "Janny", "major": "Economics", "sex": "female"},
    {"name": "Yuna", "major": "Economics", "sex": "female"},
    {"name": "Jeniffer", "major": "Computer Science", "sex": "female"},
    {"name": "Edward", "major": "Computer Science", "sex": "male"},
    {"name": "Zara", "major": "Psychology", "sex": "female"},
    {"name": "Wendy", "major": "Economics", "sex": "female"},
    {"name": "Nate", "major": None, "sex": "male"},
    {"name": "John", "major": "Computer Science", "sex": None},
]
df = pd.DataFrame(student_list, columns=["name", "major", "sex"])
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [65]:
# (number of rows, number of columns)
df.shape

(12, 3)

In [66]:
# Per-column summary: non-null count and dtype. A count below the row total reveals missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    12 non-null     object
 1   major   11 non-null     object
 2   sex     11 non-null     object
dtypes: object(3)
memory usage: 416.0+ bytes


In [67]:
# Element-wise mask that is True where a value is missing.
df.isna()  # equivalent to df.isnull()

Unnamed: 0,name,major,sex
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,False
7,False,False,False
8,False,False,False
9,False,False,False


In [68]:
# Same missing-value mask, requested through the isnull() spelling.
df.isnull()

Unnamed: 0,name,major,sex
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,False
7,False,False,False
8,False,False,False
9,False,False,False


In [74]:
# Teachers and students; the last two rows have no age.
school_id_list = [
    {"name": "John", "job": "teacher", "age": 40},
    {"name": "Nate", "job": "teacher", "age": 35},
    {"name": "Yuna", "job": "teacher", "age": 37},
    {"name": "Abraham", "job": "student", "age": 10},
    {"name": "Brian", "job": "student", "age": 12},
    {"name": "Janny", "job": "student", "age": 11},
    {"name": "Nate", "job": "teacher", "age": None},
    {"name": "John", "job": "student", "age": None},
]
df = pd.DataFrame(school_id_list, columns=["name", "job", "age"])
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,
7,John,student,


In [76]:
# Replace every missing age with 0.
df['age'] = df['age'].fillna(0)
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,0.0
7,John,student,0.0


In [77]:
# Rebuild the frame so the two ages are missing again for the next example.
school_id_list = [
    {"name": "John", "job": "teacher", "age": 40},
    {"name": "Nate", "job": "teacher", "age": 35},
    {"name": "Yuna", "job": "teacher", "age": 37},
    {"name": "Abraham", "job": "student", "age": 10},
    {"name": "Brian", "job": "student", "age": 12},
    {"name": "Janny", "job": "student", "age": 11},
    {"name": "Nate", "job": "teacher", "age": None},
    {"name": "John", "job": "student", "age": None},
]
df = pd.DataFrame(school_id_list, columns=["name", "job", "age"])
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,
7,John,student,


In [78]:
# Fill each missing age with the median age of the rows that share the same job.
# groupby('job')['age'].transform('median') returns one value per row of df, so
# fillna can take the replacement for each row from it.
# The result is assigned back to the column: calling fillna(..., inplace=True)
# on the df['age'] selection is chained assignment and is not guaranteed to
# update df itself.
df['age'] = df['age'].fillna(df.groupby('job')['age'].transform('median'))
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,37.0
7,John,student,11.0


## Apply

In [79]:
# Birth dates as 'YYYY-MM-DD' strings.
date_list = [
    {"yyyy-mm-dd": "2000-06-27"},
    {"yyyy-mm-dd": "2002-09-24"},
    {"yyyy-mm-dd": "2005-12-20"},
]
df = pd.DataFrame(date_list, columns=["yyyy-mm-dd"])
df

Unnamed: 0,yyyy-mm-dd
0,2000-06-27
1,2002-09-24
2,2005-12-20


In [82]:
def extract_year(column):
    """Return everything before the first '-' in the given string."""
    return column.split('-', 1)[0]

In [83]:
# Derive a 'year' column by running extract_year on every date string.
df['year'] = df['yyyy-mm-dd'].apply(extract_year)
df

Unnamed: 0,yyyy-mm-dd,year
0,2000-06-27,2000
1,2002-09-24,2002
2,2005-12-20,2005


In [85]:
# Passing an extra argument to the function used with apply
def calc_age(year, current_year):
    """Return current_year minus year, converting year with int() first."""
    birth_year = int(year)
    return current_year - birth_year

In [87]:
# The extra keyword argument given to apply (current_year) is passed on to calc_age for each value.
df['age'] = df['year'].apply(calc_age, current_year=2021)
df

Unnamed: 0,yyyy-mm-dd,year,age
0,2000-06-27,2000,21
1,2002-09-24,2002,19
2,2005-12-20,2005,16


In [88]:
def get_introduce(age, prefix, suffix):
    """Return prefix, the text form of age, and suffix joined into one string."""
    return "".join((prefix, str(age), suffix))

In [89]:
# prefix and suffix are forwarded by apply to get_introduce for every age.
df['introduce'] = df['age'].apply(get_introduce, prefix="I'm ", suffix=" years old.")
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce
0,2000-06-27,2000,21,I'm 21 years old.
1,2002-09-24,2002,19,I'm 19 years old.
2,2005-12-20,2005,16,I'm 16 years old.


In [90]:
# Using several columns at once with apply
def get_introduce2(row):
    """Build one sentence from the row's `year` and `introduce` fields.

    NOTE(review): nothing is inserted between the two parts, so the result
    reads like "I was born in 2000I'm 21 years old." — confirm whether a
    separator is wanted.
    """
    born = "I was born in " + str(row.year)
    return born + row.introduce

In [91]:
# axis = 1 makes apply call the function once per row, so it can read several columns of that row.
df['introduce2'] = df.apply(get_introduce2, axis = 1)
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce,introduce2
0,2000-06-27,2000,21,I'm 21 years old.,I was born in 2000I'm 21 years old.
1,2002-09-24,2002,19,I'm 19 years old.,I was born in 2002I'm 19 years old.
2,2005-12-20,2005,16,I'm 16 years old.,I was born in 2005I'm 16 years old.


In [95]:
# Date strings for the map example.
date_list = [
    {"date": "2000-06-27"},
    {"date": "2002-09-24"},
    {"date": "2005-12-20"},
]
df = pd.DataFrame(date_list, columns=["date"])
df

Unnamed: 0,date
0,2000-06-27
1,2002-09-24
2,2005-12-20


In [97]:
def get_year(row):
    """Return the leading part of the string, up to (not including) its first '-'."""
    head, _sep, _tail = row.partition('-')
    return head

In [99]:
# Extract only the year; with a function argument, map is used like apply and gives the same result.
df['year'] = df['date'].map(get_year)
df

Unnamed: 0,date,year
0,2000-06-27,2000
1,2002-09-24,2002
2,2005-12-20,2005


In [100]:
# Age and job per person; without `columns`, the dict keys become the columns.
job_list = [
    {"age": 20, "job": "student"},
    {"age": 30, "job": "developer"},
    {"age": 30, "job": "teacher"},
]
df = pd.DataFrame(job_list)
df

Unnamed: 0,age,job
0,20,student
1,30,developer
2,30,teacher


In [102]:
# Passing a dict to map replaces each value with the dict entry for that value.
# NOTE(review): the recorded output has an empty job column, which looks like
# the result of running this cell a second time — confirm.
df['job'] = df['job'].map({'student': 1, 'developer': 2, 'teacher': 3})
df

Unnamed: 0,age,job
0,20,1
1,30,2
2,30,3


### applymap()

In [103]:

# Small numeric table with positive and negative decimals.
x_y = [
    {"x": 5.5, "y": -5.6, "z": -1.1},
    {"x": -5.2, "y": 5.5, "z": -2.2},
    {"x": -1.6, "y": -4.5, "z": -3.3},
]
df = pd.DataFrame(x_y)
df

Unnamed: 0,x,y,z
0,5.5,-5.6,-1.1
1,-5.2,5.5,-2.2
2,-1.6,-4.5,-3.3


In [104]:
import numpy as np

In [105]:
# Round every value by applying np.around to each element.
# np.around rounds exact halves to the nearest even value, e.g. -4.5 -> -4.0.
# NOTE(review): recent pandas releases deprecate DataFrame.applymap in favour of
# DataFrame.map — confirm against the installed pandas version.
df = df.applymap(np.around)
df

Unnamed: 0,x,y,z
0,6.0,-6.0,-1.0
1,-5.0,6.0,-2.0
2,-2.0,-4.0,-3.0


In [108]:
# Twenty people and their jobs; several jobs appear more than once.
job_list = [
    {"name": "John", "job": "teacher"},
    {"name": "Nate", "job": "teacher"},
    {"name": "Fred", "job": "teacher"},
    {"name": "Abraham", "job": "student"},
    {"name": "Brian", "job": "student"},
    {"name": "Janny", "job": "developer"},
    {"name": "Nate", "job": "teacher"},
    {"name": "Obrian", "job": "dentist"},
    {"name": "Yuna", "job": "teacher"},
    {"name": "Rob", "job": "lawyer"},
    {"name": "Brian", "job": "student"},
    {"name": "Matt", "job": "student"},
    {"name": "Wendy", "job": "banker"},
    {"name": "Edward", "job": "teacher"},
    {"name": "Ian", "job": "teacher"},
    {"name": "Chris", "job": "banker"},
    {"name": "Philip", "job": "lawyer"},
    {"name": "Janny", "job": "basketball player"},
    {"name": "Gwen", "job": "teacher"},
    {"name": "Jessy", "job": "student"},
]
df = pd.DataFrame(job_list, columns=["name", "job"])
df

Unnamed: 0,name,job
0,John,teacher
1,Nate,teacher
2,Fred,teacher
3,Abraham,student
4,Brian,student
5,Janny,developer
6,Nate,teacher
7,Obrian,dentist
8,Yuna,teacher
9,Rob,lawyer


In [109]:
# List the distinct values of a column (every entry in 'job' is one of these).
df['job'].unique()

array(['teacher', 'student', 'developer', 'dentist', 'lawyer', 'banker',
       'basketball player'], dtype=object)

In [111]:
# Count how many rows hold each distinct value.
df['job'].value_counts()

teacher              8
student              5
banker               2
lawyer               2
developer            1
basketball player    1
dentist              1
Name: job, dtype: int64

## Concatenate Two DataFrames

### concat with column (frames share the same columns; rows are stacked, axis=0)

In [112]:
# Two record lists with the same keys, each turned into its own frame.
l1 = [
    {"name": "John", "job": "teacher"},
    {"name": "Nate", "job": "student"},
    {"name": "Fred", "job": "developer"},
]

l2 = [
    {"name": "Ed", "job": "dentist"},
    {"name": "Jack", "job": "farmer"},
    {"name": "Ted", "job": "designer"},
]

df1 = pd.DataFrame(l1, columns=["name", "job"])
df2 = pd.DataFrame(l2, columns=["name", "job"])

In [113]:
# Display the first frame.
df1

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer


In [114]:
# Display the second frame.
df2

Unnamed: 0,name,job
0,Ed,dentist
1,Jack,farmer
2,Ted,designer


In [116]:
# By default concat keeps the index labels each frame already had (0, 1, 2, 0, 1, 2).
result = pd.concat((df1, df2))
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
0,Ed,dentist
1,Jack,farmer
2,Ted,designer


In [118]:
# ignore_index=True renumbers the combined rows so the index runs 0..n-1.
result = pd.concat((df1, df2), ignore_index=True)
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
3,Ed,dentist
4,Jack,farmer
5,Ted,designer


In [120]:
# DataFrame.append gave the same result as pd.concat and accepted ignore_index
# in the same way, but it was deprecated and later removed from pandas, so
# pd.concat is used here. Without ignore_index each frame keeps its own index
# labels (0, 1, 2, 0, 1, 2).
result = pd.concat([df1, df2])
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
0,Ed,dentist
1,Jack,farmer
2,Ted,designer


In [121]:
# pd.concat replaces the removed DataFrame.append; ignore_index=True renumbers
# the combined rows 0..n-1 exactly as append(..., ignore_index=True) did.
result = pd.concat([df1, df2], ignore_index=True)
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
3,Ed,dentist
4,Jack,farmer
5,Ted,designer


### concat with row (frames share the same row index; columns are placed side by side, axis=1)

In [124]:
# Two record lists with different keys but the same number of rows.
l1 = [
    {"name": "John", "job": "teacher"},
    {"name": "Nate", "job": "student"},
    {"name": "Jack", "job": "developer"},
]

l2 = [
    {"age": 25, "country": "U.S"},
    {"age": 30, "country": "U.K"},
    {"age": 45, "country": "Korea"},
]

df1 = pd.DataFrame(l1, columns=["name", "job"])
df2 = pd.DataFrame(l2, columns=["age", "country"])

In [125]:
# Display the name/job frame.
df1

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Jack,developer


In [126]:
# Display the age/country frame.
df2

Unnamed: 0,age,country
0,25,U.S
1,30,U.K
2,45,Korea


In [131]:
# The axis argument picks the direction of the concatenation:
# axis=0 (the default) stacks the frames as additional rows,
# axis=1 / 'columns' places them side by side as additional columns.
# With ignore_index=True the column labels along that axis are replaced by 0..n-1.
result = pd.concat([df1, df2], axis='columns', ignore_index=True)
result

Unnamed: 0,0,1,2,3
0,John,teacher,25,U.S
1,Nate,student,30,U.K
2,Jack,developer,45,Korea


### Bonus : 두 개의 리스트를 DataFrame을 사용하여 합치기

In [132]:
# Ground-truth labels and the corresponding predictions, position by position.
label = list(range(1, 6))
prediction = [1, 2, 2, 4, 4]

In [133]:
# Put the two lists next to each other: each list becomes one column.
comparison = pd.DataFrame(dict(label=label, prediction=prediction))
comparison

Unnamed: 0,label,prediction
0,1,1
1,2,2
2,3,2
3,4,4
4,5,4
