In [1]:
import pandas as pd

# apply 함수 적용

In [10]:
# Sample records: one dict per row, each holding a date string under the
# 'yyyy-mm-dd' key.
date_list = [
    {'yyyy-mm-dd': date_text}
    for date_text in ('1991-09-26', '1992-09-26', '2000-11-26')
]

# Build a single-column frame from the records and display it.
df = pd.DataFrame(data=date_list, columns=['yyyy-mm-dd'])
df

Unnamed: 0,yyyy-mm-dd
0,1991-09-26
1,1992-09-26
2,2000-11-26


In [11]:
def extract_year(column):
    """Return the text before the first '-' in a date string.

    Args:
        column: A single string value such as '1991-09-26'.

    Returns:
        The leading segment (e.g. '1991'); the whole string when it
        contains no '-'.
    """
    leading_part, _, _ = column.partition("-")
    return leading_part

In [12]:
# Series.apply calls extract_year once per element of the 'yyyy-mm-dd' column;
# the results are stored in a new 'year' column (still strings).
df['year'] = df['yyyy-mm-dd'].apply(extract_year)

In [13]:
# Display the frame to check the 'year' column.
df

Unnamed: 0,yyyy-mm-dd,year
0,1991-09-26,1991
1,1992-09-26,1992
2,2000-11-26,2000


In [17]:
def get_korea_age(year, now):
    """Compute an age counted as (current year - birth year) + 1.

    Args:
        year: Birth year; anything accepted by int(), e.g. '1991'.
        now: The current year as a number.

    Returns:
        now - int(year) + 1.
    """
    birth_year = int(year)
    return now - birth_year + 1

In [18]:
# Extra keyword arguments given to Series.apply are forwarded to the function,
# so each element is processed as get_korea_age(<year value>, now=2018).
df['age'] = df['year'].apply(get_korea_age, now = 2018)
df

Unnamed: 0,yyyy-mm-dd,year,age
0,1991-09-26,1991,28
1,1992-09-26,1992,27
2,2000-11-26,2000,19


In [19]:
def get_introduce(age, words):
    """Append an age to an introductory phrase.

    Args:
        age: Value to append; converted with str().
        words: Prefix string placed before the age.

    Returns:
        `words` followed directly by the string form of `age`.
    """
    age_text = str(age)
    return words + age_text

In [29]:
# Same keyword-forwarding pattern: each age is passed to get_introduce together
# with words='My age is ', and the resulting strings form the 'howold' column.
df['howold'] = df['age'].apply(get_introduce, words='My age is ')
df

Unnamed: 0,yyyy-mm-dd,year,age,howold
0,1991-09-26,1991,28,My age is 28
1,1992-09-26,1992,27,My age is 27
2,2000-11-26,2000,19,My age is 19


In [33]:
def intro_myself(row):
    """Build a birth-year sentence from one row.

    Args:
        row: Any object exposing a `year` attribute (e.g. a DataFrame row
            passed by apply with axis=1).

    Returns:
        The literal 'i was born' immediately followed by str(row.year)
        (no separating space).
    """
    return "".join(('i was born', str(row.year)))

In [37]:
# axis=1 makes DataFrame.apply pass each row to intro_myself; the returned
# strings overwrite the 'howold' column.
# Bracket assignment is used instead of `df.howold = ...`: attribute-style
# assignment only updates a column that already exists, and would otherwise set
# a plain attribute on the DataFrame without creating a column.
df['howold'] = df.apply(intro_myself, axis=1)

In [38]:
# Display the frame to check the rewritten 'howold' column.
df

Unnamed: 0,yyyy-mm-dd,year,age,howold
0,1991-09-26,1991,28,i was born1991
1,1992-09-26,1992,27,i was born1992
2,2000-11-26,2000,19,i was born2000


# map 함수 적용

In [46]:
# Sample records for the map examples: one dict per row with a 'date' string.
date_list = [
    {'date': date_text}
    for date_text in ('1991-09-26', '1992-09-26', '2000-11-26')
]

# Build a single-column frame from the records and display it.
df = pd.DataFrame(data=date_list, columns=['date'])
df

Unnamed: 0,date
0,1991-09-26
1,1992-09-26
2,2000-11-26


In [51]:
def extract_year(column):
    """Return the first '-'-separated field of a date string.

    Args:
        column: A single string value such as '1991-09-26'.

    Returns:
        The substring before the first '-' (the whole string if there is
        no '-').
    """
    fields = column.split("-", 1)
    return fields[0]

In [52]:
# Series.map with a function calls it on each element of the 'date' column;
# the extracted year strings are stored in a new 'year' column.
df['year'] = df['date'].map(extract_year)
df

Unnamed: 0,date,year
0,1991-09-26,1991
1,1992-09-26,1992
2,2000-11-26,2000


map을 적용하여 원하는 값으로 변경

In [53]:
# Series.map with a dict replaces each value by its dict entry (values missing
# from the dict would become NaN).
# The lookup starts from the untouched 'date' column rather than from 'year'
# itself, so re-running this cell gives the same result; mapping the already
# converted integers through the string-keyed dict would yield all NaN.
# Bracket assignment is used because it works whether or not the column exists.
df['year'] = df['date'].map(extract_year).map({'1991':1, '1992':2, '2000':0})
df

Unnamed: 0,date,year
0,1991-09-26,1
1,1992-09-26,2
2,2000-11-26,0


# applymap 함수 적용

In [56]:
import numpy as np

In [61]:
# Sample records for the applymap example: one dict per row with a float score.
date_list = [
    {'score': score_value}
    for score_value in (79.1, 85.7, 97.3)
]

# Build a single-column frame from the records and display it.
df = pd.DataFrame(data=date_list, columns=['score'])
df

Unnamed: 0,score
0,79.1
1,85.7
2,97.3


소수점을 반올림하는 np.around를 applymap으로 모든 column의 모든 원소에 적용한다. 일부 column에만 적용하려면 해당 column(Series)에 map을 사용한다.

In [63]:
# Apply np.around to every element of the frame (element-wise over all columns)
# and rebind df to the rounded result.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favor of
# DataFrame.map — confirm the target pandas version before switching.
df = df.applymap(np.around)
df

Unnamed: 0,score
0,79.0
1,86.0
2,97.0


# unique 값 및 개수 확인

In [64]:
import pandas as pd

In [65]:
data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv' # URL of the CSV data to load

In [66]:
data_frame = pd.read_csv(data_url, sep=';') 
# Load the CSV data; fields are separated by ';'.
# If the file were tab-separated instead, use sep='\t' (or delimiter='\t').


data_frame.head(2)# number of rows is configurable; the default is 5

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5


In [68]:
data_frame.quality.unique() # distinct values of the column, similar to a Python set

array([5, 6, 7, 4, 8, 3], dtype=int64)

In [71]:
data_frame.quality.value_counts() # number of rows for each distinct value

5    681
6    638
7    199
4     53
8     18
3     10
Name: quality, dtype: int64

# 프레임 합치기

In [74]:
# Two groups of (name, job) records, each turned into a list of dicts.
list1 = [
    {'name': person, 'job': role}
    for person, role in (('wan', 'master'), ('beom', 'student'), ('seok', 'student'))
]
list2 = [
    {'name': person, 'job': role}
    for person, role in (('tae', 'student'), ('gyu', 'developer'), ('ahn', 'student'))
]

# First frame, with the column order fixed explicitly.
df1 = pd.DataFrame(data=list1, columns=['name', 'job'])
df1

Unnamed: 0,name,job
0,wan,master
1,beom,student
2,seok,student


In [75]:
# Second frame, built from list2 with the same column order as df1.
df2 = pd.DataFrame(list2, columns = ['name','job'])
df2

Unnamed: 0,name,job
0,tae,student
1,gyu,developer
2,ahn,student


In [78]:
# Stack df1 and df2 row-wise; each frame keeps its own index labels, so the
# result has repeated labels 0, 1, 2.
result = pd.concat([df1, df2])

In [79]:
# Display the concatenated frame (note the repeated index labels).
result

Unnamed: 0,name,job
0,wan,master
1,beom,student
2,seok,student
0,tae,student
1,gyu,developer
2,ahn,student


ignore_index=True로 index를 0부터 다시 부여

In [80]:
# ignore_index=True discards the original labels and renumbers the rows 0..n-1.
result = pd.concat([df1, df2], ignore_index = True)

In [81]:
# Display the concatenated frame with its fresh 0..5 index.
result

Unnamed: 0,name,job
0,wan,master
1,beom,student
2,seok,student
3,tae,student
4,gyu,developer
5,ahn,student


In [84]:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, where
# calling it raises AttributeError. pd.concat is the supported way to stack
# frames row-wise and produces the same result on every pandas version.
result = pd.concat([df1, df2], ignore_index = True)
result

Unnamed: 0,name,job
0,wan,master
1,beom,student
2,seok,student
3,tae,student
4,gyu,developer
5,ahn,student


In [87]:
# axis=1 places the frames side by side (column-wise) instead of stacking rows;
# with ignore_index=True the resulting columns are relabeled 0, 1, 2, 3.
result2 = pd.concat([df1, df2],axis=1, ignore_index = True)
result2

Unnamed: 0,0,1,2,3
0,wan,master,tae,student
1,beom,student,gyu,developer
2,seok,student,ahn,student
