# 영화 평점 분석 실습

In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

## 1. 영화 평점 데이터 적재 및 전처리

In [2]:
# 사용자 데이터 읽어오기
users = pd.read_csv('data/movielens/users.dat', sep = '::', engine = 'python',
                   names = ['사용자아이디', '성별','연령','직업','지역'])
users.head()

Unnamed: 0,사용자아이디,성별,연령,직업,지역
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [3]:
# 평점 데이터 읽어오기
ratings = pd.read_csv('data/movielens/ratings.dat', sep = '::', engine = 'python',
                   names = ['사용자아이디', '영화아이디','평점','타임스탬프'])
ratings.head()

Unnamed: 0,사용자아이디,영화아이디,평점,타임스탬프
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [4]:
# 영화데이터 읽어오기
movies = pd.read_csv('data/movielens/movies.dat', sep = '::', engine = 'python',
                   names = ['영화아이디','영화제목','장르'], encoding = 'latin-1')
movies.head()

Unnamed: 0,영화아이디,영화제목,장르
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [56]:
#3개의 데이터프레임을 하나로 합치기
data=pd.merge(users,ratings)
data=pd.merge(data,movies)

In [7]:
data.head()

Unnamed: 0,사용자아이디,성별,연령,직업,지역,영화아이디,평점,타임스탬프,영화제목,장르
0,1,F,1,10,48067,1193,5,978300760,One Flew Over the Cuckoo's Nest (1975),Drama
1,2,M,56,16,70072,1193,5,978298413,One Flew Over the Cuckoo's Nest (1975),Drama
2,12,M,25,12,32793,1193,4,978220179,One Flew Over the Cuckoo's Nest (1975),Drama
3,15,M,25,7,22903,1193,4,978199279,One Flew Over the Cuckoo's Nest (1975),Drama
4,17,M,50,1,95350,1193,5,978158471,One Flew Over the Cuckoo's Nest (1975),Drama


## 2. 보고 싶은 영화 찾기
영화들의 평점 평균을 구하여, 사람들에게 인정받는 (평점이 높은) 영화 찾기

In [17]:
# 영화들의 평점 평균을 구하여, 평점이 높은 영화 찾기 
영화별평점=data.pivot_table(index='영화제목',aggfunc='mean',values='평점')

In [18]:
영화별평점.nlargest(10,'평점')

Unnamed: 0_level_0,평점
영화제목,Unnamed: 1_level_1
"Baby, The (1973)",5.0
Bittersweet Motel (2000),5.0
Follow the Bitch (1998),5.0
"Gate of Heavenly Peace, The (1995)",5.0
Lured (1947),5.0
One Little Indian (1973),5.0
Schlafes Bruder (Brother of Sleep) (1995),5.0
Smashing Time (1967),5.0
Song of Freedom (1936),5.0
Ulysses (Ulisse) (1954),5.0


In [14]:
data.pivot_table(index=['영화아이디','영화제목'],aggfunc='mean',values='평점').sort_values(by='평점',ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,평점
영화아이디,영화제목,Unnamed: 2_level_1
989,Schlafes Bruder (Brother of Sleep) (1995),5.0
3881,Bittersweet Motel (2000),5.0
1830,Follow the Bitch (1998),5.0
3382,Song of Freedom (1936),5.0
787,"Gate of Heavenly Peace, The (1995)",5.0
...,...,...
826,Diebinnen (1995),1.0
3228,Wirey Spindell (1999),1.0
2845,White Boys (1999),1.0
3209,"Loves of Carmen, The (1948)",1.0


평균 평점이 만점인 영화들이 최상위에 위치함. 
일반적으로 평점이 만점인 경우는 대부분 평점의 개수가 매우 적은 경우이므로, 이를 확인하기 위해 평점의 개수도 함께 구해본다. 

In [21]:
영화별평점=data.pivot_table(index='영화제목',aggfunc=['mean','count'],values=['평점'])
영화별평점.head()

Unnamed: 0_level_0,mean,count
Unnamed: 0_level_1,평점,평점
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2
"$1,000,000 Duck (1971)",3.027027,37
'Night Mother (1986),3.371429,70
'Til There Was You (1997),2.692308,52
"'burbs, The (1989)",2.910891,303
...And Justice for All (1979),3.713568,199


In [23]:
영화별평점.nlargest(10,('mean', '평점'))

Unnamed: 0_level_0,mean,count
Unnamed: 0_level_1,평점,평점
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2
"Baby, The (1973)",5.0,1
Bittersweet Motel (2000),5.0,1
Follow the Bitch (1998),5.0,1
"Gate of Heavenly Peace, The (1995)",5.0,3
Lured (1947),5.0,1
One Little Indian (1973),5.0,1
Schlafes Bruder (Brother of Sleep) (1995),5.0,1
Smashing Time (1967),5.0,2
Song of Freedom (1936),5.0,1
Ulysses (Ulisse) (1954),5.0,1


In [25]:
영화별평점.columns = ['평균','개수']
영화별평점.head()

Unnamed: 0_level_0,평균,개수
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1
"$1,000,000 Duck (1971)",3.027027,37
'Night Mother (1986),3.371429,70
'Til There Was You (1997),2.692308,52
"'burbs, The (1989)",2.910891,303
...And Justice for All (1979),3.713568,199


In [26]:
# 평점평균이 4.5이상이고, 평점개수가 1000개 이상인 영화 찾기
영화별평점[(영화별평점.평균>=4.5)&(영화별평점.개수>=1000)]

Unnamed: 0_level_0,평균,개수
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1
"Godfather, The (1972)",4.524966,2223
Schindler's List (1993),4.510417,2304
"Shawshank Redemption, The (1994)",4.554558,2227
"Usual Suspects, The (1995)",4.517106,1783


## [실습 #1] 여자들이 좋아하는 영화 찾기 
### - 여성 평점이 4.0 이상이고 여성 평점의 개수가 500개 이상인 영화

In [31]:
# 성별이 여성인 데이터만 가져오기
female=data[data['성별']=='F']
female.head()

Unnamed: 0,사용자아이디,성별,연령,직업,지역,영화아이디,평점,타임스탬프,영화제목,장르
0,1,F,1,10,48067,1193,5,978300760,One Flew Over the Cuckoo's Nest (1975),Drama
5,18,F,18,3,95825,1193,4,978156168,One Flew Over the Cuckoo's Nest (1975),Drama
7,24,F,25,7,10023,1193,5,978136709,One Flew Over the Cuckoo's Nest (1975),Drama
8,28,F,25,1,14607,1193,3,978125194,One Flew Over the Cuckoo's Nest (1975),Drama
19,59,F,50,1,55413,1193,4,977934292,One Flew Over the Cuckoo's Nest (1975),Drama


In [77]:
# 피벗 테이블 만들기
여성영화=female.pivot_table(index='영화제목',aggfunc=['mean','count'],values=['평점'])
여성영화.head()

Unnamed: 0_level_0,mean,count
Unnamed: 0_level_1,평점,평점
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2
"$1,000,000 Duck (1971)",3.375,16
'Night Mother (1986),3.388889,36
'Til There Was You (1997),2.675676,37
"'burbs, The (1989)",2.793478,92
...And Justice for All (1979),3.828571,35


In [78]:
여성영화.columns = ['평균','개수']
여성영화.head()

Unnamed: 0_level_0,평균,개수
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1
"$1,000,000 Duck (1971)",3.375,16
'Night Mother (1986),3.388889,36
'Til There Was You (1997),2.675676,37
"'burbs, The (1989)",2.793478,92
...And Justice for All (1979),3.828571,35


In [79]:
a=여성영화[(여성영화['평균']>=4)&(여성영화['개수']>=500)].nlargest(10,'평균')
a.head()

Unnamed: 0_level_0,평균,개수
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1
Schindler's List (1993),4.562602,615
"Shawshank Redemption, The (1994)",4.539075,627
"Sixth Sense, The (1999)",4.47741,664
"Wizard of Oz, The (1939)",4.35503,507
"Princess Bride, The (1987)",4.342767,636


In [57]:
# 1. 여성들이 매긴 평점 데이터만 선택
data[data.성별=='F'].pivot_table(index='영화제목',aggfunc=['mean','count'],values='평점')

Unnamed: 0_level_0,mean,count
Unnamed: 0_level_1,평점,평점
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2
"$1,000,000 Duck (1971)",3.375000,16
'Night Mother (1986),3.388889,36
'Til There Was You (1997),2.675676,37
"'burbs, The (1989)",2.793478,92
...And Justice for All (1979),3.828571,35
...,...,...
Your Friends and Neighbors (1998),2.888889,27
"Zed & Two Noughts, A (1985)",3.500000,8
Zero Effect (1998),3.864407,59
Zeus and Roxanne (1997),2.777778,9


In [58]:
# 2. 영화별 성별 평점
ex1=data.pivot_table(index='영화제목',columns='성별',aggfunc=['mean','count'],values='평점')

In [65]:
ex1[[(ex1[('mean','F')>=4.0) & (ex1[('count','F')]>=500)]]

SyntaxError: closing parenthesis ')' does not match opening parenthesis '[' (<ipython-input-65-bf4374c025f5>, line 1)

## [실습 #2] 실습 #1에서 구한 영화(여성인기영화)의 장르를 분석해 보자.
여성인기영화의 장르 통계 구하기

예를 들어, 여성인기영화 중 Drama 장르의 영화는 10개, Action 영화는 3개, ...

In [55]:
female.pivot_table(index='장르',aggfunc='count',values=['평점'])

Unnamed: 0_level_0,평점
장르,Unnamed: 1_level_1
Action,1611
Action|Adventure,1978
Action|Adventure|Animation,64
Action|Adventure|Animation|Children's|Fantasy,41
Action|Adventure|Animation|Horror|Sci-Fi,71
...,...
Sci-Fi|Thriller|War,40
Sci-Fi|War,231
Thriller,4312
War,101


In [69]:
# 여성인기영화들의 장르 정보 추출
여성영화.index

Index(['$1,000,000 Duck (1971)', ''Night Mother (1986)',
       ''Til There Was You (1997)', ''burbs, The (1989)',
       '...And Justice for All (1979)', '1-900 (1994)',
       '10 Things I Hate About You (1999)', '101 Dalmatians (1961)',
       '101 Dalmatians (1996)', '12 Angry Men (1957)',
       ...
       'Young Guns (1988)', 'Young Guns II (1990)',
       'Young Poisoner's Handbook, The (1995)', 'Young Sherlock Holmes (1985)',
       'Young and Innocent (1937)', 'Your Friends and Neighbors (1998)',
       'Zed & Two Noughts, A (1985)', 'Zero Effect (1998)',
       'Zeus and Roxanne (1997)', 'eXistenZ (1999)'],
      dtype='object', name='영화제목', length=3481)

In [70]:
movies.영화제목.isin(여성영화.index)

0       True
1       True
2       True
3       True
4       True
        ... 
3878    True
3879    True
3880    True
3881    True
3882    True
Name: 영화제목, Length: 3883, dtype: bool

In [72]:
# 첫번째  : isin()
movies[movies.영화제목.isin(여성영화.index)].장르

0        Animation|Children's|Comedy
1       Adventure|Children's|Fantasy
2                     Comedy|Romance
3                       Comedy|Drama
4                             Comedy
                    ...             
3878                          Comedy
3879                           Drama
3880                           Drama
3881                           Drama
3882                  Drama|Thriller
Name: 장르, Length: 3481, dtype: object

In [None]:
# 두번째 : 여성인기영화와 movies 합치기

In [73]:
여성영화.head(2)

Unnamed: 0_level_0,count
Unnamed: 0_level_1,장르
영화제목,Unnamed: 1_level_2
"$1,000,000 Duck (1971)",16
'Night Mother (1986),36


In [74]:
movies

Unnamed: 0,영화아이디,영화제목,장르
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


In [80]:
pd.merge(a,movies,left_index=True,right_on='영화제목')

Unnamed: 0,평균,개수,영화아이디,영화제목,장르
523,4.562602,615,527,Schindler's List (1993),Drama|War
315,4.539075,627,318,"Shawshank Redemption, The (1994)",Drama
2693,4.47741,664,2762,"Sixth Sense, The (1999)",Thriller
907,4.35503,507,919,"Wizard of Oz, The (1939)",Adventure|Children's|Drama|Musical
1179,4.342767,636,1197,"Princess Bride, The (1987)",Action|Adventure|Comedy|Romance
1180,4.332168,572,1198,Raiders of the Lost Ark (1981),Action|Adventure
257,4.302937,647,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi
900,4.30099,505,912,Casablanca (1942),Drama|Romance|War
589,4.271955,706,593,"Silence of the Lambs, The (1991)",Drama|Thriller
2789,4.238901,946,2858,American Beauty (1999),Comedy|Drama


In [81]:
pd.concat([a,movies.set_index('영화제목')],axis=1)

Unnamed: 0_level_0,평균,개수,영화아이디,장르
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Schindler's List (1993),4.562602,615.0,527,Drama|War
"Shawshank Redemption, The (1994)",4.539075,627.0,318,Drama
"Sixth Sense, The (1999)",4.477410,664.0,2762,Thriller
"Wizard of Oz, The (1939)",4.355030,507.0,919,Adventure|Children's|Drama|Musical
"Princess Bride, The (1987)",4.342767,636.0,1197,Action|Adventure|Comedy|Romance
...,...,...,...,...
Meet the Parents (2000),,,3948,Comedy
Requiem for a Dream (2000),,,3949,Drama
Tigerland (2000),,,3950,Drama
Two Family House (2000),,,3951,Drama


In [84]:
import pandas as pd

In [87]:
# 통합 두번째 방법
pd.concat([a,movies.set_index('영화제목')],axis=1,join='inner').장르

영화제목
Schindler's List (1993)                                               Drama|War
Shawshank Redemption, The (1994)                                          Drama
Sixth Sense, The (1999)                                                Thriller
Wizard of Oz, The (1939)                     Adventure|Children's|Drama|Musical
Princess Bride, The (1987)                      Action|Adventure|Comedy|Romance
Raiders of the Lost Ark (1981)                                 Action|Adventure
Star Wars: Episode IV - A New Hope (1977)       Action|Adventure|Fantasy|Sci-Fi
Casablanca (1942)                                             Drama|Romance|War
Silence of the Lambs, The (1991)                                 Drama|Thriller
American Beauty (1999)                                             Comedy|Drama
Name: 장르, dtype: object

In [92]:
# 세번째
ex2=data.pivot_table(index=['영화제목','장르'],columns='성별',aggfunc=['mean','count'],values='평점')
여성인기장르=pd.Series(ex2[(ex2[('mean','F')]>=4.0) & (ex2[('count','F')]>=500)].index.get_level_values(1))

In [93]:
여성인기장르

0                           Comedy|Drama
1                                 Comedy
2                       Action|Drama|War
3                      Drama|Romance|War
4        Children's|Drama|Fantasy|Sci-Fi
5                   Crime|Drama|Thriller
6                     Comedy|Romance|War
7       Crime|Film-Noir|Mystery|Thriller
8                 Action|Sci-Fi|Thriller
9        Action|Adventure|Comedy|Romance
10                           Crime|Drama
11                      Action|Adventure
12                      Action|Drama|War
13                             Drama|War
14                        Comedy|Romance
15                                 Drama
16                        Drama|Thriller
17                              Thriller
18       Action|Adventure|Fantasy|Sci-Fi
19     Action|Adventure|Drama|Sci-Fi|War
20           Animation|Children's|Comedy
21    Adventure|Children's|Drama|Musical
Name: 장르, dtype: object

In [97]:
aa=여성인기장르.str.split('|',expand=True)

In [98]:
aa[0].value_counts()

Action        7
Comedy        4
Drama         4
Crime         3
Children's    1
Thriller      1
Animation     1
Adventure     1
Name: 0, dtype: int64

In [99]:
aa[1].value_counts()

Drama         6
Adventure     4
Romance       3
Children's    2
Film-Noir     1
Sci-Fi        1
War           1
Thriller      1
Name: 1, dtype: int64

In [102]:
aa[0].value_counts().add(aa[1].value_counts(),fill_value=0)\
.add(aa[2].value_counts(),fill_value=0)\
.add(aa[3].value_counts(),fill_value=0)\
.add(aa[4].value_counts(),fill_value=0)

Action         7.0
Adventure      5.0
Animation      1.0
Children's     3.0
Comedy         6.0
Crime          3.0
Drama         12.0
Fantasy        2.0
Film-Noir      1.0
Musical        1.0
Mystery        1.0
Romance        4.0
Sci-Fi         4.0
Thriller       5.0
War            6.0
dtype: float64

In [104]:
aa.columns

RangeIndex(start=0, stop=5, step=1)

In [107]:
장르통계=Series(dtype='float')
for col in aa.columns:
    장르통계=장르통계.add(aa[col].value_counts(),fill_value=0)
장르통계

Action         7.0
Adventure      5.0
Animation      1.0
Children's     3.0
Comedy         6.0
Crime          3.0
Drama         12.0
Fantasy        2.0
Film-Noir      1.0
Musical        1.0
Mystery        1.0
Romance        4.0
Sci-Fi         4.0
Thriller       5.0
War            6.0
dtype: float64

In [108]:
장르통계.sort_values(ascending=False)

Drama         12.0
Action         7.0
Comedy         6.0
War            6.0
Adventure      5.0
Thriller       5.0
Romance        4.0
Sci-Fi         4.0
Children's     3.0
Crime          3.0
Fantasy        2.0
Animation      1.0
Film-Noir      1.0
Musical        1.0
Mystery        1.0
dtype: float64

In [111]:
female_popular = 여성영화.merge(movies , left_index = True , right_on = '영화제목')['장르']
fe_pop = female_popular.str.split('|') #expand =True하면 컬럼단위로 준다. 데이터프레임으로 반환한다.
result = []
for i in fe_pop.values :
    result.extend(i)
Series(result).value_counts()


Drama          1379
Comedy         1107
Action          481
Thriller        466
Romance         450
Horror          320
Adventure       273
Sci-Fi          271
Children's      249
Crime           196
War             136
Musical         109
Animation       105
Mystery         101
Documentary      94
Fantasy          67
Western          62
Film-Noir        43
dtype: int64

## [실습 #3] 남자와 여자의 호불호가 크게 갈리는 영화 10개 찾기
전체 평점의 개수가 500개 이상인 영화만 대상으로 함.

In [112]:
남녀평점=data.pivot_table(index='영화제목',columns=['성별'],aggfunc=['mean','count'],values='평점')
남녀평점.head()

Unnamed: 0_level_0,mean,mean,count,count
성별,F,M,F,M
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
"$1,000,000 Duck (1971)",3.375,2.761905,16.0,21.0
'Night Mother (1986),3.388889,3.352941,36.0,34.0
'Til There Was You (1997),2.675676,2.733333,37.0,15.0
"'burbs, The (1989)",2.793478,2.962085,92.0,211.0
...And Justice for All (1979),3.828571,3.689024,35.0,164.0


In [115]:
남녀평점['평점차이']=abs(남녀평점[('mean','F')]-남녀평점[('mean','M')])

In [118]:
남녀평점_500=남녀평점[(남녀평점[('count','F')]+남녀평점[('count','M')]>=500)]

In [119]:
남녀평점_500

Unnamed: 0_level_0,mean,mean,count,count,평점차이
성별,F,M,F,M,Unnamed: 5_level_1
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
10 Things I Hate About You (1999),3.646552,3.311966,232.0,468.0,0.334586
101 Dalmatians (1961),3.791444,3.500000,187.0,378.0,0.291444
12 Angry Men (1957),4.184397,4.328421,141.0,475.0,0.144024
"13th Warrior, The (1999)",3.112000,3.168000,125.0,625.0,0.056000
"20,000 Leagues Under the Sea (1954)",3.670103,3.709205,97.0,478.0,0.039102
...,...,...,...,...,...
"X-Files: Fight the Future, The (1998)",3.489474,3.493797,190.0,806.0,0.004323
X-Men (2000),3.682310,3.851702,277.0,1234.0,0.169391
You've Got Mail (1998),3.542424,3.275591,330.0,508.0,0.266834
Young Frankenstein (1974),4.289963,4.239177,269.0,924.0,0.050785


In [120]:
남녀평점_500.nlargest(10,'평점차이')

Unnamed: 0_level_0,mean,mean,count,count,평점차이
성별,F,M,F,M,Unnamed: 5_level_1
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Dirty Dancing (1987),3.790378,2.959596,291.0,396.0,0.830782
"Good, The Bad and The Ugly, The (1966)",3.494949,4.2213,99.0,723.0,0.726351
Dumb & Dumber (1994),2.697987,3.336595,149.0,511.0,0.638608
Evil Dead II (Dead By Dawn) (1987),3.297297,3.909283,74.0,474.0,0.611985
Grease (1978),3.975265,3.367041,283.0,534.0,0.608224
Caddyshack (1980),3.396135,3.969737,207.0,760.0,0.573602
Animal House (1978),3.628906,4.167192,256.0,951.0,0.538286
"Exorcist, The (1973)",3.537634,4.067239,186.0,699.0,0.529605
"Rocky Horror Picture Show, The (1975)",3.673016,3.160131,315.0,918.0,0.512885
Big Trouble in Little China (1986),2.987952,3.48503,83.0,501.0,0.497078


## [실습 #4] 연령대 별로 영화 평점 분석하기
연령대(10대 미만, 10대, 20대, ...50대) 컬럼을 추가한 후, 영화별 연령대별 영화평점 구하기

In [46]:
data['연령대']=0
for i in range(len(data)):
    if data['연령'][i]>=50:
        data['연령대'][i]='50대'
    elif data['연령'][i]>=40:
        data['연령대'][i]='40대'
    elif data['연령'][i]>=30:
        data['연령대'][i]='30대'
    elif data['연령'][i]>=20:
        data['연령대'][i]='20대'
    elif data['연령'][i]>=10:
        data['연령대'][i]='10대'
    else:
        data['연령대'][i]='10대 미만'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['연령대'][i]='10대 미만'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [121]:
def generate_agegroup(age):
    if age<10:
        return '10대 미만'
    elif age < 20:
        return '10대'
    elif age < 30:
        return '20대'
    elif age < 40:
        return '30대'
    elif age < 50:
        return '40대'
    elif age < 60:
        return '50대'
    else:
        return '60대 이상'

In [123]:
data['연령대']=data.연령.apply(generate_agegroup)

In [124]:
data.head()

Unnamed: 0,사용자아이디,성별,연령,직업,지역,영화아이디,평점,타임스탬프,영화제목,장르,연령대
0,1,F,1,10,48067,1193,5,978300760,One Flew Over the Cuckoo's Nest (1975),Drama,10대 미만
1,2,M,56,16,70072,1193,5,978298413,One Flew Over the Cuckoo's Nest (1975),Drama,50대
2,12,M,25,12,32793,1193,4,978220179,One Flew Over the Cuckoo's Nest (1975),Drama,20대
3,15,M,25,7,22903,1193,4,978199279,One Flew Over the Cuckoo's Nest (1975),Drama,20대
4,17,M,50,1,95350,1193,5,978158471,One Flew Over the Cuckoo's Nest (1975),Drama,50대


In [126]:
ex4=data.pivot_table(index='영화제목',columns=['연령대'],aggfunc='mean',values='평점')

In [127]:
ex4=ex4.fillna('-')

In [131]:
# ex4.sort_index(axis=1)
ex4[['10대 미만','10대','20대','30대','40대','50대']]

연령대,10대 미만,10대,20대,30대,40대,50대
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"$1,000,000 Duck (1971)",-,3.0,3.090909,3.133333,2.0,2.75
'Night Mother (1986),2.0,4.666667,3.423077,2.904762,3.833333,3.75
'Til There Was You (1997),3.5,2.5,2.666667,2.9,2.333333,2.6
"'burbs, The (1989)",4.5,3.244444,2.652174,2.818182,2.545455,3.1
...And Justice for All (1979),3.0,3.428571,3.724138,3.657143,4.1,3.674419
...,...,...,...,...,...,...
"Zed & Two Noughts, A (1985)",1.0,3.0,3.375,3.777778,4.0,3.0
Zero Effect (1998),4.125,3.883333,3.715278,3.608696,3.764706,3.769231
Zero Kelvin (Kjærlighetens kjøtere) (1995),-,-,-,3.5,-,-
Zeus and Roxanne (1997),1.5,2.5,2.833333,3.5,1.0,-
