# IMDb 영화인 전문분야 테이블에 값 입력

In [1]:
# MySQL Connector 모듈 및 pandas 모듈 import
import mysql.connector as mqc
import numpy as np
import pandas as pd

In [3]:
# MySQL 연결 객체 생성
cnx1 = mqc.connect(host='127.0.0.1', user='root', password='<password>', database='imdb')

# MySQL을 실질적으로 실행하는 cursor 객체 생성
cursor1 = cnx1.cursor()

In [4]:
# 실행할 쿼리문 입력
query = 'SELECT primaryProfession FROM `name_basics`'

# cursor 객체를 통하여 쿼리 실행
cursor1.execute(query)

In [5]:
# 결과물을 저장할 리스트 선언
prof = []

# 결과물을 리스트에 저장
# 한 개의 컬럼만 조회하였지만 결과물이 튜플들의 리스트로 반환되기 때문에 0번째 인덱스의 값만 가져오도록 코드 작성
for _ in cursor1:
    prof.append(_[0])
prof

['soundtrack,actor,miscellaneous',
 'actress,soundtrack',
 'actress,soundtrack,producer',
 'actor,soundtrack,writer',
 'writer,director,actor',
 'actress,soundtrack,producer',
 'actor,soundtrack,producer',
 'actor,soundtrack,director',
 'actor,producer,soundtrack',
 'actor,soundtrack,director',
 'actor,soundtrack,producer',
 'actress,soundtrack,make_up_department',
 'soundtrack,actress,producer',
 'actress,soundtrack',
 'actor,miscellaneous',
 'composer,soundtrack,music_department',
 'soundtrack,actress,music_department',
 'actor,producer,soundtrack',
 'writer,director,assistant_director',
 'actor,producer,soundtrack',
 'actress,soundtrack,producer',
 'actor,soundtrack,producer',
 'soundtrack,actress',
 'actor,writer,director',
 'music_department,soundtrack,composer',
 'actor,soundtrack,producer',
 'actor,soundtrack,writer',
 'actress,soundtrack,producer',
 'actress,miscellaneous',
 'actress,soundtrack',
 'actress,soundtrack,writer',
 'actor,director,writer',
 'director,producer,actor'

In [6]:
# 결과물을 저장할 Series 객체 생성
profs_series = pd.Series(prof, name='Profession')
profs_series

0               soundtrack,actor,miscellaneous
1                           actress,soundtrack
2                  actress,soundtrack,producer
3                      actor,soundtrack,writer
4                        writer,director,actor
                          ...                 
9971656    animation_department,art_department
9971657                                   None
9971658                        cinematographer
9971659                        cinematographer
9971660                                   None
Name: Profession, Length: 9971661, dtype: object

In [7]:
# 가져온 내용을 ,(쉼표)를 기준으로 분할하여 DataFrame에 저장
profs_split = profs_series.str.split(',', expand=True)
profs_split

Unnamed: 0,0,1,2
0,soundtrack,actor,miscellaneous
1,actress,soundtrack,
2,actress,soundtrack,producer
3,actor,soundtrack,writer
4,writer,director,actor
...,...,...,...
9971656,animation_department,art_department,
9971657,,,
9971658,cinematographer,,
9971659,cinematographer,,


In [8]:
# 겹치는 이름을 삭제하기 위하여 모든 컬럼을 하나의 Series 객체로 합침
profs_series = pd.concat([profs_split[0], profs_split[1], profs_split[2]])
# unique() 메서드를 활용하여 중복값을 제거한 후 반환된 ndarray 객체를 다시 Series 객체로 생성
# 이후 값을 기준으로 재정렬
profs_series = pd.Series(profs_series.unique(), name='Profession')
profs_series.sort_values(inplace=True, ignore_index=True)
profs_series

0                         actor
1                       actress
2          animation_department
3                art_department
4                  art_director
5                     assistant
6            assistant_director
7             camera_department
8            casting_department
9              casting_director
10              cinematographer
11                     composer
12           costume_department
13             costume_designer
14                     director
15                       editor
16         editorial_department
17        electrical_department
18                    executive
19                        legal
20          location_management
21           make_up_department
22                      manager
23                miscellaneous
24             music_department
25                     producer
26        production_department
27          production_designer
28           production_manager
29                    publicist
30            script_department
31      

In [9]:
# 정리된 Series 객체의 값만 추출하여 MySQL 쿼리에 대입할 수 있도록 튜플들의 리스트로 변환
value_params = list(zip(profs_series.T.values, ))
value_params

[('actor',),
 ('actress',),
 ('animation_department',),
 ('art_department',),
 ('art_director',),
 ('assistant',),
 ('assistant_director',),
 ('camera_department',),
 ('casting_department',),
 ('casting_director',),
 ('cinematographer',),
 ('composer',),
 ('costume_department',),
 ('costume_designer',),
 ('director',),
 ('editor',),
 ('editorial_department',),
 ('electrical_department',),
 ('executive',),
 ('legal',),
 ('location_management',),
 ('make_up_department',),
 ('manager',),
 ('miscellaneous',),
 ('music_department',),
 ('producer',),
 ('production_department',),
 ('production_designer',),
 ('production_manager',),
 ('publicist',),
 ('script_department',),
 ('set_decorator',),
 ('sound_department',),
 ('soundtrack',),
 ('special_effects',),
 ('stunts',),
 ('talent_agent',),
 ('transportation_department',),
 ('visual_effects',),
 ('writer',),
 (None,)]

In [10]:
# MySQL 쿼리 실행
query = 'INSERT INTO `professions` (prof_name) VALUES (%s)'
try:
    cursor1.executemany(query, value_params)
except mqc.Error as err:
    print('오류가 발생하였습니다:', err)
finally:
    print(str(cursor1.rowcount) + '개의 행이 처리되었습니다.')
    cnx1.commit()

41개의 행이 처리되었습니다.


In [11]:
# MySQL 연결 닫기
cnx1.close()