# IMDb 영화 장르 테이블 만들기

In [3]:
# MySQL Connector 모듈 및 pandas 모듈 import
import mysql.connector as mqc
import numpy as np
import pandas as pd

In [4]:
# MySQL 연결 객체 생성
cnx1 = mqc.connect(host='pc.pdmnu.com', user='root', password='!panda8902', database='imdb')

# MySQL을 실질적으로 실행하는 cursor 객체 생성
cursor1 = cnx1.cursor()

In [5]:
# 실행할 쿼리문 입력
query = 'SELECT genres FROM imdb.title_basics;'

# cursor 객체를 통하여 쿼리 실행
cursor1.execute(query)

In [6]:
# 결과물을 저장할 리스트 선언
genres = []

# 결과물을 리스트에 저장
# 한 개의 컬럼만 조회하였지만 결과물이 튜플들의 리스트로 반환되기 때문에 0번째 인덱스의 값만 가져오도록 코드 작성
for _ in cursor1:
    genres.append(_[0])
genres

['Documentary,Short',
 'Animation,Short',
 'Animation,Comedy,Romance',
 'Animation,Short',
 'Comedy,Short',
 'Short',
 'Short,Sport',
 'Documentary,Short',
 'Romance',
 'Documentary,Short',
 'Documentary,Short',
 'Action,Documentary,Short',
 'Documentary,Short',
 'Comedy,Short',
 'Animation,Short',
 'Documentary,Short',
 'Documentary,Short',
 'Short',
 'Comedy,Short',
 'Documentary,Short,Sport',
 'Documentary,Short',
 'Documentary,Short',
 'News,Short',
 'News,Short,Sport',
 'Documentary,Short',
 'Documentary,Short',
 'Documentary,Short',
 'Documentary,Short',
 'Documentary,Short',
 'Documentary,Short',
 'Short',
 'Comedy,Documentary,Short',
 'Documentary,Short',
 'Comedy,Short',
 'Drama,Short',
 'Short',
 'Documentary,Short,Sport',
 'Short',
 'Documentary,Short',
 'Action,Comedy,Documentary',
 'Documentary,Short',
 'Documentary,Short',
 'Short',
 'Short',
 'Documentary,Short',
 'Documentary,Short',
 'Short',
 'Short,Sport',
 'Documentary,Short',
 'Documentary,Short',
 'Documentary,Sho

In [8]:
# 결과물을 저장할 Series 객체 생성
genres_series = pd.Series(genres, name='Genres')
genres_series

0                 Documentary,Short
1                   Animation,Short
2          Animation,Comedy,Romance
3                   Animation,Short
4                      Comedy,Short
                     ...           
6658941                   Game-Show
6658942                       Music
6658943                     Musical
6658944    Animation,Comedy,Western
6658945                  Reality-TV
Name: Genres, Length: 6658946, dtype: object

In [9]:
# 가져온 내용을 ,(쉼표)를 기준으로 분할하여 DataFrame에 저장
genres_split = genres_series.str.split(',', expand=True)
genres_split

Unnamed: 0,0,1,2
0,Documentary,Short,
1,Animation,Short,
2,Animation,Comedy,Romance
3,Animation,Short,
4,Comedy,Short,
...,...,...,...
6658941,Game-Show,,
6658942,Music,,
6658943,Musical,,
6658944,Animation,Comedy,Western


In [10]:
# 겹치는 이름을 삭제하기 위하여 모든 컬럼을 하나의 Series 객체로 합침
genres_series = pd.concat([genres_split[0], genres_split[1], genres_split[2]])
# unique() 메서드를 활용하여 중복값을 제거한 후 반환된 ndarray 객체를 다시 Series 객체로 생성
# 이후 값을 기준으로 재정렬
genres_series = pd.Series(genres_series.unique(), name='Genres')
genres_series.sort_values(inplace=True, ignore_index=True)
genres_series

0          Action
1           Adult
2       Adventure
3       Animation
4       Biography
5          Comedy
6           Crime
7     Documentary
8           Drama
9          Family
10        Fantasy
11      Film-Noir
12      Game-Show
13        History
14         Horror
15          Music
16        Musical
17        Mystery
18           News
19     Reality-TV
20        Romance
21         Sci-Fi
22          Short
23          Sport
24      Talk-Show
25       Thriller
26            War
27        Western
28           None
Name: Genres, dtype: object

In [11]:
# 정리된 Series 객체의 값만 추출하여 MySQL 쿼리에 대입할 수 있도록 튜플들의 리스트로 변환
value_params = list(zip(genres_series.T.values, ))
value_params

[('Action',),
 ('Adult',),
 ('Adventure',),
 ('Animation',),
 ('Biography',),
 ('Comedy',),
 ('Crime',),
 ('Documentary',),
 ('Drama',),
 ('Family',),
 ('Fantasy',),
 ('Film-Noir',),
 ('Game-Show',),
 ('History',),
 ('Horror',),
 ('Music',),
 ('Musical',),
 ('Mystery',),
 ('News',),
 ('Reality-TV',),
 ('Romance',),
 ('Sci-Fi',),
 ('Short',),
 ('Sport',),
 ('Talk-Show',),
 ('Thriller',),
 ('War',),
 ('Western',),
 (None,)]

In [10]:
# MySQL 쿼리 실행
query = 'INSERT INTO genres (genre_name) VALUES (%s)'
try:
    cursor1.executemany(query, value_params)
except mqc.Error as err:
    print('오류가 발생하였습니다:', err)
finally:
    print(str(cursor1.rowcount) + '개의 행이 처리되었습니다.')
    cnx1.commit()

41개의 행이 처리되었습니다.


In [11]:
# MySQL 연결 닫기
cnx1.close()