In [1]:
import pandas as pd
import numpy as np
from db_conn import *

import sys

def create_movie_table():
    """Rebuild ``university.movie`` from the two sheets of ``movie_list.xls``.

    Reads seven movie attribute columns from both worksheets, concatenates
    them, drops and recreates the table, then inserts one row per
    spreadsheet row.  Missing production years are stored as 0; other
    missing cells are replaced with ``None`` before insertion.

    Side effects:
        Drops and recreates ``university.movie`` and commits the inserts.
        Prints a progress line every 1000 rows.

    Raises:
        SystemExit: with status 1 if any single insert fails; the error and
            the offending row are printed first.  Rows inserted before the
            failure are not committed by this function.

    NOTE(review): other tables in this file declare foreign keys to
    ``university.movie``; the DROP here will presumably fail while those
    tables exist -- confirm the intended rebuild order.
    """
    excel_file = "movie_list.xls"
    movie_table = "university.movie"
    columns = ['영화명', '영화명(영문)', '제작연도', '제작국가', '유형', '제작상태', '제작사']

    # First sheet: column names are on the row after the four skipped rows.
    df1 = pd.read_excel(excel_file, sheet_name='영화정보 리스트',
                        usecols=columns, skiprows=4)

    # Second sheet has no header row, so select by position and reuse the
    # first sheet's column names.
    df2 = pd.read_excel(excel_file, sheet_name='영화정보 리스트_2',
                        usecols=[0, 1, 2, 3, 4, 6, 8], header=None)
    df2.columns = columns

    # usecols returns columns in file order, not list order; reindex so the
    # tuple built per row always lines up with the INSERT column list.
    df = pd.concat([df1, df2], ignore_index=True)[columns]

    df = df.where(pd.notnull(df), None)
    # Missing years become 0 so the INT column always receives an integer.
    df['제작연도'] = df['제작연도'].astype('Int64').where(pd.notnull(df['제작연도']), 0)

    drop_sql = f"drop table if exists {movie_table}"
    create_sql = f"""
        create table {movie_table} (
            m_id INT AUTO_INCREMENT PRIMARY KEY,
            title VARCHAR(500),
            eng_title VARCHAR(500),
            year INT,
            country VARCHAR(100),
            m_type VARCHAR(20),
            status VARCHAR(30),
            company TEXT,
            enter_date DATETIME DEFAULT NOW()
        )"""
    insert_sql = f"""insert into {movie_table} (title, eng_title, year, country, m_type,status, company)
                    values(%s,%s,%s,%s,%s,%s,%s);"""

    # Open the connection only after the spreadsheet has loaded, and always
    # release it -- including when sys.exit() unwinds through here.
    conn, cur = open_db()
    try:
        # One statement per execute(): not every driver configuration
        # accepts several statements in a single call.
        cur.execute(drop_sql)
        cur.execute(create_sql)
        conn.commit()

        for i, r in df.iterrows():
            row = tuple(r)
            try:
                cur.execute(insert_sql, row)
            except Exception as e:
                print(e)
                print(row)
                sys.exit(1)
            if (i + 1) % 1000 == 0:
                print(f"{i + 1} rows")

        conn.commit()
    finally:
        close_db(conn, cur)

def create_directors_table():
    """Rebuild ``university.director`` from the director column of both sheets.

    Reads the director column from each worksheet of ``movie_list.xls``,
    concatenates them, drops and recreates the table, then inserts one row
    per spreadsheet row (no de-duplication; empty cells are inserted as
    NULL names).

    Side effects:
        Drops and recreates ``university.director`` and commits the inserts.
        Prints a progress line every 1000 rows.

    Raises:
        SystemExit: with status 1 if any single insert fails; the error and
            the offending row are printed first.  Rows inserted before the
            failure are not committed by this function.
    """
    excel_file = "movie_list.xls"
    director_table = "university.director"
    columns = ['감독']

    # First sheet: column names are on the row after the four skipped rows.
    df1 = pd.read_excel(excel_file, sheet_name='영화정보 리스트',
                        usecols=columns, skiprows=4)

    # Second sheet has no header row, so select by position and name it.
    df2 = pd.read_excel(excel_file, sheet_name='영화정보 리스트_2',
                        usecols=[7], header=None)
    df2.columns = columns

    df = pd.concat([df1, df2], ignore_index=True)
    df = df.where(pd.notnull(df), None)

    drop_sql = f"drop table if exists {director_table}"
    create_sql = f"""
        create table {director_table} (
            d_id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255)
        )"""
    insert_sql = f"""insert into {director_table} (name)
                    values(%s);"""

    # Open the connection only after the spreadsheet has loaded, and always
    # release it -- including when sys.exit() unwinds through here.
    conn, cur = open_db()
    try:
        # One statement per execute(): not every driver configuration
        # accepts several statements in a single call.
        cur.execute(drop_sql)
        cur.execute(create_sql)
        conn.commit()

        for i, r in df.iterrows():
            row = tuple(r)
            try:
                cur.execute(insert_sql, row)
            except Exception as e:
                print(e)
                print(row)
                sys.exit(1)
            if (i + 1) % 1000 == 0:
                print(f"{i + 1} rows")

        conn.commit()
    finally:
        close_db(conn, cur)

def create_genre_table():
    """Rebuild ``university.genre`` from the genre column of both sheets.

    Reads the genre column from each worksheet of ``movie_list.xls``,
    concatenates them, drops and recreates the table, then inserts one
    ``(m_id, genre)`` row per spreadsheet row.  ``m_id`` is taken to be the
    1-based row position, which assumes ``university.movie`` was loaded from
    the same file in the same order.

    Insert failures are printed together with the offending row and the
    load continues with the next row.

    Side effects:
        Drops and recreates ``university.genre`` and commits the inserts.
        Prints a progress line every 1000 rows.

    NOTE(review): ``genre`` is part of the primary key, so rows whose genre
    cell is empty are presumably rejected by the database and reported via
    the per-row error print -- confirm this is intended.
    """
    excel_file = "movie_list.xls"
    genre_table = "university.genre"
    columns = ['장르']

    # First sheet: column names are on the row after the four skipped rows.
    df1 = pd.read_excel(excel_file, sheet_name='영화정보 리스트',
                        usecols=columns, skiprows=4)

    # Second sheet has no header row, so select by position and name it.
    df2 = pd.read_excel(excel_file, sheet_name='영화정보 리스트_2',
                        usecols=[5], header=None)
    df2.columns = columns

    df = pd.concat([df1, df2], ignore_index=True)
    df = df.where(pd.notnull(df), None)

    drop_sql = f"drop table if exists {genre_table}"
    create_sql = f"""
        create table {genre_table} (
            m_id INT,
            genre VARCHAR(100),
            PRIMARY KEY (m_id, genre),
            FOREIGN KEY (m_id) REFERENCES university.movie(m_id)
        )"""
    insert_sql = f"""INSERT INTO {genre_table} (m_id, genre) VALUES (%s, %s);"""

    # Open the connection only after the spreadsheet has loaded, and always
    # release it even if table creation or the final commit fails.
    conn, cur = open_db()
    try:
        # One statement per execute(): not every driver configuration
        # accepts several statements in a single call.
        cur.execute(drop_sql)
        cur.execute(create_sql)
        conn.commit()

        for i, row in df.iterrows():
            m_id = i + 1  # row position doubles as the movie id (see docstring)
            genre = row['장르']
            try:
                cur.execute(insert_sql, (m_id, genre))
                if (i + 1) % 1000 == 0:
                    print(f"{i+1} rows inserted")
            except Exception as e:
                # Best effort: report the bad row and keep loading the rest.
                print(e)
                print(row)

        conn.commit()
    finally:
        close_db(conn, cur)

def create_movie_director_table():
    """Rebuild the ``university.movie_director`` link table.

    Reads the movie-title column from both worksheets of ``movie_list.xls``
    only to determine how many rows there are, drops and recreates the
    table, then inserts one ``(m_id, d_id)`` pair per spreadsheet row.
    Both ids are taken to be the 1-based row position, which assumes
    ``university.movie`` and ``university.director`` were each loaded from
    the same file, in the same order, with auto-increment ids starting at 1.

    Insert failures are printed together with the offending row and the
    load continues with the next row.

    Side effects:
        Drops and recreates ``university.movie_director`` and commits the
        inserts.  Prints a progress line every 1000 rows.
    """
    excel_file = "movie_list.xls"
    movie_director_table = "university.movie_director"
    columns = ['영화명']

    # First sheet: column names are on the row after the four skipped rows.
    df1 = pd.read_excel(excel_file, sheet_name='영화정보 리스트',
                        usecols=columns, skiprows=4)

    # Second sheet has no header row, so select by position and name it.
    df2 = pd.read_excel(excel_file, sheet_name='영화정보 리스트_2',
                        usecols=[0], header=None)
    df2.columns = columns

    df = pd.concat([df1, df2], ignore_index=True)
    df = df.where(pd.notnull(df), None)

    drop_sql = f"DROP TABLE IF EXISTS {movie_director_table}"
    create_sql = f"""
        CREATE TABLE {movie_director_table} (
            m_id INT,
            d_id INT,
            PRIMARY KEY (m_id, d_id),
            FOREIGN KEY (m_id) REFERENCES university.movie(m_id),
            FOREIGN KEY (d_id) REFERENCES university.director(d_id)
        )"""
    insert_sql = f"""INSERT INTO {movie_director_table} (m_id, d_id) VALUES (%s, %s);"""

    # Open the connection only after the spreadsheet has loaded, and always
    # release it even if table creation or the final commit fails.
    conn, cur = open_db()
    try:
        # One statement per execute(): not every driver configuration
        # accepts several statements in a single call.
        cur.execute(drop_sql)
        cur.execute(create_sql)
        conn.commit()

        for i, row in df.iterrows():
            # Row position doubles as both ids (see docstring).
            m_id = i + 1
            d_id = i + 1
            try:
                # Bind d_id here: the previous code passed an undefined name,
                # so every insert raised NameError and nothing was stored.
                cur.execute(insert_sql, (m_id, d_id))
                if (i + 1) % 1000 == 0:
                    print(f"{i+1} rows inserted")
            except Exception as e:
                # Best effort: report the bad row and keep loading the rest.
                print(e)
                print(row)

        conn.commit()
    finally:
        close_db(conn, cur)
    
    
 


In [6]:
if __name__ == '__main__':
    # Earlier build steps, left disabled. The movie_director table declares
    # foreign keys to university.movie and university.director, so those
    # tables must already exist before the final call below is run.
    #create_movie_table()
    #create_directors_table()
    #create_genre_table()
    create_movie_director_table()

1000 rows inserted
2000 rows inserted
3000 rows inserted
4000 rows inserted
5000 rows inserted
6000 rows inserted
7000 rows inserted
8000 rows inserted
9000 rows inserted
10000 rows inserted
11000 rows inserted
12000 rows inserted
13000 rows inserted
14000 rows inserted
15000 rows inserted
16000 rows inserted
17000 rows inserted
18000 rows inserted
19000 rows inserted
20000 rows inserted
21000 rows inserted
22000 rows inserted
23000 rows inserted
24000 rows inserted
25000 rows inserted
26000 rows inserted
27000 rows inserted
28000 rows inserted
29000 rows inserted
30000 rows inserted
31000 rows inserted
32000 rows inserted
33000 rows inserted
34000 rows inserted
35000 rows inserted
36000 rows inserted
37000 rows inserted
38000 rows inserted
39000 rows inserted
40000 rows inserted
41000 rows inserted
42000 rows inserted
43000 rows inserted
44000 rows inserted
45000 rows inserted
46000 rows inserted
47000 rows inserted
48000 rows inserted
49000 rows inserted
50000 rows inserted
51000 row