# SELECT
다양한 조건을 사용하여 SELECT문을 사용해본다

In [1]:
import pymysql
import pandas as pd
import numpy as np
from config import *
import uuid
import datetime 
from itertools import product

In [2]:
def query(sql, data=None):
    """
    Run one SQL statement against the configured MySQL database.

    A fresh connection is opened on every call from the ``MYSQL_*`` settings
    and is always closed again, even when executing the statement raises.

    Args:
        sql (str): SQL statement; use ``%s`` placeholders for bound values.
        data (list or tuple, optional): values for the placeholders,
            ex) [val1, val2 ...]. May be omitted when there are none.

    Returns:
        pandas.DataFrame: the fetched rows converted by ``to_pandas``;
        an empty DataFrame when the statement returns no rows.
    """
    # Open the connection to the DB
    db = pymysql.Connect(
        host=MYSQL_HOST,
        user=MYSQL_USER,
        password=MYSQL_PASSWORD,
        database=MYSQL_DATABASE,
        autocommit=MYSQL_AUTOCOMMIT,  # commit automatically after each statement (applies insert/update)
        charset=MYSQL_CHARSET,  # character encoding
        cursorclass=pymysql.cursors.DictCursor,  # each row becomes a dict keyed by column name
    )
    try:
        # The cursor context manager closes the cursor on exit
        with db.cursor() as cursor:
            cursor.execute(sql, data)
            result = cursor.fetchall()  # fetch every row at once
    finally:
        # Release the connection whether or not the statement succeeded
        db.close()
    return to_pandas(result)

def to_pandas(result):
    """Convert fetched rows into a pandas DataFrame.

    Args:
        result: rows to convert, e.g. the sequence of column-name -> value
            dicts produced by a DictCursor ``fetchall()``.

    Returns:
        pandas.DataFrame: one row per element of ``result``.
    """
    frame = pd.DataFrame(data=result)
    return frame

## 기본 SELECT

In [3]:
# Select every row of the table
sql = f"SELECT * FROM {MYSQL_TABLENAME};"
print(sql)
result = query(sql, ())
result.head()

SELECT * FROM mytable;


Unnamed: 0,index,uuid,reg_date,name,email,phone,energy,label,mark,ext01
0,1,73e01babaaf511ed847040b076a1ec6b,2023-02-12 16:51:10,Choi,Choi@mysql.com,010-9876-5432,35,"[{'serach': 'naver'}, {'viewing time': 30}]",,
1,2,73dff493aaf511ed9dc140b076a1ec6b,2023-02-12 16:51:10,Lee,Lee@mysql.com,010-4567-8910,80,"[{'serach': 'youtube'}, {'viewing time': 54}]",,
2,3,73dfa65faaf511ed9fb340b076a1ec6b,2023-02-12 16:51:10,Kim,Kim@mysql.com,010-1234-5678,44,"[{'serach': 'github'}, {'viewing time': 64}]",,
3,4,73e01bacaaf511edb1d840b076a1ec6b,2023-02-12 15:51:10,Choi,Choi@mysql.com,010-9876-5432,29,"[{'serach': 'github'}, {'viewing time': 33}]",,
4,5,73dff494aaf511ed8c0f40b076a1ec6b,2023-02-12 15:51:10,Lee,Lee@mysql.com,010-4567-8910,0,"[{'serach': 'naver'}, {'viewing time': 19}]",,


In [4]:
result.info() # 144 rows in total

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   index     144 non-null    int64         
 1   uuid      144 non-null    object        
 2   reg_date  144 non-null    datetime64[ns]
 3   name      144 non-null    object        
 4   email     144 non-null    object        
 5   phone     144 non-null    object        
 6   energy    144 non-null    int64         
 7   label     144 non-null    object        
 8   mark      144 non-null    object        
 9   ext01     144 non-null    object        
dtypes: datetime64[ns](1), int64(2), object(7)
memory usage: 11.4+ KB


## SELECT + 자주쓰는 기본 문법
- DATE, COALESCE

In [5]:
sql = "SELECT DATE('2023-02-24 11:00:00');" # table-less SELECT: DATE() keeps only the date part and the value comes back as a one-row result
print(sql)
result = query(sql, ())
result.head()

SELECT DATE('2023-02-24 11:00:00');


Unnamed: 0,DATE('2023-02-24 11:00:00')
0,2023-02-24


In [6]:
sql = "SELECT COALESCE('2023-02-24 11:00:00', 0);" # COALESCE returns its first non-NULL argument; the string literal is non-NULL, so it is returned unchanged
print(sql)
result = query(sql, ())
result.head()

SELECT COALESCE('2023-02-24 11:00:00', 0);


Unnamed: 0,"COALESCE('2023-02-24 11:00:00', 0)"
0,2023-02-24 11:00:00


In [7]:
sql = "SELECT COALESCE(null, 10, 5);" # returns 10, the first argument that is not NULL; if every argument is NULL the result is NULL
print(sql)
result = query(sql, ())
result.head()

SELECT COALESCE(null, 10, 5);


Unnamed: 0,"COALESCE(null, 10, 5)"
0,10


## SELECT + WHERE + 조건

In [8]:
# Filter rows by name
sql = f"SELECT * FROM {MYSQL_TABLENAME} where name=%s;"
print(sql)
result = query(sql, ['Kim'])
result[['name']].value_counts()

SELECT * FROM mytable where name=%s;


name
Kim     48
dtype: int64

In [9]:
# Filter rows by a reg_date time range (BETWEEN includes both endpoints)
sql = f"SELECT * FROM {MYSQL_TABLENAME} where reg_date BETWEEN TIMESTAMP(%s) AND TIMESTAMP(%s);"
print(sql)
result = query(sql, ['2023-02-12 00:00:00', '2023-02-13 00:00:00'])
result.sort_values('reg_date')

SELECT * FROM mytable where reg_date BETWEEN TIMESTAMP(%s) AND TIMESTAMP(%s);


Unnamed: 0,index,uuid,reg_date,name,email,phone,energy,label,mark,ext01
50,51,73dfcd7caaf511eda45b40b076a1ec6b,2023-02-12 00:51:10,Kim,Kim@mysql.com,010-1234-5678,35,"[{'serach': 'naver'}, {'viewing time': 75}]",,
49,50,73dff4a3aaf511eda61c40b076a1ec6b,2023-02-12 00:51:10,Lee,Lee@mysql.com,010-4567-8910,83,"[{'serach': 'github'}, {'viewing time': 32}]",,
48,49,73e042aaaaf511edbb8d40b076a1ec6b,2023-02-12 00:51:10,Choi,Choi@mysql.com,010-9876-5432,16,"[{'serach': 'naver'}, {'viewing time': 36}]",,
47,48,73dfcd7baaf511ed920b40b076a1ec6b,2023-02-12 01:51:10,Kim,Kim@mysql.com,010-1234-5678,74,"[{'serach': 'youtube'}, {'viewing time': 23}]",,
46,47,73dff4a2aaf511eda53a40b076a1ec6b,2023-02-12 01:51:10,Lee,Lee@mysql.com,010-4567-8910,80,"[{'serach': 'youtube'}, {'viewing time': 61}]",,
45,46,73e042a9aaf511eda98940b076a1ec6b,2023-02-12 01:51:10,Choi,Choi@mysql.com,010-9876-5432,0,"[{'serach': 'naver'}, {'viewing time': 63}]",,
44,45,73dfcd7aaaf511ed97bc40b076a1ec6b,2023-02-12 02:51:10,Kim,Kim@mysql.com,010-1234-5678,57,"[{'serach': 'youtube'}, {'viewing time': 31}]",,
43,44,73dff4a1aaf511edb39540b076a1ec6b,2023-02-12 02:51:10,Lee,Lee@mysql.com,010-4567-8910,29,"[{'serach': 'github'}, {'viewing time': 25}]",,
42,43,73e01bb9aaf511edb72e40b076a1ec6b,2023-02-12 02:51:10,Choi,Choi@mysql.com,010-9876-5432,81,"[{'serach': 'github'}, {'viewing time': 58}]",,
41,42,73dfcd79aaf511edbb1c40b076a1ec6b,2023-02-12 03:51:10,Kim,Kim@mysql.com,010-1234-5678,32,"[{'serach': 'github'}, {'viewing time': 9}]",,


In [10]:
# Filter rows by name AND by a reg_date time range
sql = f"SELECT * FROM {MYSQL_TABLENAME} where name=%s AND reg_date BETWEEN TIMESTAMP(%s) AND TIMESTAMP(%s);"
print(sql)
result = query(sql, ['Kim', '2023-02-12 00:00:00', '2023-02-13 00:00:00'])
result.sort_values('reg_date')

SELECT * FROM mytable where name=%s AND reg_date BETWEEN TIMESTAMP(%s) AND TIMESTAMP(%s);


Unnamed: 0,index,uuid,reg_date,name,email,phone,energy,label,mark,ext01
16,51,73dfcd7caaf511eda45b40b076a1ec6b,2023-02-12 00:51:10,Kim,Kim@mysql.com,010-1234-5678,35,"[{'serach': 'naver'}, {'viewing time': 75}]",,
15,48,73dfcd7baaf511ed920b40b076a1ec6b,2023-02-12 01:51:10,Kim,Kim@mysql.com,010-1234-5678,74,"[{'serach': 'youtube'}, {'viewing time': 23}]",,
14,45,73dfcd7aaaf511ed97bc40b076a1ec6b,2023-02-12 02:51:10,Kim,Kim@mysql.com,010-1234-5678,57,"[{'serach': 'youtube'}, {'viewing time': 31}]",,
13,42,73dfcd79aaf511edbb1c40b076a1ec6b,2023-02-12 03:51:10,Kim,Kim@mysql.com,010-1234-5678,32,"[{'serach': 'github'}, {'viewing time': 9}]",,
12,39,73dfcd78aaf511ed81bc40b076a1ec6b,2023-02-12 04:51:10,Kim,Kim@mysql.com,010-1234-5678,29,"[{'serach': 'naver'}, {'viewing time': 39}]",,
11,36,73dfcd77aaf511edbc0440b076a1ec6b,2023-02-12 05:51:10,Kim,Kim@mysql.com,010-1234-5678,99,"[{'serach': 'youtube'}, {'viewing time': 49}]",,
10,33,73dfcd76aaf511ed978640b076a1ec6b,2023-02-12 06:51:10,Kim,Kim@mysql.com,010-1234-5678,79,"[{'serach': 'youtube'}, {'viewing time': 82}]",,
9,30,73dfcd75aaf511edae5b40b076a1ec6b,2023-02-12 07:51:10,Kim,Kim@mysql.com,010-1234-5678,20,"[{'serach': 'youtube'}, {'viewing time': 69}]",,
8,27,73dfcd74aaf511ed8b2640b076a1ec6b,2023-02-12 08:51:10,Kim,Kim@mysql.com,010-1234-5678,77,"[{'serach': 'youtube'}, {'viewing time': 9}]",,
7,24,73dfcd73aaf511edbf5440b076a1ec6b,2023-02-12 09:51:10,Kim,Kim@mysql.com,010-1234-5678,81,"[{'serach': 'github'}, {'viewing time': 25}]",,


## SELECT + LIMIT
- 조회 개수 제한
- 참고로 일반적인 String Format에서는 %d, %s, %f 등 타입에 맞는 지정자를 써야 하지만, pymysql에서는 변수 타입과 상관없이 전부 %s를 사용해야 한다

In [11]:
# Filter by name and cap the number of returned rows with LIMIT
# pymysql uses %s for every parameter, regardless of its Python type
sql = f"SELECT * FROM {MYSQL_TABLENAME} where name=%s LIMIT %s;"
print(sql)
result = query(sql, ['Kim', 5])
result[['name']].value_counts()

SELECT * FROM mytable where name=%s LIMIT %s;


name
Kim     5
dtype: int64

In [12]:
# Show the rows fetched by the LIMIT query in the previous cell
result

Unnamed: 0,index,uuid,reg_date,name,email,phone,energy,label,mark,ext01
0,3,73dfa65faaf511ed9fb340b076a1ec6b,2023-02-12 16:51:10,Kim,Kim@mysql.com,010-1234-5678,44,"[{'serach': 'github'}, {'viewing time': 64}]",,
1,6,73dfa660aaf511edaebc40b076a1ec6b,2023-02-12 15:51:10,Kim,Kim@mysql.com,010-1234-5678,67,"[{'serach': 'github'}, {'viewing time': 83}]",,
2,9,73dfa661aaf511edb22f40b076a1ec6b,2023-02-12 14:51:10,Kim,Kim@mysql.com,010-1234-5678,21,"[{'serach': 'naver'}, {'viewing time': 36}]",,
3,12,73dfa662aaf511eda43140b076a1ec6b,2023-02-12 13:51:10,Kim,Kim@mysql.com,010-1234-5678,87,"[{'serach': 'naver'}, {'viewing time': 88}]",,
4,15,73dfa663aaf511edaa5940b076a1ec6b,2023-02-12 12:51:10,Kim,Kim@mysql.com,010-1234-5678,88,"[{'serach': 'youtube'}, {'viewing time': 58}]",,


## SELECT + ORDER BY + LIMIT N,M
- 내림/오름차순으로 정렬한 뒤에 N행부터 M개 행을 가져옴
    - LIMIT N, M: N개 행을 건너뛰고(offset은 0부터 시작) 그 다음 행부터 M개를 가져옴

In [13]:
# LIMIT offset, count picks a window of rows: skip `offset` rows, then return `count` rows
# Rows are sorted newest first (descending reg_date) before the window is applied
sql = f"SELECT * FROM {MYSQL_TABLENAME} where name=%s ORDER BY reg_date DESC  LIMIT %s, %s;"
print(sql)
result = query(sql, ['Kim', 2, 5])
result

SELECT * FROM mytable where name=%s ORDER BY reg_date DESC  LIMIT %s, %s;


Unnamed: 0,index,uuid,reg_date,name,email,phone,energy,label,mark,ext01
0,9,73dfa661aaf511edb22f40b076a1ec6b,2023-02-12 14:51:10,Kim,Kim@mysql.com,010-1234-5678,21,"[{'serach': 'naver'}, {'viewing time': 36}]",,
1,12,73dfa662aaf511eda43140b076a1ec6b,2023-02-12 13:51:10,Kim,Kim@mysql.com,010-1234-5678,87,"[{'serach': 'naver'}, {'viewing time': 88}]",,
2,15,73dfa663aaf511edaa5940b076a1ec6b,2023-02-12 12:51:10,Kim,Kim@mysql.com,010-1234-5678,88,"[{'serach': 'youtube'}, {'viewing time': 58}]",,
3,18,73dfcd71aaf511ed920240b076a1ec6b,2023-02-12 11:51:10,Kim,Kim@mysql.com,010-1234-5678,65,"[{'serach': 'naver'}, {'viewing time': 39}]",,
4,21,73dfcd72aaf511ed93dc40b076a1ec6b,2023-02-12 10:51:10,Kim,Kim@mysql.com,010-1234-5678,87,"[{'serach': 'naver'}, {'viewing time': 88}]",,


## SELECT + COUNT

In [14]:
# Count every row in the table
sql = f"SELECT COUNT(*) FROM {MYSQL_TABLENAME};"
print(sql)
result = query(sql, ())
result

SELECT COUNT(*) FROM mytable;


Unnamed: 0,COUNT(*)
0,144


In [15]:
# Count only the rows that match a condition (here: a given name)
sql = f"SELECT COUNT(*) FROM {MYSQL_TABLENAME} where name=%s;"
print(sql)
result = query(sql, ['Kim'])
result

SELECT COUNT(*) FROM mytable where name=%s;


Unnamed: 0,COUNT(*)
0,48


## SELECT + LIKE
- 특정 문자열이 있는지 검색. LIKE 패턴에서 '%'의 위치에 따라 조건이 달라짐 (뒤에 붙이면 "~로 시작", 앞에 붙이면 "~로 끝남", 양쪽에 붙이면 "~를 포함")

In [16]:
# Trailing %: the pattern "[{'serach':%" matches labels that START WITH [{'serach':
# ('serach' is spelled this way in the stored data, so the pattern must match it)
sql = f"SELECT * FROM {MYSQL_TABLENAME} where label LIKE %s;"
print(sql)
result = query(sql, ["[{'serach':%"])
result

SELECT * FROM mytable where label LIKE %s;


Unnamed: 0,index,uuid,reg_date,name,email,phone,energy,label,mark,ext01
0,1,73e01babaaf511ed847040b076a1ec6b,2023-02-12 16:51:10,Choi,Choi@mysql.com,010-9876-5432,35,"[{'serach': 'naver'}, {'viewing time': 30}]",,
1,2,73dff493aaf511ed9dc140b076a1ec6b,2023-02-12 16:51:10,Lee,Lee@mysql.com,010-4567-8910,80,"[{'serach': 'youtube'}, {'viewing time': 54}]",,
2,3,73dfa65faaf511ed9fb340b076a1ec6b,2023-02-12 16:51:10,Kim,Kim@mysql.com,010-1234-5678,44,"[{'serach': 'github'}, {'viewing time': 64}]",,
3,4,73e01bacaaf511edb1d840b076a1ec6b,2023-02-12 15:51:10,Choi,Choi@mysql.com,010-9876-5432,29,"[{'serach': 'github'}, {'viewing time': 33}]",,
4,5,73dff494aaf511ed8c0f40b076a1ec6b,2023-02-12 15:51:10,Lee,Lee@mysql.com,010-4567-8910,0,"[{'serach': 'naver'}, {'viewing time': 19}]",,
...,...,...,...,...,...,...,...,...,...,...
139,140,73e01ba9aaf511ed9a4440b076a1ec6b,2023-02-10 18:51:10,Lee,Lee@mysql.com,010-4567-8910,91,"[{'serach': 'youtube'}, {'viewing time': 36}]",,
140,141,73dff491aaf511ed8e4040b076a1ec6b,2023-02-10 18:51:10,Kim,Kim@mysql.com,010-1234-5678,10,"[{'serach': 'naver'}, {'viewing time': 43}]",,
141,142,73e042c9aaf511eda63640b076a1ec6b,2023-02-10 17:51:10,Choi,Choi@mysql.com,010-9876-5432,66,"[{'serach': 'youtube'}, {'viewing time': 16}]",,
142,143,73e01baaaaf511ed97b340b076a1ec6b,2023-02-10 17:51:10,Lee,Lee@mysql.com,010-4567-8910,48,"[{'serach': 'github'}, {'viewing time': 67}]",,


In [17]:
# Leading %: the pattern "%64}]" matches labels that END WITH 64}]
sql = f"SELECT * FROM {MYSQL_TABLENAME} where label LIKE %s;"
print(sql)
result = query(sql, ["%64}]"])
result

SELECT * FROM mytable where label LIKE %s;


Unnamed: 0,index,uuid,reg_date,name,email,phone,energy,label,mark,ext01
0,3,73dfa65faaf511ed9fb340b076a1ec6b,2023-02-12 16:51:10,Kim,Kim@mysql.com,010-1234-5678,44,"[{'serach': 'github'}, {'viewing time': 64}]",,


In [18]:
# % on both sides: the pattern "%[{'serach': 'youtube'}%" matches labels that CONTAIN [{'serach': 'youtube'}
sql = f"SELECT * FROM {MYSQL_TABLENAME} where label LIKE %s;"
print(sql)
result = query(sql, ["%[{'serach': 'youtube'}%"])
result

SELECT * FROM mytable where label LIKE %s;


Unnamed: 0,index,uuid,reg_date,name,email,phone,energy,label,mark,ext01
0,2,73dff493aaf511ed9dc140b076a1ec6b,2023-02-12 16:51:10,Lee,Lee@mysql.com,010-4567-8910,80,"[{'serach': 'youtube'}, {'viewing time': 54}]",,
1,11,73dff496aaf511ed837340b076a1ec6b,2023-02-12 13:51:10,Lee,Lee@mysql.com,010-4567-8910,60,"[{'serach': 'youtube'}, {'viewing time': 77}]",,
2,14,73dff497aaf511edaf8e40b076a1ec6b,2023-02-12 12:51:10,Lee,Lee@mysql.com,010-4567-8910,30,"[{'serach': 'youtube'}, {'viewing time': 2}]",,
3,15,73dfa663aaf511edaa5940b076a1ec6b,2023-02-12 12:51:10,Kim,Kim@mysql.com,010-1234-5678,88,"[{'serach': 'youtube'}, {'viewing time': 58}]",,
4,20,73dff499aaf511ed83ce40b076a1ec6b,2023-02-12 10:51:10,Lee,Lee@mysql.com,010-4567-8910,13,"[{'serach': 'youtube'}, {'viewing time': 40}]",,
5,23,73dff49aaaf511eda03d40b076a1ec6b,2023-02-12 09:51:10,Lee,Lee@mysql.com,010-4567-8910,72,"[{'serach': 'youtube'}, {'viewing time': 26}]",,
6,26,73dff49baaf511ed805640b076a1ec6b,2023-02-12 08:51:10,Lee,Lee@mysql.com,010-4567-8910,66,"[{'serach': 'youtube'}, {'viewing time': 52}]",,
7,27,73dfcd74aaf511ed8b2640b076a1ec6b,2023-02-12 08:51:10,Kim,Kim@mysql.com,010-1234-5678,77,"[{'serach': 'youtube'}, {'viewing time': 9}]",,
8,28,73e01bb4aaf511ed96a840b076a1ec6b,2023-02-12 07:51:10,Choi,Choi@mysql.com,010-9876-5432,18,"[{'serach': 'youtube'}, {'viewing time': 43}]",,
9,29,73dff49caaf511ed848540b076a1ec6b,2023-02-12 07:51:10,Lee,Lee@mysql.com,010-4567-8910,67,"[{'serach': 'youtube'}, {'viewing time': 61}]",,
