In [1]:
# ===========================================
# 1. 조직도 로드 및 딕셔너리 생성
# ===========================================
import pymysql
import pandas as pd
import json

org_path = r"C:\Users\LEEJUHWAN\Desktop\애니파이브\백업\새로받은 user\인사정보_부서코드추가.csv"
df_org = pd.read_csv(org_path, encoding='utf-8-sig')


def _clean_cell(value):
    """Return *value* as a stripped string, or '' when the cell is missing."""
    return str(value).strip() if pd.notna(value) else ''


# Name-keyed lookup: {employee name: {emailId, deptName, positionName, deptCode}}.
# Rows sharing the same name overwrite each other (the last row wins).
org_dict = {}
for _, row in df_org.iterrows():
    name = _clean_cell(row['사원명'])
    if not name:
        # str(NaN) is the literal 'nan'; storing a blank name under that key
        # would create a bogus employee entry, so the row is skipped.
        continue
    org_dict[name] = {
        'emailId': _clean_cell(row['ID']),
        'deptName': _clean_cell(row['부서']),
        'positionName': _clean_cell(row['직위']),
        'deptCode': _clean_cell(row['부서코드']),
    }

print(f"조직도 인원: {len(org_dict)}명")
print(f"샘플: {list(org_dict.items())[:3]}")

조직도 인원: 156명
샘플: [('any-rnd', {'emailId': 'any-rnd', 'deptName': '공통계정', 'positionName': '기타', 'deptCode': 'AB90'}), ('anyfive', {'emailId': 'anyfive', 'deptName': '공통계정', 'positionName': '기타', 'deptCode': 'AB90'}), ('anysm', {'emailId': 'anysm', 'deptName': '공통계정', 'positionName': '기타', 'deptCode': 'AB90'})]


In [2]:
# ===========================================
# 2. Query the documents table
# ===========================================
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment before sharing this notebook.
conn = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    password='1234',
    database='any_approval',
    charset='utf8mb4'
)

query = "SELECT id, drafter_name, activities FROM documents"
try:
    df_docs = pd.read_sql(query, conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

print(f"documents 총 건수: {len(df_docs)}")
print(df_docs.head())

  df_docs = pd.read_sql(query, conn)


documents 총 건수: 23320
   id drafter_name                                         activities
0   1          고상환  [{"positionName":"상무","deptName":"ITO사업팀","act...
1   2          한승재  [{"positionName":"","deptName":"","actionLogTy...
2   3          김선홍  [{"positionName":"선임","deptName":"공공사업팀","acti...
3   4          이필호  [{"positionName":"","deptName":"","actionLogTy...
4   5          한승재  [{"positionName":"","deptName":"","actionLogTy...


In [3]:
# ===========================================
# 3. 업데이트 데이터 생성
# ===========================================
def get_drafter_info(name):
    """Return the drafter columns for *name*.

    Args:
        name: Drafter name taken from a document row; may be None or NaN.

    Returns:
        A dict with the keys 'email', 'position', 'dept' and 'dept_code'.
        A name found in ``org_dict`` gets the values stored there; any other
        name (including a missing one) gets email 'master' and empty strings
        for the remaining keys.
    """
    # NaN is truthy, so a plain truthiness test would let it through and
    # str() would turn it into the literal name 'nan'.
    if pd.api.types.is_scalar(name) and pd.isna(name):
        name = ''
    else:
        name = str(name).strip() if name else ''

    info = org_dict.get(name)
    if info is None:
        # Not in the organisation chart: treated as resigned.
        return {
            'email': 'master',
            'position': '',
            'dept': '',
            'dept_code': ''
        }

    # Current employee.
    return {
        'email': info['emailId'],
        'position': info['positionName'],
        'dept': info['deptName'],
        'dept_code': info['deptCode']
    }

def process_activities(activities_str):
    """Rewrite the organisation fields of each entry in an activities JSON string.

    Each dict entry of the decoded list has 'emailId', 'positionName',
    'deptName' and 'deptCode' replaced: with the values from ``org_dict`` when
    the entry's 'name' is a key there, with empty strings otherwise.

    Args:
        activities_str: JSON text expected to decode to a list of dicts; may
            be None, NaN or empty.

    Returns:
        The re-serialised JSON string (non-ASCII kept as is); None when the
        input is empty/missing or is not valid JSON; the input unchanged when
        it decodes to something other than a list.
    """
    if not activities_str or pd.isna(activities_str):
        return None

    try:
        activities = json.loads(activities_str)
    except json.JSONDecodeError:
        return None

    if not isinstance(activities, list):
        # Unexpected payload shape (object, null, scalar): hand it back
        # untouched instead of failing while iterating over it.
        return activities_str

    for activity in activities:
        if not isinstance(activity, dict):
            # Leave entries that are not objects exactly as they are.
            continue

        # 'name' may be absent or JSON null; both count as an unknown person.
        name = str(activity.get('name') or '').strip()
        info = org_dict.get(name)

        for key in ('emailId', 'positionName', 'deptName', 'deptCode'):
            # Current employee -> organisation value; otherwise cleared.
            activity[key] = info[key] if info is not None else ''

    return json.dumps(activities, ensure_ascii=False)

# Build one update record per document row
update_data = []
for doc in df_docs.itertuples(index=False):
    drafter = get_drafter_info(doc.drafter_name)
    update_data.append({
        'id': doc.id,
        'drafter_email': drafter['email'],
        'drafter_position': drafter['position'],
        'drafter_dept': drafter['dept'],
        'drafter_dept_code': drafter['dept_code'],
        'activities': process_activities(doc.activities),
    })

df_update = pd.DataFrame(update_data)
print(f"업데이트 대상: {len(df_update)}건")
print(df_update.head(10))

업데이트 대상: 23320건
   id drafter_email drafter_position drafter_dept drafter_dept_code  \
0   1          shko               상무       ITO사업팀              DB60   
1   2        master                                                   
2   3         shkim               선임        공공사업팀              DB10   
3   4        master                                                   
4   5        master                                                   
5   6        master                                                   
6   7        shshin               책임       IP서비스팀              GB30   
7   8        master                                                   
8   9        master                                                   
9  10        master                                                   

                                          activities  
0  [{"positionName": "상무", "deptName": "ITO사업팀", ...  
1  [{"positionName": "", "deptName": "", "actionL...  
2  [{"positionName": "선임", "deptName"

In [4]:
# ===========================================
# 4. Preview the pending changes
# ===========================================
# Rows whose drafter e-mail was set to 'master' are shown as resigned,
# every other row as active.
is_master = df_update['drafter_email'] == 'master'

# Resigned drafter sample
resigned_drafter = df_update[is_master]
print(f"퇴사자 drafter: {len(resigned_drafter)}건")
print(resigned_drafter.head(5))

print("\n" + "="*50 + "\n")

# Active drafter sample
active_drafter = df_update[~is_master]
print(f"재직자 drafter: {len(active_drafter)}건")
print(active_drafter.head(5))

퇴사자 drafter: 10919건
   id drafter_email drafter_position drafter_dept drafter_dept_code  \
1   2        master                                                   
3   4        master                                                   
4   5        master                                                   
5   6        master                                                   
7   8        master                                                   

                                          activities  
1  [{"positionName": "", "deptName": "", "actionL...  
3  [{"positionName": "", "deptName": "", "actionL...  
4  [{"positionName": "", "deptName": "", "actionL...  
5  [{"positionName": "", "deptName": "", "actionL...  
7  [{"positionName": "", "deptName": "", "actionL...  


재직자 drafter: 12401건
    id drafter_email drafter_position drafter_dept drafter_dept_code  \
0    1          shko               상무       ITO사업팀              DB60   
2    3         shkim               선임        공공사업팀       

In [5]:
# ===========================================
# 5. Inspect a sample of the activities change
# ===========================================
# Compare the stored payload with the rewritten one for the first document
sample_id = df_docs.iloc[0]['id']
print(f"=== 문서 ID: {sample_id} ===\n")

print("[ 원본 activities ]")
original = df_docs[df_docs['id'] == sample_id]['activities'].values[0]
# Skip the dump when there is no payload; json.loads(None) raises TypeError.
if original:
    print(json.dumps(json.loads(original), ensure_ascii=False, indent=2))

print("\n[ 변경 activities ]")
updated = df_update[df_update['id'] == sample_id]['activities'].values[0]
# process_activities yields None for payloads it could not parse.
if updated:
    print(json.dumps(json.loads(updated), ensure_ascii=False, indent=2))

=== 문서 ID: 1 ===

[ 원본 activities ]
[
  {
    "positionName": "상무",
    "deptName": "ITO사업팀",
    "actionLogType": "DRAFT",
    "name": "고상환",
    "emailId": "shko",
    "type": "DRAFT",
    "actionDate": 1609740886000,
    "deptCode": "DB60",
    "actionComment": "LGC 실험실행관리시스템 계약 품의입니다."
  },
  {
    "positionName": "이사",
    "deptName": "경영기획팀",
    "actionLogType": "APPROVAL",
    "name": "정주연",
    "emailId": "jyjung",
    "type": "APPROVAL",
    "actionDate": 1609747680000,
    "deptCode": "AB30",
    "actionComment": "승인합니다."
  },
  {
    "positionName": "",
    "deptName": "",
    "actionLogType": "APPROVAL",
    "name": "CEO",
    "emailId": "",
    "type": "APPROVAL",
    "actionDate": 1609747852000,
    "deptCode": "",
    "actionComment": "승인합니다"
  },
  {
    "positionName": "",
    "deptName": "",
    "actionLogType": "AGREEMENT",
    "name": "한승재",
    "emailId": "",
    "type": "AGREEMENT",
    "actionDate": 1609742126000,
    "deptCode": "",
    "actionComment": "합의합니다.

In [6]:
# ===========================================
# 5. Inspect a sample change (drafter + activities)
# ===========================================
# Compare the original row with its update record
sample_idx = 0
sample_doc = df_docs.iloc[sample_idx]
sample_id = sample_doc['id']
sample_name = sample_doc['drafter_name']

print(f"=== 문서 ID: {sample_id}, drafter: {sample_name} ===\n")

# Drafter columns after the change
print("[ drafter 변경 ]")
updated_row = df_update[df_update['id'] == sample_id].iloc[0]
for field in ('drafter_email', 'drafter_position', 'drafter_dept', 'drafter_dept_code'):
    print(f"  {field}: {updated_row[field]}")
print(f"  (재직자 여부: {sample_name in org_dict})")

# activities before and after; empty payloads are not dumped
print("\n[ 원본 activities ]")
original = sample_doc['activities']
if original:
    print(json.dumps(json.loads(original), ensure_ascii=False, indent=2))

print("\n[ 변경 activities ]")
updated = updated_row['activities']
if updated:
    print(json.dumps(json.loads(updated), ensure_ascii=False, indent=2))

=== 문서 ID: 1, drafter: 고상환 ===

[ drafter 변경 ]
  drafter_email: shko
  drafter_position: 상무
  drafter_dept: ITO사업팀
  drafter_dept_code: DB60
  (재직자 여부: True)

[ 원본 activities ]
[
  {
    "positionName": "상무",
    "deptName": "ITO사업팀",
    "actionLogType": "DRAFT",
    "name": "고상환",
    "emailId": "shko",
    "type": "DRAFT",
    "actionDate": 1609740886000,
    "deptCode": "DB60",
    "actionComment": "LGC 실험실행관리시스템 계약 품의입니다."
  },
  {
    "positionName": "이사",
    "deptName": "경영기획팀",
    "actionLogType": "APPROVAL",
    "name": "정주연",
    "emailId": "jyjung",
    "type": "APPROVAL",
    "actionDate": 1609747680000,
    "deptCode": "AB30",
    "actionComment": "승인합니다."
  },
  {
    "positionName": "",
    "deptName": "",
    "actionLogType": "APPROVAL",
    "name": "CEO",
    "emailId": "",
    "type": "APPROVAL",
    "actionDate": 1609747852000,
    "deptCode": "",
    "actionComment": "승인합니다"
  },
  {
    "positionName": "",
    "deptName": "",
    "actionLogType": "AGREEMENT",
   

In [7]:
# ===========================================
# 5-2. Show samples of resigned and active drafters separately
# ===========================================
def _drafter_name_of(doc_id):
    """Return drafter_name of the first df_docs row whose id equals *doc_id*."""
    return df_docs[df_docs['id'] == doc_id]['drafter_name'].values[0]


is_master = df_update['drafter_email'] == 'master'

# Resigned sample (e-mail set to 'master'); values are quoted so that empty
# strings stay visible in the output
print("=== 퇴사자 drafter 샘플 ===")
resigned_sample = df_update[is_master].head(3)
for _, rec in resigned_sample.iterrows():
    print(f"ID: {rec['id']}, 이름: {_drafter_name_of(rec['id'])}")
    print(f"  → email: {rec['drafter_email']}, dept: '{rec['drafter_dept']}', position: '{rec['drafter_position']}'")

# Active sample (any other e-mail)
print("\n=== 재직자 drafter 샘플 ===")
active_sample = df_update[~is_master].head(3)
for _, rec in active_sample.iterrows():
    print(f"ID: {rec['id']}, 이름: {_drafter_name_of(rec['id'])}")
    print(f"  → email: {rec['drafter_email']}, dept: {rec['drafter_dept']}, position: {rec['drafter_position']}")

=== 퇴사자 drafter 샘플 ===
ID: 2, 이름: 한승재
  → email: master, dept: '', position: ''
ID: 4, 이름: 이필호
  → email: master, dept: '', position: ''
ID: 5, 이름: 한승재
  → email: master, dept: '', position: ''

=== 재직자 drafter 샘플 ===
ID: 1, 이름: 고상환
  → email: shko, dept: ITO사업팀, position: 상무
ID: 3, 이름: 김선홍
  → email: shkim, dept: 공공사업팀, position: 선임
ID: 7, 이름: 신성호
  → email: shshin, dept: IP서비스팀, position: 책임


In [8]:
# ===========================================
# 5-3. Validation checks before the update
# ===========================================

# Load the list of resigned employees
resigned_path = r"C:\Users\LEEJUHWAN\Desktop\애니파이브\백업\퇴사자명단.csv"
df_resigned = pd.read_csv(resigned_path, encoding='utf-8-sig')
# Drop blank cells first: a NaN would otherwise become a member of the set
# and be counted as a person in the total printed below.
resigned_set = set(df_resigned['퇴사자명'].dropna().astype(str).str.strip())

print(f"퇴사자 명단: {len(resigned_set)}명")

퇴사자 명단: 335명


In [9]:
# ===========================================
# Check 1: drafter_email is 'master' but another drafter column is filled
# ===========================================
is_master = df_update['drafter_email'] == 'master'
# True for rows where at least one of the three columns is not ''
has_org_value = (
    df_update[['drafter_dept', 'drafter_position', 'drafter_dept_code']]
    .ne('')
    .any(axis=1)
)
test1 = df_update[is_master & has_org_value]

print(f"검증1 - master인데 다른 컬럼 채워진 경우: {len(test1)}건")
if test1.empty:
    print("✅ 통과!")
else:
    print("⚠️ 문제 있음!")
    print(test1.head(10))

검증1 - master인데 다른 컬럼 채워진 경우: 1건
⚠️ 문제 있음!
          id drafter_email drafter_position drafter_dept drafter_dept_code  \
22467  22469        master               사원         공통계정              AB90   

                                              activities  
22467  [{"positionName": "사원", "deptName": "공통계정", "a...  


In [10]:
# ===========================================
# Check 2: resigned drafter whose drafter_email is not 'master'
# ===========================================
# Attach drafter_name from df_docs to the update records
df_check = df_update.merge(df_docs[['id', 'drafter_name']], on='id')

# Name is on the resigned list, yet the e-mail is something other than 'master'
on_resigned_list = df_check['drafter_name'].isin(resigned_set)
not_master = df_check['drafter_email'] != 'master'
test2 = df_check[on_resigned_list & not_master]

print(f"검증2 - 퇴사자인데 master 아닌 경우: {len(test2)}건")
if test2.empty:
    print("✅ 통과!")
else:
    print("⚠️ 문제 있음!")
    print(test2[['id', 'drafter_name', 'drafter_email']].head(10))

검증2 - 퇴사자인데 master 아닌 경우: 0건
✅ 통과!


In [11]:
# ===========================================
# Check 3: current employee whose drafter_email is 'master'
# ===========================================
# A name present in the organisation chart should not end up as 'master'
org_names = set(org_dict)

in_org_chart = df_check['drafter_name'].isin(org_names)
is_master = df_check['drafter_email'] == 'master'
test3 = df_check[in_org_chart & is_master]

print(f"검증3 - 재직자인데 master인 경우: {len(test3)}건")
if test3.empty:
    print("✅ 통과!")
else:
    print("⚠️ 문제 있음!")
    print(test3[['id', 'drafter_name', 'drafter_email']].head(10))

검증3 - 재직자인데 master인 경우: 1건
⚠️ 문제 있음!
          id drafter_name drafter_email
22467  22469          관리자        master


In [12]:
# ===========================================
# Check 4: resigned-employee rule inside activities
# ===========================================
# Every activity entry whose name is on the resigned list must have empty
# emailId / positionName / deptName / deptCode values.
error_activities = []
# Rows whose payload could not be inspected; reported instead of being
# silently ignored, since a hidden failure would let the check pass.
skipped_docs = 0

for idx, row in df_update.iterrows():
    if not row['activities']:
        continue

    try:
        activities = json.loads(row['activities'])
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass.
        skipped_docs += 1
        continue

    if not isinstance(activities, list):
        skipped_docs += 1
        continue

    for act in activities:
        if not isinstance(act, dict):
            continue

        # 'name' may be absent or JSON null.
        name = str(act.get('name') or '').strip()
        if name not in resigned_set:
            continue

        # Resigned person with any organisation field still filled in
        # (a missing key also counts, because None != '').
        if any(act.get(key) != '' for key in ('emailId', 'positionName', 'deptName', 'deptCode')):
            error_activities.append({
                'doc_id': row['id'],
                'name': name,
                'emailId': act.get('emailId'),
                'positionName': act.get('positionName')
            })

print(f"검증4 - activities 내 퇴사자 규칙 위반: {len(error_activities)}건")
if skipped_docs:
    print(f"검증4 - 파싱 불가로 건너뛴 문서: {skipped_docs}건")
if len(error_activities) > 0:
    print("⚠️ 문제 있음!")
    print(pd.DataFrame(error_activities).head(10))
else:
    print("✅ 통과!")

검증4 - activities 내 퇴사자 규칙 위반: 0건
✅ 통과!


In [None]:
# ===========================================
# Validation summary
# ===========================================
divider = "=" * 50
print(divider)
print("검증 요약")
print(divider)
print(f"검증1 (master인데 다른 컬럼 채워짐): {len(test1)}건")
print(f"검증2 (퇴사자인데 master 아님): {len(test2)}건")
print(f"검증3 (재직자인데 master임): {len(test3)}건")
print(f"검증4 (activities 퇴사자 규칙 violation): {len(error_activities)}건".replace("violation", "위반"))
print(divider)

# The go-ahead message is printed only when every check found zero rows
issue_counts = (len(test1), len(test2), len(test3), len(error_activities))
if any(issue_counts):
    print("⚠️ 문제가 있습니다. 확인 후 진행하세요.")
else:
    print("✅ 모든 검증 통과! 6번 셀 실행해도 됩니다.")

In [13]:
# ===========================================
# 6. Apply the update to the database (run only after checking!)
# ===========================================
# ⚠️ Run only after reviewing the output of steps 4 and 5!
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment before sharing this notebook.

conn = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    password='1234',
    database='any_approval',
    charset='utf8mb4'
)
cursor = conn.cursor()

update_count = 0
error_count = 0

# Statement used when a rewritten activities payload is available.
query = """UPDATE documents 
SET drafter_email = %s, 
    drafter_position = %s, 
    drafter_dept = %s, 
    drafter_dept_code = %s,
    activities = %s
WHERE id = %s"""

# Statement used when activities is None (source payload empty or not
# parseable): the stored activities value is left untouched rather than
# being overwritten with NULL.
query_keep_activities = """UPDATE documents 
SET drafter_email = %s, 
    drafter_position = %s, 
    drafter_dept = %s, 
    drafter_dept_code = %s
WHERE id = %s"""

try:
    for idx, row in df_update.iterrows():
        drafter_values = (
            row['drafter_email'],
            row['drafter_position'],
            row['drafter_dept'],
            row['drafter_dept_code'],
        )
        try:
            if row['activities'] is None:
                cursor.execute(query_keep_activities, drafter_values + (row['id'],))
            else:
                cursor.execute(query, drafter_values + (row['activities'], row['id']))
            update_count += 1

            # Progress report every 1000 rows
            if update_count % 1000 == 0:
                print(f"진행 중... {update_count}/{len(df_update)}")

        except Exception as e:
            # Best effort: report the failing row and carry on with the rest.
            error_count += 1
            print(f"에러 - ID {row['id']}: {e}")

    conn.commit()
finally:
    # Always release the cursor and the connection, also when the loop is
    # interrupted before the commit.
    cursor.close()
    conn.close()

print(f"\n완료! 업데이트: {update_count}건, 에러: {error_count}건")

진행 중... 1000/23320
진행 중... 2000/23320
진행 중... 3000/23320
진행 중... 4000/23320
진행 중... 5000/23320
진행 중... 6000/23320
진행 중... 7000/23320
진행 중... 8000/23320
진행 중... 9000/23320
진행 중... 10000/23320
진행 중... 11000/23320
진행 중... 12000/23320
진행 중... 13000/23320
진행 중... 14000/23320
진행 중... 15000/23320
진행 중... 16000/23320
진행 중... 17000/23320
진행 중... 18000/23320
진행 중... 19000/23320
진행 중... 20000/23320
진행 중... 21000/23320
진행 중... 22000/23320
진행 중... 23000/23320

완료! 업데이트: 23320건, 에러: 0건
