# 영업 전환 여부 분류

> 영업사원이 효율적으로 움직일 수 있게 전환 여부 고객을 미리 예측하는 모델 만들기

이 페이지에는 다음과 같은 목록으로 진행한다.

  - Load Library(Module)
  - Load data
  - Data Transform



## 1. Load Library


In [1]:
# basic
import pandas as pd
import numpy as np
import os
import re
import pycountry
import pycountry_convert as pc
import tqdm
import time
import pandas as pd
import json
from pandas import json_normalize

# visualization
from matplotlib import pyplot as plt
import seaborn as sns
plt.rcParams['font.family'] = 'NanumGodic'

import googletrans
from googletrans import Translator

## 2. Load data

In [2]:
# load
train = pd.read_csv('train.csv')
test = pd.read_csv('submission.csv')

## 3. Data transform

### Customer Country

In [3]:
# 오타는 변환해야함
train['response_corporate'] = train['response_corporate'].replace('LGEBT', 'LGEPT')
test['response_corporate'] = test['response_corporate'].replace('LGEBT', 'LGEPT')

In [4]:
# 나라 이름 불러오기
countries = []

for country in pycountry.countries:
    countries.append(country.name)

In [5]:
countries

['Aruba',
 'Afghanistan',
 'Angola',
 'Anguilla',
 'Åland Islands',
 'Albania',
 'Andorra',
 'United Arab Emirates',
 'Argentina',
 'Armenia',
 'American Samoa',
 'Antarctica',
 'French Southern Territories',
 'Antigua and Barbuda',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Burundi',
 'Belgium',
 'Benin',
 'Bonaire, Sint Eustatius and Saba',
 'Burkina Faso',
 'Bangladesh',
 'Bulgaria',
 'Bahrain',
 'Bahamas',
 'Bosnia and Herzegovina',
 'Saint Barthélemy',
 'Belarus',
 'Belize',
 'Bermuda',
 'Bolivia, Plurinational State of',
 'Brazil',
 'Barbados',
 'Brunei Darussalam',
 'Bhutan',
 'Bouvet Island',
 'Botswana',
 'Central African Republic',
 'Canada',
 'Cocos (Keeling) Islands',
 'Switzerland',
 'Chile',
 'China',
 "Côte d'Ivoire",
 'Cameroon',
 'Congo, The Democratic Republic of the',
 'Congo',
 'Cook Islands',
 'Colombia',
 'Comoros',
 'Cabo Verde',
 'Costa Rica',
 'Cuba',
 'Curaçao',
 'Christmas Island',
 'Cayman Islands',
 'Cyprus',
 'Czechia',
 'Germany',
 'Djibouti',
 'Dominica'

In [6]:
# 나라 목록 확인과 대문자로 변환
cri = list(map(lambda x: x.upper(), countries))

In [7]:
# 결측치를 //로 대치
train['customer_country'][train['customer_country'].isna()] = '//'
test['customer_country'][test['customer_country'].isna()] = '//'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['customer_country'][train['customer_country'].isna()] = '//'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['customer_country'][test['customer_country'].isna()] = '//'


In [8]:
# //를 /로 대치하고 맨 뒤에있는거 그냥 부름
train['country'] = train['customer_country'].str.replace('//', '/').str.split('/').map(lambda x: x[-1]).str.strip().str.upper()
test['country'] = test['customer_country'].str.replace('//', '/').str.split('/').map(lambda x: x[-1]).str.strip().str.upper()

In [9]:
# 기준 나라에 있는 이름이면 이름 불러오고 아니면 ''불러오기
train['customer_country2'] = np.where(train['country'].isin(cri), train['country'], '')
test['customer_country2'] = np.where(test['country'].isin(cri), test['country'], '')

In [10]:
# 나라 이름 바뀐 경우 바꿔서 대입해줘야 함
before = [
    train['customer_country2'] == 'DEMOCRATIC REPUBLIC OF THE CONGO',
    train['customer_country2'] == 'U.A.E',
    train['customer_country2'] == 'IVORY COAST',
    train['customer_country2'] == "COTE D'IVOIRE",
    train['customer_country2'] == 'IRAN',
    train['customer_country2'] == 'TURKEY',
    train['customer_country2'] == 'SWAZILAND',
    train['customer_country2'] == 'UNITED REPUBLIC OF TANZANIA',
    train['customer_country2'] == 'SOUTH KOREA',
    train['customer_country2'] == 'RUSSIA',
    train['customer_country2'] == 'BRUNEI',
    train['customer_country2'] == 'TAIWAN',
    train['customer_country2'] == 'VIETNAM',
    train['customer_country2'] == 'CZECH',
    train['customer_country2'] == 'VENEZUELA',
    train['customer_country2'] == 'ST KITTS',
    train['customer_country2'] == 'ANTIGUA',
    train['customer_country2'] == 'ST MAARTEN',
    train['customer_country2'] == 'NETHERLANDS ANTILLES',
    train['customer_country2'] == 'BOLIVIA',
    train['customer_country2'] == 'SYRIA',
    train['customer_country2'] == 'MACEDONIA',
    train['customer_country2'] == 'PALESTINE'
]

after = [
    'CONGO',
    'UNITED ARAB EMIRATES',
    "CÔTE D'IVOIRE",
    "CÔTE D'IVOIRE",
    'IRAN, ISLAMIC REPUBLIC OF',
    'TURKEY',
    'ESWATINI',
    'TANZANIA, UNITED REPUBLIC OF',
    "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF",
    'RUSSIA',
    'BRUNEI DARUSSALAM',
    'TAIWAN, PROVINCE OF CHINA',
    'VIETNAM',
    'CZECHIA',
    'VENEZUELA, BOLIVARIAN REPUBLIC OF',
    'SAINT KITTS AND NEVIS',
    'ANTIGUA AND BARBUDA',
    'SINT MAARTEN (DUTCH PART)',
    'NETHERLANDS',
    'BOLIVIA, PLURINATIONAL STATE OF',
    'SYRIAN ARAB REPUBLIC',
    'NORTH MACEDONIA',
    'PALESTINE, STATE OF'
]

train['customer_country3'] = np.select(before, after, default=train['customer_country2'])

In [11]:
# 나라 이름 바뀐 경우 바꿔서 대입해줘야 함
before = [
    test['customer_country2'] == 'DEMOCRATIC REPUBLIC OF THE CONGO',
    test['customer_country2'] == 'U.A.E',
    test['customer_country2'] == 'IVORY COAST',
    test['customer_country2'] == "COTE D'IVOIRE",
    test['customer_country2'] == 'IRAN',
    test['customer_country2'] == 'TURKEY',
    test['customer_country2'] == 'SWAZILAND',
    test['customer_country2'] == 'UNITED REPUBLIC OF TANZANIA',
    test['customer_country2'] == 'SOUTH KOREA',
    test['customer_country2'] == 'RUSSIA',
    test['customer_country2'] == 'BRUNEI',
    test['customer_country2'] == 'TAIWAN',
    test['customer_country2'] == 'VIETNAM',
    test['customer_country2'] == 'CZECH',
    test['customer_country2'] == 'VENEZUELA',
    test['customer_country2'] == 'ST KITTS',
    test['customer_country2'] == 'ANTIGUA',
    test['customer_country2'] == 'ST MAARTEN',
    test['customer_country2'] == 'NETHERLANDS ANTILLES',
    test['customer_country2'] == 'BOLIVIA',
    test['customer_country2'] == 'SYRIA',
    test['customer_country2'] == 'MACEDONIA',
    test['customer_country2'] == 'PALESTINE'
]

after = [
    'CONGO',
    'UNITED ARAB EMIRATES',
    "CÔTE D'IVOIRE",
    "CÔTE D'IVOIRE",
    'IRAN, ISLAMIC REPUBLIC OF',
    'TÜRKIYE',
    'ESWATINI',
    'TANZANIA, UNITED REPUBLIC OF',
    "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF",
    'RUSSIA',
    'BRUNEI DARUSSALAM',
    'TAIWAN, PROVINCE OF CHINA',
    'VIETNAM',
    'CZECHIA',
    'VENEZUELA, BOLIVARIAN REPUBLIC OF',
    'SAINT KITTS AND NEVIS',
    'ANTIGUA AND BARBUDA',
    'SINT MAARTEN (DUTCH PART)',
    'NETHERLANDS',
    'BOLIVIA, PLURINATIONAL STATE OF',
    'SYRIAN ARAB REPUBLIC',
    'NORTH MACEDONIA',
    'PALESTINE, STATE OF'
]

test['customer_country3'] = np.select(before, after, default=test['customer_country2'])

In [12]:
# 나라의 딕셔너리 구성
country_dict = {"LGERA": "Russia",
"LGEUR": "Ukraine",
"LGEAP": "Australia",
"LGECH": "China",
"LGEHK": "Hong Kong",
"LGEIL": "India",
"LGEIN": "Indonesia",
"LGEJP": "Japan",
"LGEKR": "Korea",
"LGEML": "Malaysia",
"LGEPH": "Philippines",
"LGESL": "Singapore",
"LGETT": "Taiwan, Province of China",
"LGETH": "Thailand",
"LGEAR": "Argentina",
"LGECZ": "Czechia",
"LGEFS": "France",
"LGEDG": "Germany",
"LGEAG": "Austria",
"LGEHS": "Greece",
"LGEMK": "Hungary",
"LGEIS": "Italy",
"LGEBN": "Netherlands",
"LGEPL": "Poland",
"LGEPT": "Portugal",
"LGERO": "Romania",
"LGEES": "Spain",
"LGESW": "Sweden",
"LGEUK": "United Kingdom",
"LGEAS": "Algeria",
"LGEEG": "Egypt",
"LGELF": "Jordan",
"LGEMC": "Morocco",
"LGESA": "The Republic of South Africa",
"LGEGF": "United Arab Emirates",
"LGEAF": "United Arab Emirates",
"LGETK": "Turkiye",
"LGECI": "Canada",
"LGEMX": "Mexico",
"LGEMS": "Mexico",
"LGEUS": "United States",
"LGECL": "Chile",
"LGECB": "Colombia",
"LGEPS": "Panama",
"LGEPR": "Peru",
"LGESJ": "Saudi Arabia",
"LGESP": "Brazil",
"LGEEF": "Kenya",
"LGEYK": "Palestine, State of",
"LGEEB": "en_EU", 
"LGEVH": "Viet Nam",
"LGELA": "Latvia",
"LGEIR": "Iran, Islamic Republic of",
"LGEBT": "Portugal"
               }

In [13]:
# 나라 이름 대입
ccc = pd.DataFrame(countries, cri).reset_index()
ccc.columns = ['upper', 'countries']

In [14]:
ccc

Unnamed: 0,upper,countries
0,ARUBA,Aruba
1,AFGHANISTAN,Afghanistan
2,ANGOLA,Angola
3,ANGUILLA,Anguilla
4,ÅLAND ISLANDS,Åland Islands
...,...,...
244,SAMOA,Samoa
245,YEMEN,Yemen
246,SOUTH AFRICA,South Africa
247,ZAMBIA,Zambia


In [15]:
# 대문자로 전부 만든거 다시 되돌리기
train2 = pd.merge(train, ccc, how='left', left_on='customer_country3', right_on='upper')
train = train2
test2 = pd.merge(test, ccc, how='left', left_on='customer_country3', right_on='upper')
test = test2

In [16]:
# 지사에 맞게 값을 넣어주기
train['customer_country4'] = np.where(train['countries'].isna(),
                                         train['response_corporate'].apply(lambda x: country_dict[x]),
                                         train['countries'])
test['customer_country4'] = np.where(test['countries'].isna(),
                                         test['response_corporate'].apply(lambda x: country_dict[x]),
                                         test['countries'])

In [17]:
# 나라 대입해서 바꿔놓기
train['customer_country'] = train['customer_country4']
test['customer_country'] = test['customer_country4']

In [18]:
# 생성한 변수 전부 삭제
train.drop(['country', 'customer_country2', 'customer_country3', 'upper', 'countries',
           'customer_country4', 'customer_country.1'], axis = 1, inplace = True)
test.drop(['country', 'customer_country2', 'customer_country3', 'upper', 'countries',
          'customer_country4', 'customer_country.1'], axis = 1, inplace = True)

### Continent

In [19]:
# 반영이 안되는 값들 반영 되는 값으로 변경
train['customer_country'][train['customer_country'] == 'Turkiye'] = 'Türkiye'
train['customer_country'][train['customer_country'] == 'The Republic of South Africa'] = 'South Africa'
train['customer_country'][train['customer_country'] == 'Korea'] = "Korea, Democratic People's Republic of"

# 함수 만들기
def country_to_continent(country_name):
    country_alpha2 = pc.country_name_to_country_alpha2(country_name)
    country_continent_code = pc.country_alpha2_to_continent_code(country_alpha2)
    country_continent_name = pc.convert_continent_code_to_continent_name(country_continent_code)
    return country_continent_name

# 있으면 대륙이름 없으면 나라이름 대입하기
cont = []
for i in train['customer_country']:
    try:
        cont.append(country_to_continent(i))
    except:
        cont.append(i)

# test에서도 똑같이
cont2 = []
for i in test['customer_country']:
    try:
        cont2.append(country_to_continent(i))
    except:
        cont2.append(i)

# 칼럼 만들기
train['continent'] = cont
test['continent'] = cont2

# EU는 유럽
train['continent'][train['continent'] == 'en_EU'] = 'Europe'
test['continent'][test['continent'] == 'en_EU'] = 'Europe'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['customer_country'][train['customer_country'] == 'Turkiye'] = 'Türkiye'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['customer_country'][train['customer_country'] == 'The Republic of South Africa'] = 'South Africa'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['customer_country'][train['customer_country'] == 'Korea'] = "Korea, Democratic People's Republic of"
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

In [20]:
train.columns

Index(['bant_submit', 'customer_country', 'business_unit',
       'com_reg_ver_win_rate', 'customer_idx', 'customer_type', 'enterprise',
       'historical_existing_cnt', 'id_strategic_ver', 'it_strategic_ver',
       'idit_strategic_ver', 'customer_job', 'lead_desc_length',
       'inquiry_type', 'product_category', 'product_subcategory',
       'product_modelname', 'customer_position', 'response_corporate',
       'expected_timeline', 'ver_cus', 'ver_pro', 'ver_win_rate_x',
       'ver_win_ratio_per_bu', 'business_area', 'business_subarea',
       'lead_owner', 'is_converted', 'continent'],
      dtype='object')

### Expected Timeline

In [21]:
# expected_timeline
train['expected_timeline2'] = train['expected_timeline'].str.replace('-', '~').str.replace('_', ' ').str.lower()
test['expected_timeline2'] = test['expected_timeline'].str.replace('-', '~').str.replace('_', ' ').str.lower()

In [22]:
con = [
    train['expected_timeline2'].isna(),
    train['expected_timeline2'].str.contains('less than 3 months', na = False),
    train['expected_timeline2'].str.contains('3 months ~ 6 months', na = False),
    train['expected_timeline2'].str.contains('6 months ~ 9 months', na = False),
    train['expected_timeline2'].str.contains('9 months ~ 1 year', na = False),
    train['expected_timeline2'].str.contains('more than a year', na = False)
]
cri = [
    'no_answer',
    'less_than_3_months',
    '3_months_6_months',
    '6_months_9_months',
    '9_months_1_year',
    'more_than_a_year'
]
train['et'] = np.select(con, cri, default = 'others')

In [23]:
con2 = [
    test['expected_timeline2'].isna(),
    test['expected_timeline2'].str.contains('less than 3 months', na = False),
    test['expected_timeline2'].str.contains('3 months ~ 6 months', na = False),
    test['expected_timeline2'].str.contains('6 months ~ 9 months', na = False),
    test['expected_timeline2'].str.contains('9 months ~ 1 year', na = False),
    test['expected_timeline2'].str.contains('more than a year', na = False)
]
cri2 = [
    'no_answer',
    'less_than_3_months',
    '3_months_6_months',
    '6_months_9_months',
    '9_months_1_year',
    'more_than_a_year'
]
test['et'] = np.select(con2, cri2, default = 'others')

### Inquiry Type

In [24]:
train['inquiry_type'].unique()

array(['Quotation or purchase consultation', 'Product Information',
       'Quotation or Purchase Consultation', 'Other',
       'Usage or technical consultation', 'Trainings', 'Services',
       'Sales Inquiry', 'Etc.', 'Technical Support',
       'Usage or Technical Consultation', 'Technical Consultation',
       'Request for Partnership', nan, 'sales', 'technical',
       'usage or technical consultation',
       'usage_or_technical_consultation', 'other',
       'quotation_or_purchase_consultation', 'other_', 'Request a Demo',
       'Request for Distributorship', 'Request for quotation or purchase',
       'Request for technical consulting', '(Select ID_Needs)',
       'One Quick:Flex', 'AIO', 'Needs', 'Purchase',
       'technical_consultation', 'Customer Suggestions', 'Event Inquiry',
       'Others', 'OEM/ODM Request', 'Hospital TV', 'others',
       'i want to know the details about it', 'EDUCATIONAL EQUIPMENTS',
       'Digital platform', 'TV interactive', 'teach',
       'Di

In [25]:
# iqt2
train['inquiry_type2'] = train['inquiry_type'].str.replace('_', ' ').str.lower()
test['inquiry_type2'] = test['inquiry_type'].str.replace('_', ' ').str.lower()

In [26]:
# inquiry_type case
inquiry_type = [
        "Quotation or Purchase Consultation",
        "Request a Demo",
        "OEM/ODM Request",
        "Usage or Technical Consultation",
        "Request for Partnership",
        "Customer Suggestions",
        "Others",
        "Enquire to purchase and quotation",
        "Request from installer",
        "Request support for an existing LG installation",
        "Purchasing For Business Use",
        "Purchasing For Personal Use",
        "Request for Information"
]
inquiry_type_lower = list(map(lambda x: x.lower(), inquiry_type))

In [27]:
con = [
    train['inquiry_type2'].isna(),
    train['inquiry_type2'].isin(inquiry_type_lower)
]
cri = [
    'no_answer',
    train['inquiry_type2']
]
train['inqtype'] = np.select(con, cri, default = 'others')

In [28]:
con2 = [
    test['inquiry_type2'].isna(),
    test['inquiry_type2'].isin(inquiry_type_lower)
]
cri2 = [
    'no_answer',
    test['inquiry_type2']
]
test['inqtype'] = np.select(con2, cri2, default = 'others')

In [29]:
train.isna().sum()

bant_submit                    0
customer_country               0
business_unit                  0
com_reg_ver_win_rate       44731
customer_idx                   0
customer_type              43961
enterprise                     0
historical_existing_cnt    45543
id_strategic_ver           55855
it_strategic_ver           58178
idit_strategic_ver         54734
customer_job               18733
lead_desc_length               0
inquiry_type                 941
product_category           19374
product_subcategory        50064
product_modelname          50070
customer_position              0
response_corporate             0
expected_timeline          30863
ver_cus                        0
ver_pro                        0
ver_win_rate_x             40882
ver_win_ratio_per_bu       43995
business_area              40882
business_subarea           53773
lead_owner                     0
is_converted                   0
continent                      0
expected_timeline2         30863
et        

In [30]:
# 변수 재 대입 하기
train['inquiry_type'] = train['inqtype']
train['expected_timeline'] = train['et']

test['inquiry_type'] = test['inqtype']
test['expected_timeline'] = test['et']

In [31]:
# 생성한 변수 전부 삭제
train.drop(['expected_timeline2', 'et', 'inquiry_type2', 'inqtype'], axis = 1, inplace = True)
test.drop(['expected_timeline2', 'et', 'inquiry_type2', 'inqtype'], axis = 1, inplace = True)

### Product_category

In [32]:
# split할 이름 변형하여 split 적용
train['pc'] = train['product_category'].str.replace('/', ',').str.split(',')

In [33]:
# train2에서 pc를 반영하고 제거
train2 = train.explode('pc')
train2['product_category'] = train2['pc']
train2.drop('pc', axis = 1, inplace = True)

# 다시 train으로 변형
train = train2

In [34]:
# 목록
product_category = [
    "Reciprocating Compressor",
    "Rotary Compressor",
    "Scroll Compressor",
    "Motor",
    "Projector",
    "Monitor",
    "All Medical Displays",
    "Clinical Review Monitors",
    "Diagnostic Monitors",
    "Surgical Monitors",
    "Digital X-ray Detectors",
    "Laptop",
    "All Cloud Devices",
    "Thin Clients",
    "Zero Clients",
    "OLED Signage",
    "LED Signage",
    "Video Wall Signage",
    "Interactive Signage",
    "High Brightness Signage",
    "Special Signage",
    "Standard Signage",
    "Hotel TV",
    "Hospital TV",
    "Accessories",
    "Software Solution",
    "Signage Care Solution",
    "WebOS",
    "Pro:Centric",
    "One:Quick Series",
    "Ventilation",
    "VRF",
    "Multi-Split",
    "Single-Split",
    "Chiller",
    "Heating",
    "Energy Storage System",
    "Others"
]
product_category_lower = list(map(lambda x: x.lower(), product_category))

In [35]:
# product 카테고리 - 서브카테고리
data = """
{
    "product_subcategory": {
        "OLED Signage": [
            "Curvable OLED Signage",
            "Curvature Calibrator",
            "Flat OLED Signage",
            "OLED Pro Monitor",
            "Transparent OLED Signage",
            "UltraFine Display OLED Pro",
            "Flexible Curved Open Frame OLED Signage",
            "Transparent OLED Touch Signage",
            "Wallpaper OLED Signage"
        ],
        "LED Signage": [
            "Compact Series",
            "Curved Series",
            "Essential Series",
            "Fine-pitch Essential Series",
            "Indoor Versatile Series",
            "LED Cinema",
            "LG LED All-in-One Essential Series",
            "LG LED All-in-One Premium Series",
            "LG LED All-in-One Smart Series",
            "LG LED Bloc",
            "LG LED Curve Series",
            "LG MAGNIT",
            "Outdoor Versatile Series",
            "Premium Series",
            "Premium Slim Series",
            "Stadium Series (Perimeter)",
            "Stadium Series (Ribbon Board)",
            "Transparent LED Film",
            "Ultra Light Series",
            "Ultra Slim Series",
            "Digital Floor Series",
            "High Brightness Series"
        ],
        "Video Wall Signage": [
            "High Brightness Video Wall",
            "LV35A Series",
            "SVH7E Series",
            "SVH7F Series",
            "SVH7PF Series",
            "SVM5F Series",
            "Ultra Narrow Bezel Video Wall",
            "VH7B Series",
            "VH7E Series",
            "VH7E-H Series",
            "VL5D Series",
            "VL5F Series",
            "VL5PF Series",
            "VL7F Series",
            "VM5E Series",
            "49\\" 500 nits FHD Slim Bezel Video Wall",
            "55\\" 700 nits FHD 0.44mm Even Bezel Video Wall",
            "55\\" 500 nits FHD Slim Bezel Video Wall",
            "55\\" 700 nits FHD Slim Bezel Video Wall"
        ],
        "Interactive Signage": [
            "Built-in Touch Display",
            "Built-in Touch Signage",
            "Interactive Digital Board",
            "TA3E Series",
            "TN3F Series",
            "Touch Overlay Kit",
            "TR3BF Series",
            "TR3BG Series",
            "TR3DJ Series",
            "Touch Open Frame",
            "UHD IR-type Touch CreateBoard"
        ],
        "High Brightness Signage": [
            "Open Frame",
            "Outdoor Display",
            "Window Facing Display"
        ],
        "Special Signage": [
            "Ultra Stretch Signage",
            "LG Thermal Sensing Terminal"
        ],
        "Standard Signage": [
            "LT340C Series",
            "SE3KE Series",
            "SL5B Series",
            "SM3G Series",
            "SM5KE Series",
            "UH5F Series",
            "UH5F-H Series",
            "UH7F Series",
            "UH7F-H Series",
            "UL3G Series",
            "UM3DG Series",
            "UM3DG-H Series",
            "UT640S Series",
            "Full HD Standard Signage",
            "WebOS UHD Signage",
            "UHD TV Signage",
            "UHD Large Screen Signage Display",
            "New High Haze UHD Standard Signage",
            "High Haze UHD Signage for Hospitals"
        ],
        "Hotel TV": [
            "ES961H Series",
            "ET961H Series",
            "LS341H",
            "LT330H Series",
            "LT340H Series",
            "LT341H Series",
            "LT560H Series",
            "LT570H Series",
            "LT660H Series",
            "LT661H Series",
            "US342H Series",
            "US660H Series",
            "US662H Series",
            "US665H Series",
            "US670H Series",
            "US760H Series",
            "US761H Series",
            "US762H Series",
            "US765H Series",
            "US770H Series",
            "UT340H Series",
            "UT347H Series",
            "UT567H Series",
            "UT570H Series",
            "UT577H Series",
            "UT660H Series",
            "UT665H Series",
            "UT670H Series",
            "UT770H Series",
            "UT781H Series",
            "UT782H Series",
            "WS960H Series",
            "4K UHD Hospitality TV with Pro:Centric Direct",
            "Essential Commercial TV with NanoCell Display",
            "Essential Commercial TV with 4K Active HDR"
        ],
        "Hospital TV": [
            "LT572M Series",
            "LT662M series",
            "LU766A Series",
            "US772M Series",
            "UT672M series",
            "UT662M Series",
            "NanoCell Hospital TV",
            "Smart Touch Screen TV"
        ],
        "Accessories": [
            "Pro:Centric SMART Set Top Box",
            "webOS Box",
            "Floor Stand for One:Quick Flex"
        ],
        "Software Solution": [
            "SuperSign CMS",
            "SuperSign Control & Control+",
            "SuperSign WB",
            "LG Simple Editor",
            "SuperSign Simple Editor",
            "SuperSign Media Editor"
        ],
        "Signage Care Solution": [
            "LG ExtendedCare",
            "LG ConnectedCare",
            "Total Care Solution"
        ],
        "WebOS": [],
        "Pro:Centric": [
            "Pro:Centric Smart",
            "Pro:Centric Direct",
            "Pro:Centric Value",
            "Hotel Quick Menu"
        ],
        "One:Quick Series": [
            "One:Quick Share",
            "One:Quick Works",
            "One:Quick Flex"
        ]
    }
}
    """

# JSON 문자열을 Python 객체로 변환
parsed_data = json.loads(data)

# 키와 값의 쌍을 행으로 가지는 리스트 생성
rows = []
for category, products in parsed_data['product_subcategory'].items():
    for product in products:
        rows.append({'product_category': category, 'product_subcategory': product})

# 리스트를 DataFrame으로 변환
df = pd.DataFrame(rows)

# 데이터프레임 출력
print(df)

     product_category       product_subcategory
0        OLED Signage     Curvable OLED Signage
1        OLED Signage      Curvature Calibrator
2        OLED Signage         Flat OLED Signage
3        OLED Signage          OLED Pro Monitor
4        OLED Signage  Transparent OLED Signage
..                ...                       ...
142       Pro:Centric         Pro:Centric Value
143       Pro:Centric          Hotel Quick Menu
144  One:Quick Series           One:Quick Share
145  One:Quick Series           One:Quick Works
146  One:Quick Series            One:Quick Flex

[147 rows x 2 columns]


In [36]:
# product 서브카테고리 - 모델이름
data2 = """
{
    "product_modelname": {
        "Curvable OLED Signage": [
            "55EF5F-L",
            "55EF5F-P",
            "55EF5G-L",
            "55EF5G-P"
        ],
        "Curvature Calibrator": [
            "ACC-CC-EF5E"
        ],
        "Flat OLED Signage": [
            "55EG5CE",
            "55EJ5E-B",
            "55EJ5G",
            "65EJ5E-B",
            "65EV5E"
        ],
        "OLED Pro Monitor": [
            "65EP5G"
        ],
        "Transparent OLED Signage": [
            "55EW5F-A",
            "55EW5G-A",
            "55EW5TF-A",
            "55EW5G-V",
            "55EW5PG-S"
        ],
        "UltraFine Display OLED Pro": [
            "65EP5G"
        ],
        "Flexible Curved Open Frame OLED Signage": [
            "55EF5K-P",
            "55EF5K-L"
        ],
        "Transparent OLED Touch Signage": [
            "55EW5TK-A"
        ],
        "Wallpaper OLED Signage": [
            "55EJ5K"
        ],
        "Compact Series": [
            "LAS025DB9-V",
            "LSBA039",
            "LSBA025",
            "LAS039DB9-V",
            "LSBC019",
            "LSBC026",
            "LSBC029",
            "LSBC039"
        ],
        "Curved Series": [
            "LAC039DD4",
            "LAC025DD4",
            "LAC039DD3",
            "LAC029DD3",
            "LAC025DD3",
            "LAC029DD4"
        ],
        "Essential Series": [
            "GSCC066",
            "GSCC080",
            "GSCC160"
        ],
        "Fine-pitch Essential Series": [
            "LAS025-F",
            "LAS018-F",
            "LAS012-F",
            "LAS014-F",
            "LAS015-F",
            "LAS009-F",
            "LSBB009",
            "LSBB012",
            "LSBB015",
            "LSBB018"
        ],
        "Indoor Versatile Series": [
            "LSCA039",
            "LSCA029"
        ],
        "LED Cinema": [
            "LAD033F"
        ],
        "LG LED All-in-One Essential Series": [
            "LAEB015"
        ],
        "LG LED All-in-One Premium Series": [
            "LAA015F"
        ],
        "LG LED All-in-One Smart Series": [
            "LAEC015"
        ],
        "LG LED Bloc": [
            "LSAA012",
            "LSAC025",
            "LSAA015"
        ],
        "LG LED Curve Series": [
            "LAP025EP",
            "LAP020EP",
            "LAP015E",
            "LAP025E",
            "LAP020E",
            "LAP015EP"
        ],
        "LG MAGNIT": [
            "LSAB009",
            "LSAB012"
        ],
        "Outdoor Versatile Series": [
            "GSCA046",
            "GSCA039"
        ],
        "Premium Series": [
            "LBS060DA4D",
            "LBS080DA4D",
            "LBS120DA4D",
            "LBS100DA4D",
            "LBS160DA4D"
        ],
        "Premium Slim Series": [
            "LBS100DA4-V",
            "LBS062DA4-V",
            "LBS083DA4-V"
        ],
        "Stadium Series (Perimeter)": [
            "LBB100DD3",
            "LBB160DA4D2"
        ],
        "Stadium Series (Ribbon Board)": [
            "LBF160DA4D"
        ],
        "Transparent LED Film": [
            "LAT140",
            "LAT240"
        ],
        "Ultra Light Series": [
            "GSCD100",
            "GSCD069"
        ],
        "Ultra Slim Series": [
            "LSCB025",
            "LSCB018",
            "LSCB015"
        ],
        "Digital Floor Series": [
            "LFCG039"
        ],
        "High Brightness Series": [
            "LWBC026",
            "LWBC029",
            "LWBC039"
        ],
        "High Brightness Video Wall": [
            "55VX1D"
        ],
        "LV35A Series": [
            "55LV35A"
        ],
        "SVH7E Series": [
            "55SVH7E"
        ],
        "SVH7F Series": [
            "55SVH7F-A"
        ],
        "SVH7PF Series": [
            "55SVH7PF-H"
        ],
        "SVM5F Series": [
            "55SVM5F-H"
        ],
        "Ultra Narrow Bezel Video Wall": [
            "55LV77D",
            "55LV75D"
        ],
        "VH7B Series": [
            "55VH7B"
        ],
        "VH7E Series": [
            "55VH7E-A",
            "49VH7E-A"
        ],
        "VH7E-H Series": [
            "55VH7E-H"
        ],
        "VL5D Series": [
            "49VL5D"
        ],
        "VL5F Series": [
            "55VL5F-A",
            "49VL5F-A"
        ],
        "VL5PF Series": [
            "49VL5PF"
        ],
        "VL7F Series": [
            "55VL7F-A",
            "49VL7F-A"
        ],
        "VM5E Series": [
            "49VM5E-A",
            "55VM5E-A"
        ],
        "49\\" 500 nits FHD Slim Bezel Video Wall": [
            "49VL5G",
            "49VL5G-M",
            "49VL5PJ"
        ],
        "55\\" 700 nits FHD 0.44mm Even Bezel Video Wall": [
            "55VSH7J",
            "55VSM5J"
        ],
        "55\\" 500 nits FHD Slim Bezel Video Wall": [
            "55VM5J-H",
            "55VL5PJ"
        ],
        "55\\" 700 nits FHD Slim Bezel Video Wall": [
            "55VH7J-H"
        ],
        "Built-in Touch Display": [
            "55TC3CD"
        ],
        "Built-in Touch Signage": [
            "55TC3CG-H"
        ],
        "Interactive Digital Board": [
            "55TC3D",
            "75TC3D",
            "86TR3E"
        ],
        "TA3E Series": [
            "32TA3E",
            "43TA3E",
            "49TA3E",
            "55TA3E"
        ],
        "TN3F Series": [
            "86TN3F-B"
        ],
        "Touch Overlay Kit": [
            "KT-T32E",
            "KT-T75E",
            "KT-T65E",
            "KT-T55E",
            "KT-T49E",
            "KT-T43E"
        ],
        "TR3BF Series": [
            "86TR3BF-B",
            "75TR3BF-B",
            "65TR3BF-B"
        ],
        "TR3BG Series": [
            "65TR3BG-B",
            "55TR3BG-B"
        ],
        "TR3DJ Series": [
            "86TR3DJ",
            "75TR3DJ",
            "65TR3DJ"
        ],
        "Touch Open Frame": [
            "55TNF5J",
            "43TNF5J",
            "32TNF5J",
            "27TNF3K"
        ],
        "UHD IR-type Touch CreateBoard": [
            "86TR3PJ-B",
            "75TR3PJ-B",
            "65TR3PJ-B"
        ],
        "Open Frame": [
            "75XF3C-B",
            "75XF3ES-B",
            "49XF3E-B",
            "55XF3E-B"
        ],
        "Outdoor Display": [
            "49XE4F-M",
            "55XE4F-M",
            "75XE3C",
            "86XE3FS-B",
            "22XE1J-B"
        ],
        "Window Facing Display": [
            "49XS2E",
            "49XS4F-B",
            "55XS4F-B",
            "75XS2E",
            "55XS2E",
            "75XS4G",
            "55XS4J-B",
            "49XS4J-B"
        ],
        "Ultra Stretch Signage": [
            "88BH7F-B",
            "86BH5F-M",
            "88BH7G"
        ],
        "LG Thermal Sensing Terminal": [
            "8KC5PJ",
            "29KC5P2J-WG"
        ],
        "LT340C Series": [
            "43LT340C (EU)",
            "28LT340C (CIS)",
            "28LT340C (EU)",
            "32LT340C (CIS)",
            "43LT340C (CIS)",
            "49LT340C (CIS)",
            "32LT340C (EU)",
            "49LT340C (EU)",
            "32LT340C (ASIA)",
            "43LT340C (ASIA)",
            "49LT340C (NA)",
            "43LT340C (NA)",
            "32LT340C (NA)",
            "49LT340C (ASIA)"
        ],
        "SE3KE Series": [
            "43SE3KE-B"
        ],
        "SL5B Series": [
            "43SL5B-B"
        ],
        "SM3G Series": [
            "22SM3G-B"
        ],
        "SM5KE Series": [
            "43SM5KE-B"
        ],
        "UH5F Series": [
            "98UH5F-B",
            "43UH5F-B",
            "65UH5F-B",
            "55UH5F-B",
            "49UH5F-B"
        ],
        "UH5F-H Series": [
            "86UH5F-H",
            "98UH5F-H",
            "75UH5F-H",
            "43UH5F-H",
            "49UH5F-H",
            "55UH5F-H",
            "65UH5F-H"
        ],
        "UH7F Series": [
            "65UH7F-B",
            "49UH7F-B",
            "55UH7F-B"
        ],
        "UH7F-H Series": [
            "49UH7F-H",
            "55UH7F-H"
        ],
        "UL3G Series": [
            "86UL3G-B",
            "50UL3G-B",
            "55UL3G-B",
            "43UL3G-B",
            "65UL3G-B",
            "75UL3G-B"
        ],
        "UM3DG Series": [
            "49UM3DG-B",
            "55UM3DG-B",
            "65UM3DG-B",
            "43UM3DG-B"
        ],
        "UM3DG-H Series": [
            "98UM3DG-H",
            "86UM3DG-H",
            "75UM3DG-H"
        ],
        "UT640S Series": [
            "86UT640S (ASIA)",
            "43UT640S (ASIA)",
            "86UT640S (NA)",
            "75UT640S (NA)",
            "65UT640S (NA)",
            "55UT640S (NA)",
            "49UT640S (NA)",
            "49UT640S (ASIA)",
            "55UT640S (ASIA)",
            "65UT640S (CIS)",
            "75UT640S (CIS)",
            "65UT640S (EU)",
            "75UT640S (EU)",
            "75UT640S (ASIA)",
            "70UT640S (ASIA)",
            "65UT640S (ASIA)",
            "55UT640S (Colombia)",
            "43UT640S (NA)",
            "65UT640S (SCA)",
            "55UT640S (SCA)",
            "43UT640S (EU)",
            "49UT640S (EU)",
            "55UT640S (EU)",
            "60UT640S (EU)",
            "70UT640S (EU)",
            "43UT640S (Colombia)",
            "49UT640S (Colombia)",
            "75UT640S (Colombia)",
            "70UT640S (CIS)",
            "60UT640S (CIS)",
            "49UT640S (SCA)",
            "43UT640S (SCA)",
            "55UT640S (MEA)",
            "65UT640S (MEA)",
            "75UT640S (MEA)",
            "43UT640S (CIS)",
            "49UT640S (CIS)",
            "55UT640S (CIS)",
            "65UT640S (Colombia)"
        ],
        "Full HD Standard Signage": [
            "32SM5J-B"
        ],
        "WebOS UHD Signage": [
            "55UL3J-E",
            "65UL4J-E",
            "75UL5J-E",
            "43UL3J-B",
            "55UL3J-B",
            "65UL3J-B",
            "75UL3J-B",
            "86UL3J-B"
        ],
        "UHD TV Signage": [
            "43UR640S (NA)",
            "50UR641S (NA)",
            "55UR642S (NA)",
            "65UR643S (NA)",
            "75UR644S (NA)",
            "86UR645S (NA)",
            "43UR640S (SCA)",
            "50UR640S (SCA)",
            "55UR640S (SCA)",
            "65UR640S (SCA)",
            "75UR640S (SCA)",
            "86UR640S (SCA)",
            "43UR640S (Colombia)",
            "50UR640S (Colombia)",
            "55UR640S (Colombia)",
            "65UR640S (Colombia)",
            "75UR640S (Colombia)",
            "86UR640S (Colombia)",
            "43UR640S (EU/CIS)",
            "50UR640S (EU/CIS)",
            "55UR640S (EU/CIS)",
            "65UR640S (EU/CIS)",
            "75UR640S (EU/CIS)",
            "86UR640S (EU/CIS)",
            "43UR640S (MEA)",
            "50UR640S (MEA)",
            "55UR640S (MEA)",
            "65UR640S (MEA)",
            "75UR640S (MEA)",
            "86UR640S (MEA)",
            "43UR640S (ASIA)",
            "50UR640S (ASIA)",
            "55UR640S (ASIA)",
            "65UR640S (ASIA)",
            "75UR640S (ASIA)",
            "86UR640S (ASIA)"
        ],
        "UHD Large Screen Signage Display": [
            "110UM5J",
            "98UM5J"
        ],
        "New High Haze UHD Standard Signage": [
            "65UH5J-H",
            "55UH5J-H",
            "49UH5J-H",
            "43UH5J-H",
            "65UH7J-H",
            "55UH7J-H",
            "49UH7J-H",
            "43UH7J-H"
        ],
        "High Haze UHD Signage for Hospitals": [
            "43ML5K"
        ],
        "ES961H Series": [
            "48ES961H (EU)"
        ],
        "ET961H Series": [
            "65ET961H (CIS)",
            "55ET961H (CIS)",
            "65ET961H (MEA)",
            "55ET961H (MEA)"
        ],
        "LS341H": [
            "32LS341H (EU Only)"
        ],
        "LT330H Series": [
            "32LT330H (Brazil Only)"
        ],
        "LT340H Series": [
            "49LT340H (NA)",
            "43LT340H (SCA)",
            "43LT340H (Colombia)",
            "43LT340H (NA)",
            "32LT340H (NA)",
            "32LT340H (Colombia)",
            "32LT340H (MEA)",
            "43LT340H (MEA)",
            "49LT340H (MEA)"
        ],
        "LT341H Series": [
            "32LT341H (CIS)",
            "43LT341H (CIS)",
            "49LT341H (CIS)",
            "32LT341H (EU)",
            "43LT341H (EU)",
            "49LT341H (EU)"
        ],
        "LT560H Series": [
            "32LT560H (NA)",
            "43LT560H (NA)",
            "49LT560H (NA)"
        ],
        "LT570H Series": [
            "49LT570H (NA)",
            "32LT570H (NA)",
            "43LT570H (NA)"
        ],
        "LT660H Series": [
            "32LT660H (ASIA)"
        ],
        "LT661H Series": [
            "32LT661H (EU)",
            "24LT661H (EU)",
            "32LT661H (CIS)",
            "24LT661H (CIS)"
        ],
        "US342H Series": [
            "55US342H (EU)",
            "50US342H (EU)",
            "43US342H (EU)"
        ],
        "US660H Series": [
            "50US660H (Colombia)",
            "65US660H (SCA)",
            "43US660H (ASIA)",
            "50US660H (ASIA)",
            "55US660H (ASIA)",
            "65US660H (ASIA)",
            "43US660H (MEA)",
            "50US660H (MEA)",
            "55US660H (SCA)",
            "50US660H (SCA)",
            "43US660H (SCA)",
            "43US660H (Colombia)",
            "55US660H (MEA)",
            "55US660H (Colombia)",
            "65US660H (Colombia)"
        ],
        "US662H Series": [
            "50US662H (CIS)",
            "55US662H (CIS)",
            "43US662H (EU)",
            "50US662H (EU)",
            "65US662H (CIS)",
            "43US662H (CIS)",
            "55US662H (EU)",
            "65US662H (EU)"
        ],
        "US665H Series": [
            "50US665H (ASIA)",
            "43US665H (ASIA)",
            "55US665H (ASIA)",
            "65US665H (ASIA)",
            "55US665H (CIS)",
            "50US665H (CIS)",
            "43US665H (CIS)"
        ],
        "US670H Series": [
            "55US670H (NA)",
            "50US670H (NA)",
            "43US670H (NA)"
        ],
        "US760H Series": [
            "75US760H (EU)",
            "75US760H (MEA)"
        ],
        "US761H Series": [
            "49US761H (ASIA)",
            "55US761H (MEA)",
            "65US761H (MEA)",
            "65US761H (ASIA)",
            "49US761H (MEA)",
            "55US761H (ASIA)"
        ],
        "US762H Series": [
            "65US762H (EU)",
            "55US762H (EU)",
            "65US762H (CIS)",
            "49US762H (EU)",
            "55US762H (CIS)",
            "49US762H (CIS)"
        ],
        "US765H Series": [
            "65US765H (ASIA)",
            "55US765H (ASIA)",
            "49US765H (ASIA)"
        ],
        "US770H Series": [
            "75US770H (NA)",
            "49US770H (NA)",
            "65US770H (NA)",
            "55US770H (NA)"
        ],
        "UT340H Series": [
            "55UT340H (NA)",
            "49UT340H (NA)",
            "65UT340H (NA)"
        ],
        "UT347H Series": [
            "55UT347H (NA)",
            "49UT347H (NA)",
            "65UT347H (NA)"
        ],
        "UT567H Series": [
            "65UT567H (NA)",
            "49UT567H (NA)",
            "55UT567H (NA)"
        ],
        "UT570H Series": [
            "65UT570H (NA)",
            "55UT570H (NA)",
            "49UT570H (NA)",
            "43UT570H (NA)"
        ],
        "UT577H Series": [
            "65UT577H (NA)",
            "55UT577H (NA)",
            "49UT577H (NA)"
        ],
        "UT660H Series": [
            "55UT660H (SCA)",
            "65UT660H (SCA)"
        ],
        "UT665H Series": [
            "55UT665H (EU)",
            "43UT665H (EU)",
            "49UT665H (EU)"
        ],
        "UT670H Series": [
            "43UT670H (NA)",
            "49UT670H (NA)",
            "55UT670H (NA)"
        ],
        "UT770H Series": [
            "75UT770H (NA)",
            "65UT770H (NA)",
            "55UT770H (NA)",
            "49UT770H (NA)"
        ],
        "UT781H Series": [
            "43UT781H (MEA)",
            "43UT781H (ASIA)"
        ],
        "UT782H Series": [
            "43UT782H (EU)",
            "43UT782H (CIS)"
        ],
        "WS960H Series": [
            "55WS960H (MEA)",
            "65WS960H (EU)",
            "55WS960H (NA)",
            "65WS960H (NA)",
            "55WS960H (EU)",
            "65WS960H (CIS)",
            "55WS960H (CIS)",
            "65WS960H (MEA)"
        ],
        "4K UHD Hospitality TV with Pro:Centric Direct": [
            "75UR761H (ASIA)",
            "65UR761H (ASIA)",
            "55UR761H (ASIA)",
            "50UR761H (ASIA)",
            "75UR762H (MEA)",
            "65UR762H (MEA)",
            "55UR762H (MEA)",
            "50UR762H (MEA)",
            "75UR762H (EU/CIS)",
            "65UR762H (EU/CIS)",
            "55UR762H (EU/CIS)",
            "50UR762H (EU/CIS)",
            "75UR765H (ASIA)",
            "65UR765H (ASIA)",
            "55UR765H (ASIA)",
            "50UR765H (ASIA)",
            "75UR770H (NA)",
            "65UR770H (NA)",
            "55UR770H (NA)",
            "50UR770H (NA)",
            "65UR777H (NA)",
            "55UR777H (NA)",
            "50UR777H (NA)",
            "65UR767H (EU)",
            "55UR767H (EU)",
            "50UR767H (EU)",
            "75UR760H (NA)",
            "65UR760H (NA)",
            "55UR760H (NA)",
            "50UR760H (NA)",
            "55US660H (NA)",
            "50US660H (NA)",
            "43US660H (NA)",
            "65UR567H (NA)",
            "55UR567H (NA)",
            "50UR567H (NA)",
            "75UR761H (LATAM)",
            "65UR761H (LATAM)",
            "55UR761H (LATAM)",
            "50UR761H (LATAM)"
        ],
        "Essential Commercial TV with NanoCell Display": [
            "65UR577H (NA)",
            "55UR577H (NA)",
            "50UR577H (NA)",
            "65UR347H (NA)",
            "55UR347H (NA)",
            "50UR347H (NA)"
        ],
        "Essential Commercial TV with 4K Active HDR": [
            "55UT343H (NA)",
            "50UT343H (NA)",
            "43UT343H (NA)"
        ],
        "LT572M Series": [
            "43LT572M (NA)",
            "32LT572M (NA)",
            "24LT572M (NA)"
        ],
        "LT662M series": [
            "32LT662M (NA)"
        ],
        "LU766A Series": [
            "15LU766A (EU)",
            "15LU766A (NA)"
        ],
        "US772M Series": [
            "65US772M (NA)",
            "55US772M (NA)"
        ],
        "UT672M series": [
            "55UT672M (NA)",
            "49UT672M (NA)",
            "43UT672M (NA)"
        ],
        "UT662M Series": [
            "55UT662M (MEA)",
            "43UT662M (MEA)",
            "50UT662M (MEA)"
        ],
        "NanoCell Hospital TV": [
            "65UR772M (NA)",
            "55UR772M (NA)"
        ],
        "Smart Touch Screen TV": [
            "15LS766F (NA)"
        ],
        "Pro:Centric SMART Set Top Box": [
            "STB-6500 (MEA)",
            "STB-6500 (ASIA)",
            "STB-6500 (EU)",
            "STB-6500 (NA)",
            "STB-5500 (EU/CIS)",
            "STB-5500 (NA)",
            "STB-5500 (MEA)",
            "STB-5500 (ASIA)"
        ],
        "webOS Box": [
            "WP400",
            "WP401",
            "WP402"
        ],
        "Floor Stand for One:Quick Flex": [
            "ST-43HF"
        ],
        "SuperSign CMS": [],
        "SuperSign Control & Control+": [],
        "SuperSign WB": [],
        "LG Simple Editor": [],
        "SuperSign Simple Editor": [],
        "SuperSign Media Editor": [],
        "LG ExtendedCare": [],
        "LG ConnectedCare": [],
        "Total Care Solution": [],
        "Pro:Centric Smart": [],
        "Pro:Centric Direct": [],
        "Pro:Centric Value": [],
        "Hotel Quick Menu": [],
        "One:Quick Share": [
            "SC-00DA"
        ],
        "One:Quick Works": [
            "55CT5WJ"
        ],
        "One:Quick Flex": [
            "43HT3WJ"
        ]
    }
}
"""

# JSON 문자열을 Python 객체로 변환
parsed_data = json.loads(data2)

# 키와 값의 쌍을 행으로 가지는 리스트 생성
rows = []
for category, products in parsed_data['product_modelname'].items():
    for product in products:
        rows.append({'product_subcategory': category, 'product_modelname': product})

# 리스트를 DataFrame으로 변환
df2 = pd.DataFrame(rows)

# 데이터프레임 출력
print(df2)

                product_subcategory product_modelname
0             Curvable OLED Signage          55EF5F-L
1             Curvable OLED Signage          55EF5F-P
2             Curvable OLED Signage          55EF5G-L
3             Curvable OLED Signage          55EF5G-P
4              Curvature Calibrator       ACC-CC-EF5E
..                              ...               ...
519                       webOS Box             WP402
520  Floor Stand for One:Quick Flex           ST-43HF
521                 One:Quick Share           SC-00DA
522                 One:Quick Works           55CT5WJ
523                  One:Quick Flex           43HT3WJ

[524 rows x 2 columns]


In [37]:
# 기준 하나로 합치기
product_list = pd.merge(df, df2, how = 'outer', on = 'product_subcategory')

In [38]:
# 싹다 소문자로 바꿔넣기
for i in product_list.columns:
    product_list[i + '_lower'] = product_list[i].str.lower()

In [39]:
# 융합 기준 만들기
product_model = product_list[['product_category_lower', 'product_modelname_lower']].dropna().drop_duplicates()
product_sub = product_list[['product_category_lower', 'product_subcategory_lower']].dropna().drop_duplicates()

In [40]:
# modelname - modelname
# 대문자 모두 소문자로 바꿔버리기
train['product_modelname_lower'] = train['product_modelname'].str.lower()
test['product_modelname_lower'] = test['product_modelname'].str.lower()


# 융합하기(총 3번을 진행할 예정)
train2 = pd.merge(train, product_model, how = 'left', on = 'product_modelname_lower')
test2 = pd.merge(test, product_model, how = 'left', on = 'product_modelname_lower')

# 대입
train2['product_category'] = np.where(train2['product_category_lower'].notnull(), train2['product_category_lower'], train2['product_category'])
test2['product_category'] = np.where(test2['product_category_lower'].notnull(), test2['product_category_lower'], test2['product_category'])

# 생성 변수 제외
train = train2.drop(['product_modelname_lower', 'product_category_lower'], axis = 1)
test = test2.drop(['product_modelname_lower', 'product_category_lower'], axis = 1)

In [41]:
# subcategory - modelname
# 대문자 모두 소문자로 바꿔버리기
train['product_subcategory_lower'] = train['product_subcategory'].str.lower()
test['product_subcategory_lower'] = test['product_subcategory'].str.lower()

# 융합하기
train2 = pd.merge(train, product_model, how = 'left', left_on = 'product_subcategory_lower', right_on = 'product_modelname_lower')
test2 = pd.merge(test, product_model, how = 'left', left_on = 'product_subcategory_lower', right_on = 'product_modelname_lower')

# 대입
train2['product_category'] = np.where(train2['product_category_lower'].notnull(), train2['product_category_lower'], train2['product_category'])
test2['product_category'] = np.where(test2['product_category_lower'].notnull(), test2['product_category_lower'], test2['product_category'])

# 생성 변수 제외
train = train2.drop(['product_modelname_lower', 'product_subcategory_lower', 'product_category_lower'], axis = 1)
test = test2.drop(['product_modelname_lower', 'product_subcategory_lower', 'product_category_lower'], axis = 1)

In [42]:
# category - modelname
# 대문자 모두 소문자로 바꿔버리기
train['product_category_lower2'] = train['product_category'].str.lower()
test['product_category_lower2'] = test['product_category'].str.lower()

# 융합하기
train2 = pd.merge(train, product_model, how = 'left', left_on = 'product_category_lower2', right_on = 'product_modelname_lower')
test2 = pd.merge(test, product_model, how = 'left', left_on = 'product_category_lower2', right_on = 'product_modelname_lower')

# 대입
train2['product_category'] = np.where(train2['product_category_lower'].notnull(), train2['product_category_lower'], train2['product_category'])
test2['product_category'] = np.where(test2['product_category_lower'].notnull(), test2['product_category_lower'], test2['product_category'])

# 생성 변수 제외
train = train2.drop(['product_modelname_lower', 'product_category_lower2', 'product_category_lower'], axis = 1)
test = test2.drop(['product_modelname_lower', 'product_category_lower2', 'product_category_lower'], axis = 1)

In [43]:
# modelname - subcategory
# 대문자 모두 소문자로 바꿔버리기
train['product_modelname_lower'] = train['product_modelname'].str.lower()
test['product_modelname_lower'] = test['product_modelname'].str.lower()


# 융합하기(총 3번을 진행할 예정)
train2 = pd.merge(train, product_sub, how = 'left', left_on = 'product_modelname_lower', right_on = 'product_subcategory_lower')
test2 = pd.merge(test, product_sub, how = 'left', left_on = 'product_modelname_lower', right_on = 'product_subcategory_lower')

# 대입
train2['product_category'] = np.where(train2['product_category_lower'].notnull(), train2['product_category_lower'], train2['product_category'])
test2['product_category'] = np.where(test2['product_category_lower'].notnull(), test2['product_category_lower'], test2['product_category'])

# 생성 변수 제외
train = train2.drop(['product_modelname_lower', 'product_subcategory_lower', 'product_category_lower'], axis = 1)
test = test2.drop(['product_modelname_lower','product_subcategory_lower', 'product_category_lower'], axis = 1)

In [44]:
# subcategory - subcategory
# 대문자 모두 소문자로 바꿔버리기
train['product_subcategory_lower'] = train['product_subcategory'].str.lower()
test['product_subcategory_lower'] = test['product_subcategory'].str.lower()

# 융합하기
train2 = pd.merge(train, product_sub, how = 'left', on = 'product_subcategory_lower')
test2 = pd.merge(test, product_sub, how = 'left', on = 'product_subcategory_lower')

# 대입
train2['product_category'] = np.where(train2['product_category_lower'].notnull(), train2['product_category_lower'], train2['product_category'])
test2['product_category'] = np.where(test2['product_category_lower'].notnull(), test2['product_category_lower'], test2['product_category'])

# 생성 변수 제외
train = train2.drop(['product_subcategory_lower', 'product_category_lower'], axis = 1)
test = test2.drop(['product_subcategory_lower', 'product_category_lower'], axis = 1)

In [45]:
# category - subcategory
# 대문자 모두 소문자로 바꿔버리기
train['product_category_lower2'] = train['product_category'].str.lower()
test['product_category_lower2'] = test['product_category'].str.lower()

# 융합하기
train2 = pd.merge(train, product_sub, how = 'left', left_on = 'product_category_lower2', right_on = 'product_subcategory_lower')
test2 = pd.merge(test, product_sub, how = 'left', left_on = 'product_category_lower2', right_on = 'product_subcategory_lower')

# 대입
train2['product_category'] = np.where(train2['product_category_lower'].notnull(), train2['product_category_lower'], train2['product_category'])
test2['product_category'] = np.where(test2['product_category_lower'].notnull(), test2['product_category_lower'], test2['product_category'])

# 생성 변수 제외
train = train2.drop(['product_subcategory_lower', 'product_category_lower2', 'product_category_lower'], axis = 1)
test = test2.drop(['product_subcategory_lower', 'product_category_lower2', 'product_category_lower'], axis = 1)

In [46]:
# 소문자로 바꾸기
train['product_category2'] = train['product_category'].str.lower()
test['product_category2'] = test['product_category'].str.lower()

In [47]:
# 조건 맞춰서 대입하기
con = [
    train['product_category2'].isna(),
    train['product_category2'].isin(product_category_lower)
]
cri = [
    'no_answer',
    train['product_category2']
]
train['pc'] = np.select(con, cri, default = 'others')

In [48]:
# 조건 맞춰서 대입하기
con = [
    test['product_category2'].isna(),
    test['product_category2'].isin(product_category_lower)
]
cri = [
    'no_answer',
    test['product_category2']
]
test['pc'] = np.select(con, cri, default = 'others')

In [49]:
# 목록
product_category = [
    "Reciprocating Compressor",
    "Rotary Compressor",
    "Scroll Compressor",
    "Motor",
    "Projector",
    "Monitor",
    "All Medical Displays",
    "Clinical Review Monitors",
    "Diagnostic Monitors",
    "Surgical Monitors",
    "Digital X-ray Detectors",
    "Laptop",
    "All Cloud Devices",
    "Thin Clients",
    "Zero Clients",
    "OLED Signage",
    "LED Signage",
    "Video Wall Signage",
    "Interactive Signage",
    "High Brightness Signage",
    "Special Signage",
    "Standard Signage",
    "Hotel TV",
    "Hospital TV",
    "Accessories",
    "Software Solution",
    "Signage Care Solution",
    "WebOS",
    "Pro:Centric",
    "One:Quick Series",
    "Ventilation",
    "VRF",
    "Multi-Split",
    "Single-Split",
    "Chiller",
    "Heating",
    "Energy Storage System",
    "Others"
]

In [50]:
# 조건에 맞춰서 값 집어넣기
### one:quick
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('quick'))] = 'One:Quick Series'
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('quick'))] = 'One:Quick Series'

### wall
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('wall'))] = 'Video Wall Signage'
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('wall'))] = 'Video Wall Signage'

### oled
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('oled'))] = "OLED Signage"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('oled'))] = "OLED Signage"

### led
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('led'))] = "LED Signage"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('led'))] = "LED Signage"

### cloud
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('cloud'))] = "All Cloud Devices"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('cloud'))] = "All Cloud Devices"

### vrf
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('multi v|vrf'))] = "VRF"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('multi v|vrf'))] = "VRF"

### medical display
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('medical display'))] = "All Medical Displays"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('medical display'))] = "All Medical Displays"

### multi
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('multi'))] = "Multi-Split"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('multi'))] = "Multi-Split"

## commercial
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('commercial|comercial'))] = "Hotel TV"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('commercial|comercial'))] = "Hotel TV"

## centric
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('centric'))] = "Pro:Centric"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('centric'))] = "Pro:Centric"

## chill
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('chill'))] = "Chiller"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('chill'))] = "Chiller"

## single
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('single'))] = "Single-Split"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('single'))] = "Single-Split"

## uhd
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('uhd'))] = "Standard Signage"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('uhd'))] = "Standard Signage"

## surgical
train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('surgical'))] = "Surgical Monitors"
test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('surgical'))] = "Surgical Monitors"

## idb, id
train['pc'][(train['pc'] == 'others') & (train['product_category'].isin(['id', 'idb']))] = "Interactive Signage"
test['pc'][(test['pc'] == 'others') & (test['product_category'].isin(['id', 'idb']))] = "Interactive Signage"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('quick'))] = 'One:Quick Series'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['pc'][(test['pc'] == 'others') & (test['product_category'].str.contains('quick'))] = 'One:Quick Series'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['pc'][(train['pc'] == 'others') & (train['product_category'].str.contains('wall'))] = 'Video Wall Signage'
A value is trying to be set on a copy of a

In [51]:
# product_category 반영하기
train['product_category'] = train['pc'].str.lower()
test['product_category'] = test['pc'].str.lower()

In [52]:
# drop pc
train.drop(['product_category2', 'pc', 'product_subcategory', 'product_modelname',
           'customer_position'], axis = 1, inplace = True)
test.drop(['product_category2', 'pc', 'product_subcategory', 'product_modelname',
           'customer_position'], axis = 1, inplace = True)

In [53]:
# 이름 달라서 명명하기
df_train = train.copy()
df_test = test.copy()

In [54]:
# id_strategic_ver 결측치를 0으로 채운다.
df_train['id_strategic_ver'].fillna(0, inplace=True)
df_test['id_strategic_ver'].fillna(0, inplace=True)

In [55]:
# ver_win_ratio_per_bu 결측치를 0으로 채운다.
df_train['ver_win_ratio_per_bu'].fillna(0, inplace=True)
df_test['ver_win_ratio_per_bu'].fillna(0, inplace=True)

In [56]:
# ver_win_rate_x 결측치를 0으로 채운다.
df_train['ver_win_rate_x'].fillna(0, inplace=True)
df_test['ver_win_rate_x'].fillna(0, inplace=True)

In [57]:
# com_reg_ver_win_rate 결측치를 0으로 채운다.
df_train['com_reg_ver_win_rate'].fillna(0, inplace=True)
df_test['com_reg_ver_win_rate'].fillna(0, inplace=True)

In [58]:
# idit_strategic_ver 결측치를 0으로 채운다.
df_train['idit_strategic_ver'].fillna(0, inplace=True)
df_test['idit_strategic_ver'].fillna(0, inplace=True)

In [59]:
# it_strategic_ver 결측치를 0으로 채운다.
df_train['it_strategic_ver'].fillna(0, inplace=True)
df_test['it_strategic_ver'].fillna(0, inplace=True)

In [60]:
# historical_existing_cnt 결측치를 0으로 채운다.
df_train['historical_existing_cnt'].fillna(0, inplace=True)
df_test['historical_existing_cnt'].fillna(0, inplace=True)

### Customer Type

In [61]:
# End Customer
change_list = ['End Customer', 'End-Customer','End-user','Corporate', 'Home Owner', 'HomeOwner', 'Commercial end-user']
for item in change_list:
    df_train.loc[df_train['customer_type'] == item, 'customer_type'] = 'End-Customer'
    df_test.loc[df_test['customer_type'] == item, 'customer_type'] =  'End-Customer'

# Channel Partner
change_list = ['Channel Partner','Reseller','Distributor', 'System Integrator', 'Dealer/Distributor']
for item in change_list:
    df_train.loc[df_train['customer_type'] == item, 'customer_type'] = 'channel partner'
    df_test.loc[df_test['customer_type'] == item, 'customer_type'] =  'channel partner'
    
# Specifier/Influencer
change_list = ['Specifier / Influencer', 'Specifier/ Influencer','Consultant','Installer',
               'Technician', 'Technical Assistant', 'Installer/Contractor','Architect/Consultant', 'Interior Designer']
for item in change_list:
    df_train.loc[df_train['customer_type'] == item, 'customer_type'] = 'Specifier/ Influencer'
    df_test.loc[df_test['customer_type'] == item, 'customer_type'] =  'Specifier/ Influencer'

# Solution Eco-Partner
change_list = ['Solution Eco-Partner', 'Software/Solution Provider', 'Software / Solution Provider']
for item in change_list:
    df_train.loc[df_train['customer_type'] == item, 'customer_type'] = 'Solution Eco-Partner'
    df_test.loc[df_test['customer_type'] == item, 'customer_type'] =  'Solution Eco-Partner'

# Service Partner
change_list = ['Service Partner']
for item in change_list:
    df_train.loc[df_train['customer_type'] == item, 'customer_type'] = 'Service Partner'
    df_test.loc[df_test['customer_type'] == item, 'customer_type'] =  'Service Partner'

# Solution Eco-Partner
change_list = ['Solution Eco-Partner']
for item in change_list:
    df_train.loc[df_train['customer_type'] == item, 'customer_type'] = 'Solution Eco-Partner'
    df_test.loc[df_test['customer_type'] == item, 'customer_type'] =  'Solution Eco-Partner'

# Other
change_list = ['Others','Etc.', 'Other', 'Engineer', 'Manager / Director', 'HVAC Engineer', 'Administrator']
for item in change_list:
    df_train.loc[df_train['customer_type'] == item, 'customer_type'] = 'Other'
    df_test.loc[df_test['customer_type'] == item, 'customer_type'] =  'Other'

In [62]:
# 결측값을 'No answer'로 채워주는 함수 정의
def fill_missing_job(df):
    df['customer_type'].fillna('No answer', inplace=True)

# df_train 데이터프레임의 결측값을 채워주기
fill_missing_job(df_train)

# df_test 데이터프레임의 결측값을 채워주기
fill_missing_job(df_test)

### Customer Job

In [63]:
# 'customer_job' 열의 값을 '/' 기준으로 분할
df_train['customer_job'] = df_train['customer_job'].apply(lambda x: x.split(' / ') if isinstance(x, str) else x)

# explode 함수를 사용하여 분할된 각 값을 별도의 행으로 만듦
df_train = df_train.explode('customer_job')

In [64]:
# customer_job(Job Function)

# Accounting
change_list = ['accounting', 'accounts payable', 'account management', 'account exec/manager']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Accounting'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Accounting'

# Administrative
change_list = ['administrative', 'admin', 'administrative assistant', 'admin assistant',
              'platform administrator', 'imaging administrator', 'network administrator',
              'systems administrator', 'pacs administrator', 'administration', 'it admin', 'project administrator',
              'amministrativo', 'administración', 'adminisztráció', 'it administrator']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Administrative'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Administrative'

# Arts_and_Design
change_list = ['arts and design', 'graphic/color art', 'designer', 'systems designer', 
               'technology designer', 'design and installation company', 'art installation',
              'art installation', 'design and provide equipment', 'designer, producer',
              'designer, creative technologist', 'design and install', 'designer/installer',
              'design', 'design/decision maker', 'arts_and_design', 'interior designer',
              'design/build', 'designers', 'design consultant', 'design/purchaser', 'systems design',
              '3d/vfx art', 'colorist', 'system designer, integrator', 'art and design', 'design/install/training/support',
              'designere', 'művészet_és_design', 'arte_e_design', 'kreation und design', 'kreation_und_design',
              'graphic design', 'gallery', 'lead designer', 'project designer', 'arte y diseño',
              'fashion', 'sliding pictures of beauty salon', 'photographer', 'community theater',
              'home theater', 'interior stylist', 'exhibition', 'museum', 'convention center']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Arts_and_Design'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Arts_and_Design'

# Business_Development
change_list = ['business development', 'business_development', 'gm/part owner', 
              'managgere', 'engagement executive', 'coo', 'recommendation', 'submitting proposal',
              'corporate']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Business_Development'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Business_Development'

# Community_and_Social_Services
change_list = ['community and social services', 'community_and_social_services']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Community_and_Social_Services'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Community_and_Social_Services'

# Consulting
change_list = ['consulting', 'technology consultant', 'consultant,cabinet fabricator',
              'consultent', 'arquitecto/consultor', 'content creation, eq consultant']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Consulting'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Consulting'

# Education
change_list = ['education', 'educator', 'higher education (college & university)', 
              'teacher', 'teaching']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Education'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Education'
    
# Engineering
change_list = ['engineering', 'chief engineer', 'engineer', 'engineering director',
              'senior design engineer', 'designer/ engineer', 'hardware design engineer',
              'director of engineering', 'chief of engineering', 'project engineer', 'lead engineer',
              'engineering & technical executive', 'design engineer', 'solution engineer',
              'engineering & technical', 'systems engineer', 'principal engineer', 'system engineer',
              'engineering, design, and install', 'chief eng.', 'electrical contractor',
              'energy', 'renewable energy']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Engineering'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Engineering'
    
# Entrepreneurship
change_list = ['entrepreneurship', 'business owner']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Entrepreneurship'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Entrepreneurship'
    
# Finance
change_list = ['finance', 'the person with the credit card', '5% of hotel needs', 'asset management',
              'pricing', 'director of finance', 'budget', 'finance executive', 'finanzen',
              'finanzas']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Finance'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Finance'
    
# Healthcare_Services
change_list = ['healthcare services', 'radiology professional', 'medical imaging specialist', 'medical solution provider', 
               'healthcare_services', 'mental health', 'healthcare professionals', 'radiology_professional',
              'profesional de radiología', 'radiology  professional', 'medical solution  provider',
              'medical imaging  specialist', 'surgery professional', 'medical solution provider', 'clinic',
              'surgery professional', 'clinical specialist', 'doctor', 'profesional de cirugía',
              'spécialiste_en_imagerie_médicale', 'pathologist']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Healthcare_Services'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Healthcare_Services'
    
# Human_Resources
change_list = ['human resources', 'organizer', 'managing employee', 'hr posting', 'human_resources',
              'hr']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Human_Resources'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Human_Resources'
    
# Information_Technology
change_list = ['information technology', 'av tech', 'av technician', 'tech', 'information_technology',
              'it - information technology', 'emerging technology', 'information technology',
              'head of technology', 'tech service', 'it tech.', 'it specialist', 'it manager',
              'it integrator', 'it director', 'testing and troubleshooting', 'technical advisor, reseller',
               'director,it', 'director of it', "i'm directing it", 'director it', 
               'wall mounted screen mirroring', 'helpdesk specialist', 'display screen',
              'video wall', 'it dairector', 'it department', 'integration', 'solutions provider and specifier',
              'solutions architect', 'informatics, touch capability', 'hardware', 'technical director',
              'intergrator', 'deputy cio', 'display screen from control', 'it hardware technician',
              'software solution', 'infrastructure', 'collaboration & web apps', 'computing & it',
              'it/software', 'it', 'software developer', 'installer/ system integrater', 'office it',
              'cloud', 'hardware selection', 'directeur technique', 'si', 'electronics & telco']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Information_Technology'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Information_Technology'
    
# Legal
change_list = ['legal']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Legal'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Legal'
    
# Marketing
change_list = ['marketing', 'ownner-marketing director', 'marketing coordinator', 'marketing operations',
              'marketing executive', 'technical marketing', 'product marketing', 'influencer',
              'using for window display', 'display our products', 'advertising and promotions team',
              'digital signage', 'creative director', 'event marketing', 'customer experience',
              'field marketing', 'advertising', 'store promotions', 'sign company', 'signage for an attraction']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Marketing'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Marketing'
    
# Media_and_Communication
change_list = ['media and communication', 'media and communications', 'media_e_comunicazione', 'media_and_communication',
              'strategic communications', 'broadcasting & media', 'média_és_kommunikáció',
              'medien_und_kommunikation', 'medios_de_comunicación', 'studio manager',
              'av estimator', 'costar av team', 'part of video wall', 'component of video wall',
              'videowall']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Media_and_Communication'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Media_and_Communication'
    
# Military_and_Protective_Services
change_list = ['military and protective services', 'military_and_protective_services']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Military_and_Protective_Services'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Military_and_Protective_Services'
    
# Operations
change_list = ['operations', 'operations manager', 'strategy & operations specialist',
              'director of operations', 'facilities and operations', 'regional director of operations',
              'operations executive', 'operaciones', 'ops mgr', 'coordinator', 'coordinator',
              'comanager', 'decision maker', 'director', 'office manager', 'team leader',
              'ceo', 'leader', 'maintenance', 'overseer', 'facilitator', 'head']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Operations'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Operations'
    
# Product_Management
change_list = ['product management', 'tv studio manager', 'product owner', 'global lead of production',
              'film production', 'product_management', 'signage manager', 'digital display vs signage need']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Product_Management'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Product_Management'
    
# Program_and_Project_Management
change_list = ['program and project management', 'project manager', 'project coordinator',
              'project lead', 'general manager', 'gm', 'project facilitator',
              'projection manager',  'it project lead', 'project team member',
              'producer/project manager', 'r&d project manager', 'designer/pm/gc', 'a/v project manager',
              'project director', 'project manager/designer', 'project head', 'program_and_project_management',
              'av project manager', 'program_and_project_manager', 'designer/ project manager', 'projectr mgmt',
              'project manage', 'project sales/manage', 'project sales/manage', 'genera manager', 'programm- und projektmanagement',
              'projektmenedzsment	program and project management', 'program-_és_projektmenedzsment',
              'digital project manager', 'site manager', 'general management', 'reseorot general manager',
              'general manager - project manager', 'program directors', 'general manager (decision maker)',
              'quoting project', 'programm-_und_projektmanagement', 'manager', 'gc', 'pm']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Program_and_Project_Management'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Program_and_Project_Management'
    
# Purchasing
change_list = ['purchasing', 'buyer', 'purchasing manager', 'purchase', 'purchaser, it and installer',
              'requirements and buyer', 'purchasing coordinator', 'planner/purchaser',
              'obtain quotes, process purchase', 'director purchaser', 'drop, purchase maxhub',
              'purchase and install', 'general manager- purchaser', 'purchasing authority', 'purchasers',
              'purchasing supervisor', 'installation and purchaser', 'purchase dept', 'purchsing', 
              'designer purchaser', 'purchasing director', 'requisition', 'ordering manager',
              'procurement']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Purchasing'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Purchasing'
    
# Quality_Assurance
change_list = ['quality assurance', 'quality_assurance', 'tester']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Quality_Assurance'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Quality_Assurance'
    
# Real_Estate
change_list = ['real estate', 'project architect', 'general contractor', 'architect/owner',
              'estimator', 'sub contractor', 'contractor/owner', 'managing contractor',
              'signage subcontractor p/m', 'contractor', 'construction manager',
              'owner representation', 'furnish and install', 'facilitator installation services',
              'architect ass interiores', 'property owner', 'building owner', 'cintractor']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Real_Estate'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Real_Estate'
    
# Research
change_list = ['research', 'research/install', 'research and instalaltion', 'research products and prices',
              'product researcher', 'project researcher', 'product research', 'research & development',
              'research and developement', 'associate/analyst']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Research'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Research'
    
# Sales
change_list = ['sales', 'sales manager', 'sales executive', 'salesman', 'technical sales', 'sale',
              'installer/sales rep', 'sales rep', 'sales operations', 'outside sales', 'sales engineering',
              'distributor', 'reseller', 'sourcing & quoting for end user', 'sourcing',
              'quotation curator', 'quote gathering/proposer to owner', 'seller installer', 
               'distributor quotation', 'revendedor', 'vertrieb', 'var']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Sales'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Sales'
    
# Support
change_list = ['support', 'support/facilitator, designer', 'it support', 'service coordinator',
              'post install support and service', 'fixing tv', 'desktop services']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Support'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Support'
    
# Others
change_list = ['other', 'bidder', 'details send', 'curation', 'developer/property', 'recommend', 
               'system installer', 'technical', 'architect', 'owner',
               'execution', 'owning company', 'president for sennco',
              'artist, lead on equipment selection', 'specifier', 'integrator', 'public bidder', 
              'vp/gm', 'manufacturer', 'replacing tv', 'maintenance supervisor', 'equipment custodian',
              'supplier', 'change tv', 'liason', 'user', 'retailer/installer', 'primary',
              'development coordinator/procurement', 'reviewer', 'end user', 'installer',
              'vendor', 'supervisor', 'producer', 'investigator', 'solution advisor',
              'facilities', 'federal government contractor', 'stakeholder','primary end-user',
              'facility manager', 'principal', 'asking for quote for client', 'hotel manager',
              'repair uhd 120 hz units', 'equipment and app provider', 'president', 'main end user of the product',
              'ranger 2','appliance specialist', 'parts coordinator', 'correspondence',
              'following up', 'conference table', 'planning and installation', 'sourcing/procurement',
                'solution provider',
                'we are in iceland',
                'maintenance technician',
                'procurement specialist',
                'department secretary',
                'cctv monetoring',
                'supplier and installation',
                'managing director',
                'f&b director for bicycle casino',
                'procurment',
                'installer.',
                'principal in charge',
                'nothing',
                'electronics evaluator',
                'restaurant display',
                'conference room',
                'manger',
                'elevator company',
                'director of lodging',
                'inquiry-to-buy/contact-us test',
                'recommend (you recommend specific products or technologies for the solution)',
                'final approval',
                'otro',
                'integrador',
                'office',
                'others',
                'distribuidor',
                'otros',
                'ceo/founder',
                'planner',
                'no requirment',
                'requirement close',
                'no respoxse on phone will try again',
                'display',
                'reseller/integrator',
                'management',
                'manufacturing factory',
                'plant',
                'cliente final',
                'test4',
                'executive',
                'innovation',
                'field',
                'implement',
                'for confrence',
                'for presentations',
                'managing partner',
                'menu',
                'underboss',
                'assist in serving food',
                'sme',
                'decider',
                'instructor',
                'serving food',
                'authorize (you are responsible for making the final decision)',
                'chef',
                'contributor',
                'chief',
                'waiter',
                'cctv view',
                'serving robot',
                'team lead',
                'facility administrator',
                'tradeshow event',
                'serving',
                'recommender',
                'resource manager',
                'buyer, coordinating',
                'mobility',
                'equipment planner',
                'master mind',
                'sho lyrics',
                'replacement tv',
                'photos',
                'developer',
                'equipment selection',
                'commander',
                'enterprise resource planning',
                'help desk',
                'application development',
                'c-level executive',
                'vice president',
                'pénzügy',
                'sonstiges',
                'altro',
                'értékesítés',
                'üzemeltetés',
                'egyéb',
                'vendite',
                'gestión_de_proyectos',
                'chirurgien',
                'autres',
                'cirugano',
                'tierarzt',
                'entry level',
                'institute & academy',
                'director comercial',
                'other stores',
                'car dealership',
                'proprietário(a)',
                'genel müdür',
                'mindenes',
                'főorvos',
                'intern',
                'the big boss',
                'railway & metro station']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = 'Others'
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  'Others'

# np.nan
change_list = ['nan', 'need 1 tv 55" edge led 4k uhd', 'need one tv', 'hotel tv', 'n.a', 'guestroom tv']
for item in change_list:
    df_train.loc[df_train['customer_job'] == item, 'customer_job'] = np.nan
    df_test.loc[df_test['customer_job'] == item, 'customer_job'] =  np.nan

In [65]:
# 결측값을 'No answer'로 채워주는 함수 정의
def fill_missing_job(df):
    df['customer_job'].fillna('No answer', inplace=True)

# df_train 데이터프레임의 결측값을 채워주기
fill_missing_job(df_train)

# df_test 데이터프레임의 결측값을 채워주기
fill_missing_job(df_test)

In [66]:
# business area
df_train['business_area'][df_train['business_area'].isna()] = 'No answer'
df_train['business_subarea'][df_train['business_subarea'].isna()] = 'No answer'

# business subarea
df_test['business_area'][df_test['business_area'].isna()] = 'No answer'
df_test['business_subarea'][df_test['business_subarea'].isna()] = 'No answer'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['business_area'][df_train['business_area'].isna()] = 'No answer'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['business_subarea'][df_train['business_subarea'].isna()] = 'No answer'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['business_area'][df_test['business_area'].isna()] = 'No answer'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

In [67]:
# 이름 다시 바꾸기
train = df_train.copy()
test = df_test.copy()

In [68]:
# 판매자와 product 융합
dsa = train.groupby(['product_category', 'lead_owner']).sum()['is_converted']
dsa2 = train.groupby(['product_category', 'lead_owner']).count()['is_converted']
dsa3 = pd.merge(dsa, dsa2, how = 'outer', on = ['product_category', 'lead_owner']).reset_index()
dsa3['fan_mae_king'] = dsa3['is_converted_x'] / dsa3['is_converted_y']

# 상위 15개만 선정하기
dsa4 = dsa3.sort_values(['product_category', 'fan_mae_king'], ascending = False).groupby('product_category').head(15)

# 명단 만들기
dsa4['fan_mae_king_yn'] = 1
dsa5 = dsa4[['product_category', 'lead_owner', 'fan_mae_king_yn']]
# train, test에 합치기
train2 = pd.merge(train, dsa5, how = 'left', on = ['product_category', 'lead_owner'])
test2 = pd.merge(test, dsa5, how = 'left', on = ['product_category', 'lead_owner'])

# 없는 값 무조건 0으로 채워야됨
train2['fan_mae_king_yn'][train2['fan_mae_king_yn'].isna()] = 0
test2['fan_mae_king_yn'][test2['fan_mae_king_yn'].isna()] = 0

train = train2.copy()
test = test2.copy()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train2['fan_mae_king_yn'][train2['fan_mae_king_yn'].isna()] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test2['fan_mae_king_yn'][test2['fan_mae_king_yn'].isna()] = 0


In [69]:
# country와 product 융합
da = train.groupby(['customer_country', 'product_category']).sum()['is_converted']
da2 = train.groupby(['customer_country', 'product_category']).count()['is_converted']
da3 = pd.merge(da, da2, how = 'outer', on = ['customer_country', 'product_category']).reset_index()
da3['percentage'] = da3['is_converted_x'] / da3['is_converted_y']

# 그룹 나누기 (cnt = 14 : 75%)
### 0: otherwise 
### 1: cnt >= 14, 확률 > 0
### 2: 확률 > 0.5
### 3: cnt >= 14, 확률 > 0.5
da3['country_to_product'] = 0
da3['country_to_product'][(da3['is_converted_y'] >= 14) & (da3['percentage'] > 0)] = 1
da3['country_to_product'][(da3['percentage'] > 0.5)] = 2
da3['country_to_product'][(da3['is_converted_y'] >= 14) & (da3['percentage'] > 0.5)] = 3

# 기준 변수 생성하기
ctpd = da3[['customer_country', 'product_category', 'country_to_product']]

# train, test에 합치기
train2 = pd.merge(train, ctpd, how = 'left', on = ['customer_country', 'product_category'])
test2 = pd.merge(test, ctpd, how = 'left', on = ['customer_country', 'product_category'])

# 없는 값 무조건 0으로 채워야됨
train2['country_to_product'][train2['country_to_product'].isna()] = 0
test2['country_to_product'][test2['country_to_product'].isna()] = 0

train = train2.copy()
test = test2.copy()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  da3['country_to_product'][(da3['is_converted_y'] >= 14) & (da3['percentage'] > 0)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  da3['country_to_product'][(da3['percentage'] > 0.5)] = 2
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  da3['country_to_product'][(da3['is_converted_y'] >= 14) & (da3['percentage'] > 0.5)] = 3
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [70]:
# partnership
partner1 = train.groupby(['customer_idx', 'lead_owner']).sum()['is_converted']
partner2 = train.groupby(['customer_idx', 'lead_owner']).count()['is_converted']
partner3 = train.groupby(['customer_idx', 'lead_owner']).sum()['is_converted']/train.groupby(['customer_idx', 'lead_owner']).count()['is_converted']

# merge & arrangae name
pt = pd.concat([partner1, partner2, partner3], axis = 1)
pt.columns = ['true_count', 'count', 'percent']
ppt = pt.reset_index()

# 명단 만들기
ppt2 = ppt[['customer_idx', 'lead_owner']][(ppt['count'] >= 2) & (ppt['percent'] >= 0.5)]
ppt2['partnership'] = 1

# train, test에 합치기
train2 = pd.merge(train, ppt2, how = 'left', on = ['customer_idx', 'lead_owner'])
test2 = pd.merge(test, ppt2, how = 'left', on = ['customer_idx', 'lead_owner'])

# 없는 값 무조건 0으로 채워야됨
train2['partnership'][train2['partnership'].isna()] = 0
test2['partnership'][test2['partnership'].isna()] = 0

train = train2.copy()
test = test2.copy()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train2['partnership'][train2['partnership'].isna()] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test2['partnership'][test2['partnership'].isna()] = 0


In [71]:
# 이동기록 있는 lead owner 2회이상인 사람들의 집단.
asdf = train[['lead_owner', 'response_corporate']].drop_duplicates().groupby('lead_owner').count().reset_index()

# train
train['move_corporate'] = 0
train['move_corporate'][train['lead_owner'].isin(list(asdf[asdf['response_corporate'] != 1]['lead_owner']))] = 1

# test
test['move_corporate'] = 0
test['move_corporate'][train['lead_owner'].isin(list(asdf[asdf['response_corporate'] != 1]['lead_owner']))] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['move_corporate'][train['lead_owner'].isin(list(asdf[asdf['response_corporate'] != 1]['lead_owner']))] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['move_corporate'][train['lead_owner'].isin(list(asdf[asdf['response_corporate'] != 1]['lead_owner']))] = 1


In [72]:
for i in train.columns:
    print(i)
    display(train[i].unique())

bant_submit


array([1.  , 0.75, 0.5 , 0.25, 0.  ])

customer_country


array(['Philippines', 'India', 'Nigeria', 'Saudi Arabia', 'Singapore',
       'Brazil', 'United Arab Emirates', 'South Africa', 'United States',
       'Colombia', 'Mexico', 'Ghana', 'Egypt', 'Ethiopia', 'Australia',
       'Kenya', 'Indonesia', 'Oman', 'Pakistan', 'United Kingdom',
       'Guatemala', 'Panama', 'Canada', 'Bangladesh', 'Papua New Guinea',
       'Qatar', 'Afghanistan', 'Chile', 'Mozambique', 'Türkiye',
       'El Salvador', 'Togo', 'Jordan', 'Iraq', 'Israel', 'Sri Lanka',
       "Korea, Democratic People's Republic of", 'Portugal', 'Mauritania',
       'Uruguay', 'Peru', 'Germany', 'Romania', 'Norway', 'Jamaica',
       'Hungary', 'Poland', 'Czechia', 'Spain', 'Argentina', 'Ecuador',
       'Senegal', 'Hong Kong', 'Malaysia', 'Japan', 'Kuwait', 'Ireland',
       'Albania', 'Greece', 'Algeria', 'Nicaragua', 'Slovenia', 'Italy',
       'Netherlands', 'Dominican Republic', 'France', 'Uganda',
       'Paraguay', 'Namibia', 'Tunisia', 'Puerto Rico', 'Anguilla',
       'Croa

business_unit


array(['AS', 'ID', 'IT', 'Solution', 'CM'], dtype=object)

com_reg_ver_win_rate


array([0.06666667, 0.08888889, 0.04081633, 0.00393701, 0.        ,
       0.05753425, 0.075     , 0.04464286, 0.08433735, 0.0199005 ,
       0.07494867, 0.04347826, 0.03119584, 0.14705882, 0.03305785,
       0.08069164, 0.33333333, 0.04310345, 0.11627907, 0.01351351,
       0.04854369, 0.01818182, 0.01185771, 0.21428571, 0.3902439 ,
       0.03225806, 0.01754386, 0.03278689, 0.05555556, 0.01515152,
       0.46153846, 0.06818182, 0.08333333, 0.26923077, 0.18181818,
       0.23076923, 0.64285714, 0.16666667, 0.01724138, 0.05389222,
       0.04968944, 0.01098901, 0.06779661, 0.03703704, 0.00378788,
       0.36363636, 0.10526316, 0.61538462, 0.08695652, 0.05442177,
       0.05319149, 0.07142857, 0.04      , 0.04166667, 0.02892562,
       0.11842105, 0.04918033, 0.11864407, 0.07324841, 0.02898551,
       0.01960784, 0.02272727, 0.004     , 0.125     , 0.01694915,
       0.12412178, 0.13636364, 0.02020202, 0.11363636, 0.03603604,
       0.06956522, 0.5       , 1.        , 0.25      , 0.44444

customer_idx


array([32160, 23122,  1755, ..., 19249, 40327, 30268])

customer_type


array(['End-Customer', 'Specifier/ Influencer', 'Service Partner',
       'channel partner', 'No answer', 'Solution Eco-Partner', 'Other',
       'Homeowner', 'Developer'], dtype=object)

enterprise


array(['Enterprise', 'SMB'], dtype=object)

historical_existing_cnt


array([0.000e+00, 1.200e+01, 1.440e+02, 3.000e+00, 2.300e+01, 4.700e+01,
       1.000e+00, 1.900e+01, 4.200e+01, 7.500e+01, 1.700e+01, 2.000e+01,
       5.000e+00, 2.000e+00, 1.150e+02, 4.000e+00, 1.600e+01, 6.000e+00,
       3.100e+01, 4.900e+01, 1.000e+01, 1.100e+01, 4.000e+01, 7.000e+00,
       6.100e+01, 4.500e+01, 5.700e+01, 1.750e+02, 1.300e+01, 9.900e+01,
       1.500e+01, 8.000e+00, 2.100e+01, 5.600e+01, 6.660e+02, 1.220e+02,
       3.240e+02, 2.200e+01, 8.000e+01, 3.600e+01, 2.800e+01, 1.720e+02,
       6.600e+01, 3.200e+01, 5.300e+01, 3.800e+01, 1.890e+02, 5.800e+01,
       2.700e+01, 1.060e+02, 1.680e+02, 9.000e+00, 4.600e+01, 4.800e+01,
       5.200e+01, 7.300e+01, 9.200e+01, 6.000e+01, 3.900e+01, 1.800e+01,
       1.400e+01, 2.120e+02, 4.720e+02, 3.900e+02, 6.900e+01, 2.330e+02,
       2.900e+01, 3.000e+01, 2.400e+01, 7.800e+01, 1.070e+02, 4.100e+01,
       1.660e+02, 5.900e+01, 4.400e+01, 1.140e+02, 3.700e+01, 3.500e+01,
       1.580e+02, 8.500e+01, 7.700e+01, 2.600e+01, 

id_strategic_ver


array([0., 1.])

it_strategic_ver


array([0., 1.])

idit_strategic_ver


array([0., 1.])

customer_job


array(['Purchasing', 'Media_and_Communication', 'Engineering',
       'Entrepreneurship', 'Consulting', 'Program_and_Project_Management',
       'Sales', 'Others', 'Operations', 'No answer', 'Administrative',
       'Business_Development', 'Information_Technology', 'Accounting',
       'Education', 'Healthcare_Services', 'Human_Resources', 'Support',
       'Finance', 'Marketing', 'Legal', 'Quality_Assurance',
       'Arts_and_Design', 'Real_Estate',
       'Military_and_Protective_Services', 'Research',
       'Product_Management', 'Community_and_Social_Services',
       'consultant', 'purchaser', 'lead', 'purchasing agent',
       'information technology\u200b', 'surgery professional\u200b',
       'medical solution provider\u200b'], dtype=object)

lead_desc_length


array([  62,   96,   56,   44,   97, 1114,  420,  205,  103,  252,   90,
         84,   67,  210,   46,  166,  129,    3,   57,   80,   43,  407,
         75,   66,   71,  113,  133,  142,   76,  334,  197,   33,  171,
         69,   30,  181,   63,  199,  167,   32,   26,   37,   68,   23,
         82,  526,   38,   41,   35,   77,  137,   13,    9,    1,    2,
         12,    8,  283,  164,   85,   94,  203,  109,   29,   17,  146,
        127,   14,   89,  285,  267,  143,  253,  300,   21,  141,   60,
        214,  139,   54,  632,  264,   86,  104,   49,   61,   24,  151,
        198,   11,  162,  389,  119,  155,  222,  116,  180,  218,   65,
        235,  154,  360,  189,  156,  114,   99,  306, 1072,  238,    6,
         47,   18,   98,  328,  282,  191,   52,  357,  313,  451,  212,
        280,   34,  165,  111,  115,  120,   88,  217,  365,  134,  172,
        100,  230,  101,  219,   50,   31,  216,  152,  319,   95,   58,
         79,   39,   73,  126,   40,  223,   22,   

inquiry_type


array(['quotation or purchase consultation', 'others',
       'usage or technical consultation', 'request for partnership',
       'no_answer', 'request a demo', 'customer suggestions',
       'oem/odm request'], dtype=object)

product_category


array(['multi-split', 'single-split', 'vrf', 'chiller', 'others',
       'no_answer', 'software solution', 'ventilation', 'heating',
       'standard signage', 'high brightness signage',
       'interactive signage', 'video wall signage', 'led signage',
       'hotel tv', 'signage care solution', 'oled signage',
       'one:quick series', 'special signage', 'accessories',
       'pro:centric', 'webos', 'hospital tv', 'monitor', 'laptop',
       'projector', 'all cloud devices', 'all medical displays',
       'energy storage system', 'surgical monitors', 'scroll compressor'],
      dtype=object)

response_corporate


array(['LGEPH', 'LGEIL', 'LGEAF', 'LGESJ', 'LGESL', 'LGESP', 'LGEGF',
       'LGESA', 'LGEUS', 'LGECB', 'LGEMS', 'LGEEG', 'LGEEF', 'LGEAP',
       'LGEIN', 'LGEUK', 'LGEKR', 'LGEPS', 'LGECI', 'LGECL', 'LGETK',
       'LGELF', 'LGEPT', 'LGEPR', 'LGEDG', 'LGERO', 'LGEMK', 'LGEPL',
       'LGECZ', 'LGEES', 'LGEAR', 'LGEHK', 'LGEML', 'LGEJP', 'LGEHS',
       'LGEAS', 'LGEYK', 'LGEIS', 'LGEBN', 'LGEFS', 'LGESW', 'LGEMC',
       'LGEAG', 'LGEEB', 'LGETH', 'LGEVH', 'LGECH', 'LGELA', 'LGETT',
       'LGERA', 'LGEUR', 'LGEIR'], dtype=object)

expected_timeline


array(['less_than_3_months', 'no_answer', '3_months_6_months',
       '9_months_1_year', 'more_than_a_year', '6_months_9_months',
       'others'], dtype=object)

ver_cus


array([1, 0])

ver_pro


array([0, 1])

ver_win_rate_x


array([3.07928766e-03, 0.00000000e+00, 5.71955128e-04, 7.16773438e-04,
       6.04403367e-05, 5.43222432e-04, 2.98310405e-04, 9.65915661e-05,
       1.18272889e-03, 2.15363418e-04, 2.31593813e-06, 1.27659029e-05,
       2.58895523e-05])

ver_win_ratio_per_bu


array([0.02684564, 0.        , 0.06456612, 0.03448276, 0.04862953,
       0.05147059, 0.06407035, 0.02263374, 0.12857143, 0.13114754,
       0.03548387, 0.02012072, 0.07941176, 0.02272727, 0.04984026,
       0.01158301, 0.07134503, 0.03691275, 0.06092437, 0.28571429,
       0.22727273, 0.05357143, 0.03157895, 0.02877698])

business_area


array(['corporate / office', 'No answer', 'education',
       'hotel & accommodation', 'hospital & health care',
       'special purpose', 'residential (home)', 'government department',
       'retail', 'factory', 'power plant / renewable energy',
       'transportation', 'public facility'], dtype=object)

business_subarea


array(['Engineering', 'Advertising', 'Construction', 'IT/Software',
       'No answer', 'Manufacturing', 'Energy', 'Developer/Property',
       'Entertainment', 'Agriculture', 'Pharmaceutical', 'Others',
       'Banking', 'Consulting', 'Healthcare', 'Finance',
       'Broadcasting & Media', 'Distribution Center', 'Law Firm',
       'Logistics', 'Telecommunication', 'Aerospace', 'Network/Cabling',
       'Insurance', 'Institute & Academy', 'Hotel',
       'K12 Kindergarten & Schools', 'Hospital',
       'Mixed-use (Multi Complex)', 'Telecom Base Station / Data, Call',
       'Botanical Garden / Green House',
       'Higher Education (College & University)', 'Clinic',
       'General Hospital', 'Fitness', 'LTC(Long-Term Care)',
       'Villa / Single-Family Home', 'Apartment', 'Townhouse',
       'Officetel', 'General Government Office', 'Military',
       'Police / Fire station', 'Welfare Facilities', 'Other Stores',
       'Luxury(Watch/Jewelry Shop)', 'Electronics & Telco',
       'Sp

lead_owner


array([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
         11,   12,   13,   14,   15,   16,   17,   18,   19,   20,   21,
         22,   23,   24,   25,   26,   27,   28,   29,   30,   31,   32,
         33,   34,   35,   36,   37,   38,   39,   40,   41,   42,   43,
         44,   45,   46,   47,   48,   49,   50,   51,   52,   53,   54,
         55,   56,   57,   58,   59,   60,   61,   62,   63,   64,   65,
         66,   67,   68,   69,   70,   71,   72,   73,   74,   75,   76,
         77,   78,   79,   80,   81,   82,   83,   84,   85,   86,   87,
         88,   89,   90,   91,   92,   93,   94,   95,   96,   97,   98,
         99,  100,  101,  102,  103,  104,  105,  106,  107,  108,  109,
        110,  111,  112,  113,  114,  115,  117,  119,  120,  116,  124,
        125,  126,  128,  130,  134,  133,  140,  145,  146,  147,  148,
        149,  150,  151,  152,  153,  154,  155,  156,  157,  158,  159,
        160,  161,  162,  163,  164,  165,  166,  1

is_converted


array([ True, False])

continent


array(['Asia', 'Africa', 'South America', 'North America', 'Oceania',
       'Europe'], dtype=object)

fan_mae_king_yn


array([0., 1.])

country_to_product


array([1, 2, 3, 0])

partnership


array([0., 1.])

move_corporate


array([0, 1])

In [73]:
# 임시 파일 저장
train.to_csv('new_imputation_train_ddd.csv', index = False)
test.to_csv('new_imputation_test_ddd.csv', index = False)