In [85]:
import numpy as np
import pandas as pd
import os
import pickle


In [86]:
def preprocess_data(df_orgin, df_region_price=None):
    """Build the feature frame used for churn prediction from raw customer rows.

    The input frame is never modified; all work happens on a renamed copy.

    Steps:
      1. Rename ``'Churn status'`` to ``'Churn'`` (a no-op if it is absent).
      2. Replace ``'Age'`` with a categorical ``'Age_group'`` column made of
         10-year buckets labelled by their lower bound.
      3. Left-join the per-region price table on ``'Region'``.
      4. Pick the regional price matching each row's ``'Subscription Plan'``
         and derive four ratio features from it.

    Args:
        df_orgin: Raw customer DataFrame. The code reads the columns 'Age',
            'Region', 'Subscription Plan', 'Monthly Income ($)',
            'Daily Watch Time (Hours)', 'Customer Satisfaction Score (1-10)',
            'Support Queries Logged' and 'Subscription Length (Months)'.
        df_region_price: Optional DataFrame holding 'Region' plus the
            'avg_region_price_basic' / '_standard' / '_premium' columns.
            When omitted, it is read from './data/region_price.csv'.

    Returns:
        A new DataFrame containing the original columns (minus 'Age'), the
        joined price columns, 'Age_group', 'User_Subscription_Price',
        'Price_Burden_Ratio', 'Watch_Time_per_Dollar',
        'Satisfaction_per_Dollar' and 'Queries_per_Month'.
    """
    # rename() returns a new frame, so the caller's data stays untouched.
    df = df_orgin.rename(columns={'Churn status': 'Churn'})

    # Age buckets: 10-year wide, left-closed, spanning the observed age range.
    min_age = df['Age'].min()
    max_age = df['Age'].max()

    bin_from = int(np.floor(min_age / 10) * 10)
    bin_to = int(np.ceil(max_age / 10) * 10)

    bins = list(range(bin_from, bin_to + 10, 10))
    # Closing edge of the last, catch-all bucket. A fixed 99 is kept for the
    # usual case; once the regular edges already reach 99 a fixed value would
    # make the edges non-monotonic and pd.cut would raise, so extend by one
    # more decade instead.
    bins.append(99 if bin_to < 99 else bin_to + 10)
    labels = bins[:-1]

    df['Age_group'] = pd.cut(x=df['Age'], bins=bins, labels=labels, right=False)
    df = df.drop(columns='Age')

    # Per-region average prices: use the supplied table or fall back to the CSV.
    if df_region_price is None:
        df_region_price = pd.read_csv('./data/region_price.csv')

    # Attach the regional prices to every customer row.
    df_merged = pd.merge(df, df_region_price, on='Region', how='left')

    # Price the customer pays: the regional average for their plan.
    conditions = [
        (df_merged['Subscription Plan'] == 'Basic'),
        (df_merged['Subscription Plan'] == 'Standard'),
        (df_merged['Subscription Plan'] == 'Premium')
    ]
    choices = [
        df_merged['avg_region_price_basic'],
        df_merged['avg_region_price_standard'],
        df_merged['avg_region_price_premium']
    ]

    # Rows whose plan matches none of the three names get a price of 0.
    df_merged['User_Subscription_Price'] = np.select(conditions, choices, default=0)

    # Small constant added to every denominator to avoid division by zero.
    epsilon = 1e-6

    # 1. Subscription price relative to monthly income.
    df_merged['Price_Burden_Ratio'] = df_merged['User_Subscription_Price'] / (df_merged['Monthly Income ($)'] + epsilon)

    # 2. Daily watch time per dollar of subscription price.
    df_merged['Watch_Time_per_Dollar'] = df_merged['Daily Watch Time (Hours)'] / (df_merged['User_Subscription_Price'] + epsilon)

    # 3. Satisfaction score per dollar of subscription price.
    df_merged['Satisfaction_per_Dollar'] = df_merged['Customer Satisfaction Score (1-10)'] / (df_merged['User_Subscription_Price'] + epsilon)

    # 4. Support queries per month of subscription.
    df_merged['Queries_per_Month'] = df_merged['Support Queries Logged'] / (df_merged['Subscription Length (Months)'] + epsilon)

    # Replace any remaining +/- infinity with 0.
    df_merged = df_merged.replace([np.inf, -np.inf], 0)

    return df_merged

In [92]:
# Read the raw customer records and turn them into model features in one step.
df = preprocess_data(pd.read_csv('./data/our_class.csv'))

In [93]:
def predict_churn(model, df):
    """Return the model's class probabilities for every row of ``df``.

    Args:
        model: Fitted estimator or pipeline exposing ``predict_proba``.
        df: Preprocessed feature frame. A 'Churn' label column is dropped
            before prediction when present, but it is not required, so
            unlabelled customers can be scored as well.

    Returns:
        The value returned by ``model.predict_proba`` for the feature columns.
    """
    # errors='ignore' keeps this usable for frames that carry no label column.
    features = df.drop(columns='Churn', errors='ignore')
    return model.predict_proba(features)

In [94]:
# Location of the serialized churn model file.
# NOTE(review): the file name suggests a LightGBM pipeline - confirm.
target_dir = './model'
pipeline_path = os.path.join(target_dir, 'churn_lgbm_pipeline.pkl')

# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load model files that come from a trusted source.
with open(pipeline_path, 'rb') as f:
    model = pickle.load(f)


# Class probabilities for every row of the preprocessed frame.
predictions = predict_churn(model, df)



In [96]:
# Show column 1 of the probability matrix rounded to one decimal, followed by
# the full matrix.
# NOTE(review): column 1 is presumably the churn class - confirm against the
# model's class order.
predictions[:, 1].round(1), predictions

(array([0.9, 1. , 1. , 0.7, 0.3, 0.2, 1. , 0.5, 1. , 0.3, 1. , 0.5, 0.4,
        0.4, 0.9, 0.6, 1. , 0.2, 0.9, 0.6, 0.3, 0.6, 0.3, 0.5, 0.5]),
 array([[0.07606951, 0.92393049],
        [0.00832093, 0.99167907],
        [0.01406646, 0.98593354],
        [0.32919035, 0.67080965],
        [0.68174047, 0.31825953],
        [0.79201018, 0.20798982],
        [0.0347035 , 0.9652965 ],
        [0.54376006, 0.45623994],
        [0.01371043, 0.98628957],
        [0.6535983 , 0.3464017 ],
        [0.01691439, 0.98308561],
        [0.46237459, 0.53762541],
        [0.64812469, 0.35187531],
        [0.5838318 , 0.4161682 ],
        [0.0546582 , 0.9453418 ],
        [0.37562441, 0.62437559],
        [0.04391575, 0.95608425],
        [0.80628179, 0.19371821],
        [0.13679344, 0.86320656],
        [0.44559582, 0.55440418],
        [0.72022829, 0.27977171],
        [0.35605115, 0.64394885],
        [0.71005842, 0.28994158],
        [0.51648362, 0.48351638],
        [0.50133832, 0.49866168]]))

In [97]:
# Hard-coded (importance, feature) pairs.
# NOTE(review): presumably copied from the trained model's feature
# importances - confirm the source before relying on these numbers.
_importance_records = [
    (238, 'Subscription Length (Months)'),
    (221, 'Customer Satisfaction Score (1-10)'),
    (22, 'Daily Watch Time (Hours)'),
    (20, 'Engagement Rate (1-10)'),
    (20, 'Device Used Most Often'),
    (20, 'Genre Preference'),
    (19, 'Region'),
    (19, 'Payment History (On-Time/Delayed)'),
    (17, 'Subscription Plan'),
    (16, 'Support Queries Logged'),
    (16, 'Monthly Income ($)'),
    (15, 'Promotional Offers Used'),
    (15, 'Number of Profiles Created'),
    (12, 'Age_group'),
    (11, 'avg_region_price_basic'),
    (11, 'avg_region_price_standard'),
    (11, 'avg_region_price_premium'),
    (9, 'User_Subscription_Price'),
    (9, 'Price_Burden_Ratio'),
    (8, 'Watch_Time_per_Dollar'),
    (7, 'Satisfaction_per_Dollar'),
    (7, 'Queries_per_Month'),
]

# One row per feature, ordered from most to least important.
feature_importance = pd.DataFrame(
    _importance_records, columns=['importance', 'feature']
).sort_values(by='importance', ascending=False)

In [101]:
# Display the eight most important features. feature_importance is already
# sorted in descending order when it is built, so the extra sort_values here
# only re-asserts that order.
feature_importance.head(8).sort_values('importance', ascending=False)

Unnamed: 0,importance,feature
0,238,Subscription Length (Months)
1,221,Customer Satisfaction Score (1-10)
2,22,Daily Watch Time (Hours)
3,20,Engagement Rate (1-10)
4,20,Device Used Most Often
5,20,Genre Preference
6,19,Region
7,19,Payment History (On-Time/Delayed)


In [102]:
import random

In [103]:
def generate_random_name():
    """Return a random nickname made of one adjective followed by one animal.

    Both words are Korean and are joined without a separator. The adjective
    is drawn first and the animal second, each with ``random.choice``.
    """
    # Pool of adjectives (first half of the name).
    adjective_pool = (
        "아주빠른", "느릿느릿한", "용감한", "영리한", "밝게빛나는",
        "어두컴컴한", "재미있는", "힘센", "작은", "거대한",
        "우아한", "날쌘", "똑똑한", "조용한", "시끄러운",
        "신비로운", "차분한", "활기찬", "게으른", "사나운",
        "온순한", "기묘한", "반짝이는", "고요한", "매혹적인",
        "현명한", "자유로운", "민첩한", "단단한", "부드러운",
        "차가운", "뜨거운", "황금빛", "은빛", "투명한",
        "몽환적인", "천진난만한", "장난꾸러기", "눈부신", "기발한",
        "까만", "하얀", "붉은", "푸른", "노란",
    )

    # Pool of animals (second half of the name).
    animal_pool = (
        "나무늘보", "호랑이", "코끼리", "치타", "펭귄",
        "부엉이", "고래", "카멜레온", "사자", "토끼",
        "여우", "코알라", "달팽이", "독수리", "늑대",
        "돌고래", "팬더", "기린", "하마", "오리너구리",
        "퓨마", "매미", "개미", "벌새", "알파카",
        "이구아나", "악어", "앵무새", "문어", "고슴도치",
        "사슴", "곰", "표범", "다람쥐", "양",
        "염소", "수달", "캥거루", "바다표범", "비둘기",
        "뱀", "제비", "참새", "두더지", "낙타",
        "코뿔소",
    )

    # f-string fields are evaluated left to right: adjective, then animal.
    return f"{random.choice(adjective_pool)}{random.choice(animal_pool)}"

In [106]:
# NOTE(review): x is created but never filled (the append call was left
# disabled), so it stays an empty list.
x = []
for _ in range(25):
    # Print one generated name per iteration. No seed is set in the visible
    # cells, so the printed names differ from run to run.
    print(generate_random_name())



현명한코끼리
반짝이는토끼
차분한뱀
황금빛퓨마
영리한곰
노란다람쥐
아주빠른사자
아주빠른호랑이
매혹적인참새
천진난만한기린
기묘한코뿔소
힘센다람쥐
은빛비둘기
작은뱀
작은수달
노란나무늘보
기발한알파카
뜨거운달팽이
용감한여우
사나운독수리
자유로운매미
자유로운수달
힘센여우
투명한앵무새
차분한코알라


In [1]:
!pip install chardet

