In [5]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Data loading
@st.cache_data
def load_data():
    """Read the customer dataset from disk.

    The function is wrapped in ``st.cache_data``, so Streamlit caches the
    returned DataFrame.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for
        ``DS_Customer_data_modified2.csv`` (path relative to the working
        directory).
    """
    return pd.read_csv("DS_Customer_data_modified2.csv")

# Module-level dataset used by the preprocessing step below.
data = load_data()

# Data preprocessing
def preprocess_data(df):
    """Build the standardized feature matrix and binary churn target.

    Args:
        df: DataFrame that contains the seven feature columns listed in
            ``features`` below and a '고객이탈여부' column whose values are
            'Yes' or 'No'.

    Returns:
        A tuple ``(X_scaled, y, features, scaler)``:
        the standardized feature array, the target Series (1 for 'Yes',
        0 for 'No'), the list of feature column names, and the
        ``StandardScaler`` fitted on those columns.

    Raises:
        KeyError: if a required column is missing from *df*.
        ValueError: if the churn column holds anything other than
            'Yes' / 'No' (missing values included).
    """
    features = ['연령', '부양자수', '추천횟수', '데이터사용량', '유지기간', '총과금액', '고객월평균납부액']
    label_column = '고객이탈여부'
    label_map = {'Yes': 1, 'No': 0}

    X = df[features]
    raw_labels = df[label_column]
    y = raw_labels.map(label_map)

    # Series.map turns every value without a mapping into NaN. Fail here with
    # a message that names the offending values instead of letting NaN labels
    # reach the classifier and fail there with an unrelated error.
    unmapped = y.isna()
    if unmapped.any():
        unexpected = sorted({repr(value) for value in raw_labels[unmapped].unique()})
        raise ValueError(
            f"Column {label_column!r} must contain only "
            f"{sorted(label_map)}; found unexpected value(s): "
            f"{', '.join(unexpected)}"
        )

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y, features, scaler

# Scaled features, encoded target, feature names and the fitted scaler;
# `features` and `scaler` are reused when scoring the user's input below.
X, y, features, scaler = preprocess_data(data)

# Model training
@st.cache_resource
def train_model(X, y):
    """Fit a random-forest classifier on the training part of ``(X, y)``.

    The data is split with ``test_size=0.2`` and ``random_state=42``; only
    the training portion is used for fitting and the held-out portion is
    discarded. The function is wrapped in ``st.cache_resource``.

    Args:
        X: Feature matrix.
        y: Target labels aligned with the rows of *X*.

    Returns:
        The fitted ``RandomForestClassifier`` (100 trees, ``random_state=42``).
    """
    X_fit, _X_holdout, y_fit, _y_holdout = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    # fit() returns the estimator itself, so the call can be chained.
    return RandomForestClassifier(n_estimators=100, random_state=42).fit(X_fit, y_fit)

# Classifier used by the prediction section below.
model = train_model(X, y)

# Streamlit app
st.title('고객 이탈 예측 대시보드')

# User input: one widget per model feature. The variable names below are
# read by the prediction section further down.
st.header('고객 정보 입력')
age = st.slider('연령', min_value=18, max_value=100, value=30)
dependents = st.slider('부양자수', min_value=0, max_value=5, value=0)
referrals = st.slider('추천횟수', min_value=0, max_value=10, value=0)
data_usage = st.slider('데이터사용량 (GB)', min_value=0, max_value=100, value=10)
tenure = st.slider('유지기간 (개월)', min_value=0, max_value=84, value=12)
total_charges = st.number_input('총과금액', min_value=0, value=1_000_000)
monthly_charges = st.number_input('고객월평균납부액', min_value=0, value=50_000)

# Prediction: runs only on the script pass in which the button was clicked.
if st.button('이탈 확률 예측'):
    # Build the row as a DataFrame carrying the training column names, in
    # training order. The scaler was fitted on a DataFrame, so handing it a
    # bare ndarray triggers scikit-learn's "X does not have valid feature
    # names" warning and gives no protection against column-order mistakes.
    input_row = pd.DataFrame(
        [[age, dependents, referrals, data_usage, tenure, total_charges, monthly_charges]],
        columns=features,
    )
    input_data_scaled = scaler.transform(input_row)

    # Look the churn class (label 1) up in model.classes_ rather than assuming
    # it sits in column 1: if the training data contained a single class,
    # predict_proba returns only one column.
    class_probabilities = model.predict_proba(input_data_scaled)[0]
    known_classes = list(model.classes_)
    if 1 in known_classes:
        prediction = float(class_probabilities[known_classes.index(1)])
    else:
        prediction = 0.0

    st.header('예측 결과')
    st.write(f'이 고객의 이탈 확률은 {prediction:.2%}입니다.')

    # Visualization: stay vs. churn probability.
    # NOTE(review): the labels are Korean; matplotlib's default font may lack
    # Hangul glyphs — confirm a CJK-capable font is configured where this runs.
    fig, ax = plt.subplots()
    ax.bar(['유지', '이탈'], [1 - prediction, prediction])
    ax.set_ylim(0, 1)
    ax.set_ylabel('확률')
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this each
    # click would leak a figure in the long-running Streamlit process.
    plt.close(fig)

    # Feature importance, most important first.
    importances = model.feature_importances_
    feature_imp = pd.DataFrame({'feature': features, 'importance': importances})
    feature_imp = feature_imp.sort_values('importance', ascending=False)

    st.subheader('특성 중요도')
    imp_fig, imp_ax = plt.subplots()
    imp_ax.bar(feature_imp['feature'], feature_imp['importance'])
    # Operate on this figure/axes explicitly instead of pyplot's implicit
    # "current figure" state.
    plt.setp(imp_ax.get_xticklabels(), rotation=45, ha='right')
    imp_fig.tight_layout()
    st.pyplot(imp_fig)
    plt.close(imp_fig)

2024-11-14 06:50:18.608 No runtime found, using MemoryCacheStorageManager
