In [9]:
import platform

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default plot theme. This runs before the rcParams overrides
# below so that the theme does not overwrite them.
# https://coldbrown.co.kr/2023/07/%ED%8C%8C%EC%9D%B4%EC%8D%AC-%EC%8B%A4%EC%A0%84%ED%8E%B8-08-seaborn-sns-set%EC%9D%84-%ED%86%B5%ED%95%B4-%EC%8A%A4%ED%83%80%EC%9D%BC-%EC%84%A4%EC%A0%95%ED%95%98%EA%B8%B0/
sns.set()

# Korean-capable font per operating system, keyed by platform.system().
# Replaces the previous manual toggle (commenting one font line in/out).
_KOREAN_FONT_BY_OS = {
    'Windows': 'Malgun Gothic',
    'Darwin': 'AppleGothic',  # macOS
}

# Global matplotlib defaults for the figures in this notebook.
# Any other OS keeps the original 'Malgun Gothic' setting.
plt.rcParams['font.family'] = _KOREAN_FONT_BY_OS.get(platform.system(), 'Malgun Gothic')
plt.rcParams['figure.figsize'] = 12, 6
plt.rcParams['font.size'] = 14
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph,
# which the Korean fonts above may not provide.
plt.rcParams['axes.unicode_minus'] = False


# 복잡한 통계 처리를 위한 라이브러리
from scipy import stats

In [19]:
# Load the customer dataset CSV into df1 (path is relative to the working directory).
df1 = pd.read_csv(
    filepath_or_buffer='../data/total_총정리_이유NaN 존재.csv',
)

In [21]:
# Display df1 to sanity-check the load (the output is a truncated preview of the frame).
df1

Unnamed: 0,고객ID,위도,경도,성별,고령자여부,배우자여부,부양가족여부,가입개월수,전화서비스가입여부,복수회선여부,...,가입혜택,장거리통화요금,월평균다운로드용량(GB),프리미엄기술지원여부,음악스트리밍이용여부,무제한데이터이용여부,총환불액,총초과데이터요금,총장거리통화요금,총납부금
0,0002-ORFBO,34.827662,-118.999073,False,False,True,False,9,True,False,...,No,42.39,16,True,False,True,0.00,0,381.51,974.81
1,0003-MKNFE,34.162515,-118.203869,True,False,False,False,9,True,True,...,No,10.69,10,False,True,False,38.33,10,96.21,610.28
2,0004-TLHLJ,33.645672,-117.922613,True,False,False,False,4,True,False,...,Offer E,33.65,30,False,False,True,0.00,0,134.60,415.45
3,0011-IGKFF,38.014457,-122.115432,True,True,True,False,13,True,False,...,Offer D,27.82,4,False,False,True,0.00,0,361.66,1599.51
4,0013-EXCHZ,34.227846,-119.079903,False,True,True,False,3,True,False,...,No,7.38,11,True,False,True,0.00,0,22.14,289.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,9987-LUTYD,32.759327,-116.997260,False,False,False,False,13,True,False,...,Offer D,46.68,59,True,True,True,0.00,0,606.84,1349.74
7039,9992-RRAMN,37.734971,-120.954271,True,False,True,False,22,True,True,...,Offer D,16.20,17,False,True,True,0.00,0,356.40,2230.10
7040,9992-UJOEL,39.108252,-123.645121,True,False,False,False,2,True,False,...,Offer E,18.62,51,False,False,True,0.00,0,37.24,129.99
7041,9993-LHIEB,33.001813,-117.263628,True,False,True,False,67,True,False,...,Offer A,2.12,58,True,True,True,0.00,0,142.04,4769.69


### 배우자가 있으면 월별 계약을 덜 한다.

### 월별 계약인 사람들은 고객 만족도가 낮다.

### 프리미엄 기술지원 여부에 따른 계약유형

### 무제한 데이터 이용 여부에 따른 계약유형

### 친구추천여부, 친구추천 횟수에 따른 계약유형

In [27]:
# Preview the spouse flag column, the input for the spouse vs. monthly-contract hypothesis.
df1['배우자여부']

0        True
1       False
2       False
3        True
4        True
        ...  
7038    False
7039     True
7040    False
7041     True
7042     True
Name: 배우자여부, Length: 7043, dtype: bool

In [29]:
# Preview the customer satisfaction score, the input for the monthly-contract vs. satisfaction hypothesis.
df1['고객만족도점수']

0       3
1       5
2       1
3       1
4       1
       ..
7038    4
7039    1
7040    5
7041    3
7042    3
Name: 고객만족도점수, Length: 7043, dtype: int64

In [23]:
# Preview the premium tech-support flag, to be compared against contract type.
df1['프리미엄기술지원여부']

0        True
1       False
2       False
3       False
4        True
        ...  
7038     True
7039    False
7040    False
7041     True
7042    False
Name: 프리미엄기술지원여부, Length: 7043, dtype: bool

In [31]:
# Preview the unlimited-data flag, to be compared against contract type.
df1['무제한데이터이용여부']

0        True
1       False
2        True
3        True
4        True
        ...  
7038     True
7039     True
7040     True
7041     True
7042     True
Name: 무제한데이터이용여부, Length: 7043, dtype: bool

In [33]:
# Preview the friend-referral flag, to be compared against contract type.
df1['친구추천여부']

0        True
1       False
2       False
3        True
4        True
        ...  
7038    False
7039     True
7040    False
7041     True
7042     True
Name: 친구추천여부, Length: 7043, dtype: bool

In [37]:
# Largest referral count in the data; used to decide the upper edge when binning this column.
df1['친구추천횟수'].max()

11

In [45]:
# Bin the referral count (친구추천횟수) into labelled ranges
def bin_recommend(x):
    """Map a single referral count to a range label.

    Parameters
    ----------
    x : int or float
        Referral count for one customer.

    Returns
    -------
    str or None
        '0-3' when x <= 3, '4-7' when x <= 7, '8-11' when x <= 11, and
        '12+' for anything larger. Missing input (NaN or None) returns None.
    """
    # A missing count has no bin. Return None explicitly rather than relying
    # on every comparison with NaN being False and falling off the end.
    if pd.isna(x):
        return None
    if x <= 3:
        return '0-3'
    if x <= 7:
        return '4-7'
    if x <= 11:
        return '8-11'
    # Open-ended top bin. 11 is only the maximum of the data inspected so far;
    # without this branch a larger count would return None and be dropped
    # silently by a default groupby on the bin column.
    return '12+'

# Store the binned referral count on df1 as a new column
df1['친구추천횟수(bin)'] = df1['친구추천횟수'].map(bin_recommend)

# Count customers for every (contract type, referral bin) combination
result = (
    df1.groupby(['계약기간유형', '친구추천횟수(bin)'])
    .size()
    .rename('고객수')
    .reset_index()
)

# Show the summary table
result

Unnamed: 0,계약기간유형,친구추천횟수(bin),고객수
0,Month-to-month,0-3,3336
1,Month-to-month,4-7,330
2,Month-to-month,8-11,209
3,One year,0-3,1060
4,One year,4-7,255
5,One year,8-11,158
6,Two year,0-3,1002
7,Two year,4-7,384
8,Two year,8-11,309
