In [1]:
import pandas as pd
import numpy as np

from sqlalchemy import create_engine

In [2]:
# Location of the SQLite database file holding the survey data.
path_to_db = '/datasets/telecomm_csi.db'

# SQLAlchemy engine for that file. The 'sqlite:///' prefix followed by an
# absolute path yields the four-slash URL form; echo=False keeps SQL
# statement logging switched off.
engine = create_engine('sqlite:///' + path_to_db, echo=False)

In [3]:
# Flat, one-row-per-user extract used as the dashboard data source.
#
# * is_new         - 'Новый' when lt_day <= 365, otherwise 'Старый'.
# * gender_segment - 1 -> 'Женщина', 0 -> 'Мужчина'; any other value
#                    (including NULL) falls through to the label 'z'.
# * *_segment      - SUBSTR(title, 4) keeps the title from its 4th character
#                    on (presumably dropping a leading "NN " ordering prefix;
#                    verify against the segment tables).
# * nps_group      - score >= 9 -> 'Сторонник', 7..8 -> 'Нейтрал',
#                    everything else -> 'Критик'.
#
# All joins are inner joins, so a user row without a matching location or
# segment row is excluded from the result.
#
# NOTE: the 'Сторонник' label is spelled entirely in Cyrillic; a Latin "C" as
# the first letter would produce a look-alike value that does not compare equal
# to the correctly typed Russian word.
query = """
SELECT u.user_id,
       u.lt_day,
       CASE
           WHEN u.lt_day <= 365 THEN 'Новый'
           ELSE 'Старый'
           END AS is_new,
       u.age,
       CASE
           WHEN u.gender_segment = 1 THEN 'Женщина'
           WHEN u.gender_segment = 0 THEN 'Мужчина'
           ELSE 'z'
           END AS gender_segment,
       u.os_name,
       u.cpe_type_name,
       location.country,
       location.city,
       SUBSTR(age_segment.title, 4) AS age_segment,
       SUBSTR(traffic_segment.title, 4) AS traffic_segment,
       SUBSTR(lifetime_segment.title, 4) AS lifetime_segment,
       u.nps_score,
       CASE
           WHEN u.nps_score >= 9 THEN 'Сторонник'
           WHEN u.nps_score >= 7 THEN 'Нейтрал'
           ELSE 'Критик'
           END AS nps_group
FROM user AS u
JOIN location ON u.location_id = location.location_id
JOIN age_segment ON u.age_gr_id = age_segment.age_gr_id
JOIN traffic_segment ON u.tr_gr_id = traffic_segment.tr_gr_id
JOIN lifetime_segment ON u.lt_gr_id = lifetime_segment.lt_gr_id;
"""

In [4]:
# Run the extract query against the SQLite engine and load the result set
# into a DataFrame.
df = pd.read_sql(query, engine)

# Preview ten random rows. The fixed random_state makes the preview identical
# on every Restart & Run All instead of changing from run to run.
df.sample(10, random_state=42)

Unnamed: 0,user_id,lt_day,is_new,age,gender_segment,os_name,cpe_type_name,country,city,age_segment,traffic_segment,lifetime_segment,nps_score,nps_group
16998,AVHDET,2022,Старый,45.0,Мужчина,ANDROID,SMARTPHONE,Россия,СанктПетербург,45-54,25-30,36+,10,Cторонник
39387,C18TEP,1089,Старый,46.0,Женщина,PROPRIETARY,PHONE,Россия,Архангельск,45-54,25-30,36+,10,Cторонник
74258,DTRA0Y,485,Старый,40.0,Мужчина,ANDROID,SMARTPHONE,Россия,Ярославль,35-44,1-5,13-24,10,Cторонник
378885,TLQT6Y,233,Новый,25.0,Женщина,ANDROID,SMARTPHONE,Россия,Москва,25-34,1-5,7-12,1,Критик
289895,OZRRLO,4124,Старый,47.0,Мужчина,ANDROID,SMARTPHONE,Россия,Ульяновск,45-54,5-10,36+,9,Cторонник
443148,WXL6ZG,1225,Старый,26.0,Мужчина,IOS,SMARTPHONE,Россия,Пенза,25-34,20-25,36+,7,Нейтрал
305443,PSWFEV,263,Новый,55.0,Мужчина,ANDROID,SMARTPHONE,Россия,Курск,55-64,1-5,7-12,10,Cторонник
374655,TDQJ9G,1548,Старый,50.0,Женщина,ANDROID,SMARTPHONE,Россия,Москва,45-54,1-5,36+,1,Критик
410763,V8X7ZF,5069,Старый,51.0,Женщина,ANDROID,SMARTPHONE,Россия,Казань,45-54,0.1-1,36+,10,Cторонник
182438,JFE0VI,342,Новый,22.0,Мужчина,ANDROID,SMARTPHONE,Россия,Калининград,16-24,100+,7-12,10,Cторонник


In [5]:
# Print the frame summary: row count, per-column non-null counts and dtypes,
# and memory usage — a quick check for missing values after the load.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 502493 entries, 0 to 502492
Data columns (total 14 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   user_id           502493 non-null  object 
 1   lt_day            502493 non-null  int64  
 2   is_new            502493 non-null  object 
 3   age               501939 non-null  float64
 4   gender_segment    502493 non-null  object 
 5   os_name           502493 non-null  object 
 6   cpe_type_name     502493 non-null  object 
 7   country           502493 non-null  object 
 8   city              502493 non-null  object 
 9   age_segment       502493 non-null  object 
 10  traffic_segment   502493 non-null  object 
 11  lifetime_segment  502493 non-null  object 
 12  nps_score         502493 non-null  int64  
 13  nps_group         502493 non-null  object 
dtypes: float64(1), int64(2), object(11)
memory usage: 53.7+ MB


In [6]:
# Write the extract to a CSV file in the working directory; index=False leaves
# the DataFrame's row index out of the file.
df.to_csv(path_or_buf='telecomm_csi_tableau.csv', index=False)

### Укажите ссылку на дашборд на сайте Tableau Public:

https://public.tableau.com/app/profile/nick5546/viz/Telecomm_project_mk3_MaskoN_/gender?publish=yes

### Укажите ссылку на pdf-файл с презентацией:

https://disk.yandex.ru/i/bKdUeaO-_B95uA