In [1]:
import pandas as pd
import numpy as np

**Загрузка данных**

In [2]:
general_df = pd.read_csv('DS/general.csv', sep=',')
traffic_df = pd.read_csv('DS/traffic.csv', sep=',')
recharge_df = pd.read_csv('DS/recharge.csv', sep=',')
balance_df = pd.read_csv('DS/balance.csv', sep=',')

## Описание данных

## general
общая информацию об абоненте (устройства, тарифные планы, канал продаж).

#### Признаки
- ***client_id*** - идентификатор клиента
- ***date_reg*** - дата активации
- ***market*** - 
- ***group*** - 
- ***archetype*** - 
- ***hardware*** - аппаратное обеспечение
- ***channel*** - канал продаж
- ***dealer*** - 
- ***phone*** - телефон
- ***type_t*** - тарифные планы
- ***device*** - устройство 


- ***churn*** - ушедшие абоненты. Целевая переменная 

In [3]:
general_df.head()

Unnamed: 0,client_id,date_reg,market,group,archetype,hardware,channel,dealer,phone,type_t,device,churn
0,09777552-cc75-4f03-9438-11c3a7480f97,2016-05-10 00:00:00.0,R25,PP1_9,PP2_0,PPT1,C1_22,C2_22,D2254,DT1_6,DT2_5,0.0
1,5fbe2ea9-0c80-4164-b703-416b85c23d8c,2016-04-19 00:00:00.0,R25,PP1_9,PP2_0,PPT1,C1_6,C2_0,,,,1.0
2,8cbd48c0-8f90-4f08-bef7-8f09d9924e55,2016-05-19 00:00:00.0,R25,PP1_9,PP2_0,PPT1,C1_6,C2_0,D2594,DT1_6,DT2_5,0.0
3,ffda8d15-cd38-45c3-84ba-29a9811ed79e,2016-05-28 00:00:00.0,R25,PP1_9,PP2_0,PPT1,C1_22,C2_11,D3131,DT1_6,DT2_5,1.0
4,1f9618ad-a980-4110-af32-9741e491a728,2016-05-23 00:00:00.0,R4,PP1_9,PP2_0,PPT1,C1_28,C2_21,D454,DT1_6,DT2_5,0.0


In [4]:
general_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240000 entries, 0 to 239999
Data columns (total 12 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   client_id  240000 non-null  object 
 1   date_reg   240000 non-null  object 
 2   market     240000 non-null  object 
 3   group      240000 non-null  object 
 4   archetype  240000 non-null  object 
 5   hardware   240000 non-null  object 
 6   channel    240000 non-null  object 
 7   dealer     240000 non-null  object 
 8   phone      225165 non-null  object 
 9   type_t     225165 non-null  object 
 10  device     225165 non-null  object 
 11  churn      240000 non-null  float64
dtypes: float64(1), object(11)
memory usage: 22.0+ MB


In [5]:
general_df.shape

(240000, 12)

In [6]:
# количество пропущеных значений категориальных признаков
(general_df.isnull().sum()).sort_values(ascending=False)

device       14835
type_t       14835
phone        14835
churn            0
dealer           0
channel          0
hardware         0
archetype        0
group            0
market           0
date_reg         0
client_id        0
dtype: int64

In [7]:
# замена пропущенных значений ктегориальных признаков на отдельную сущность 'NAN'
general_df = general_df.fillna('NAN')

In [8]:
# количество пропущеных значений категориальных признаков после замены
(general_df.isnull().sum()).sort_values(ascending=False)

churn        0
device       0
type_t       0
phone        0
dealer       0
channel      0
hardware     0
archetype    0
group        0
market       0
date_reg     0
client_id    0
dtype: int64

In [9]:
# приведение даты активаци к типу дата
general_df['date_reg'] = pd.to_datetime(general_df['date_reg'], format="%Y-%m-%d")

# приведение целевой переменной к целому типу
general_df['churn'] = general_df['churn'].astype('int64')

In [10]:
general_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240000 entries, 0 to 239999
Data columns (total 12 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   client_id  240000 non-null  object        
 1   date_reg   240000 non-null  datetime64[ns]
 2   market     240000 non-null  object        
 3   group      240000 non-null  object        
 4   archetype  240000 non-null  object        
 5   hardware   240000 non-null  object        
 6   channel    240000 non-null  object        
 7   dealer     240000 non-null  object        
 8   phone      240000 non-null  object        
 9   type_t     240000 non-null  object        
 10  device     240000 non-null  object        
 11  churn      240000 non-null  int64         
dtypes: datetime64[ns](1), int64(1), object(10)
memory usage: 22.0+ MB


Всего 240000 абонентов.  

Есть пропущенные значения у признаков device, type_t, phone.   
Выделяем пропущенные значения как отдельную сущность NAN

In [11]:
# распределение целевой переменной 
general_df['churn'].value_counts()

0    141748
1     98252
Name: churn, dtype: int64

## traffic
Количество/длительность звонков (входящих и исходящих), количество смс, дата.

#### Признаки
- ***client_id*** - идентификатор клиента
- ***day*** - номер дня ?? 
- ***call_in_num*** - количество входящих звонков 
- ***call_out_num*** - количество исходящих звонков
- ***call_in_dur*** - продолжительность входящих звонков
- ***call_out_dur*** - продолжительность исходящих звонков
- ***sms_out_num*** - исходящие СМС
- ***gprs*** - gprs

In [12]:
traffic_df.head()

Unnamed: 0,client_id,day,call_in_num,call_out_num,call_in_dur,call_out_dur,sms_out_num,gprs
0,09777552-cc75-4f03-9438-11c3a7480f97,7,0.0,3.0,0.0,418.0,0.0,0.0
1,09777552-cc75-4f03-9438-11c3a7480f97,1,0.0,1.0,0.0,1558.0,0.0,0.0
2,09777552-cc75-4f03-9438-11c3a7480f97,13,0.0,1.0,0.0,52.0,0.0,0.0
3,09777552-cc75-4f03-9438-11c3a7480f97,11,0.0,1.0,0.0,84.0,1.0,0.0
4,09777552-cc75-4f03-9438-11c3a7480f97,3,0.0,2.0,0.0,211.0,4.0,0.0


In [13]:
traffic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3726461 entries, 0 to 3726460
Data columns (total 8 columns):
 #   Column        Dtype  
---  ------        -----  
 0   client_id     object 
 1   day           int64  
 2   call_in_num   float64
 3   call_out_num  float64
 4   call_in_dur   float64
 5   call_out_dur  float64
 6   sms_out_num   float64
 7   gprs          float64
dtypes: float64(6), int64(1), object(1)
memory usage: 227.4+ MB


In [14]:
traffic_df.shape

(3726461, 8)

In [15]:
# Количество уникальных клиентов в traffic_df
len(traffic_df[['client_id']].stack().value_counts()) 

240000

## recharge
Аггрегированная по дням информация о пополнениях в привязке к абоненту.

In [16]:
recharge_df.head()

Unnamed: 0,client_id,day,recharges
0,6cac2aeb-1816-498f-ad36-c1f27ee3044a,17.0,333.704
1,f1929358-fd06-45cf-a1e5-f5546ad3997a,17.0,634.0376
2,f1929358-fd06-45cf-a1e5-f5546ad3997a,0.0,734.1488
3,f1929358-fd06-45cf-a1e5-f5546ad3997a,18.0,333.704
4,db6100d4-5e2a-47f5-9e9c-864b875e4984,0.0,6.67408


In [17]:
recharge_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 467676 entries, 0 to 467675
Data columns (total 3 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   client_id  467676 non-null  object 
 1   day        467676 non-null  float64
 2   recharges  467676 non-null  float64
dtypes: float64(2), object(1)
memory usage: 10.7+ MB


## balance
Информация о балансе абонентов на конец каждого дня.

In [18]:
balance_df.head()

Unnamed: 0,client_id,day,balance
0,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,0.0,3225.805111
1,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,1.0,3114.570222
2,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,2.0,3003.335333
3,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,3.0,2892.100443
4,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,4.0,2780.865554


In [19]:
balance_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7200000 entries, 0 to 7199999
Data columns (total 3 columns):
 #   Column     Dtype  
---  ------     -----  
 0   client_id  object 
 1   day        float64
 2   balance    float64
dtypes: float64(2), object(1)
memory usage: 164.8+ MB


## Агрегация данных по id абонента 

### traffic

In [20]:
traffic_df.head()

Unnamed: 0,client_id,day,call_in_num,call_out_num,call_in_dur,call_out_dur,sms_out_num,gprs
0,09777552-cc75-4f03-9438-11c3a7480f97,7,0.0,3.0,0.0,418.0,0.0,0.0
1,09777552-cc75-4f03-9438-11c3a7480f97,1,0.0,1.0,0.0,1558.0,0.0,0.0
2,09777552-cc75-4f03-9438-11c3a7480f97,13,0.0,1.0,0.0,52.0,0.0,0.0
3,09777552-cc75-4f03-9438-11c3a7480f97,11,0.0,1.0,0.0,84.0,1.0,0.0
4,09777552-cc75-4f03-9438-11c3a7480f97,3,0.0,2.0,0.0,211.0,4.0,0.0


Возможные новые признаки трафика по абоненту:
- количество дней активности трафика
- общее количество исх./вх. звонков
- общая продолжительность исх./вх. звонков
- общее количество SMS
- сумма gprs


- средняя продолжительность исх./вх. звонков
- общее количество звонков
- общая продолжительность звонков
- среднее количество звонков за активный день


- ? мксимальный день активности трафика

In [21]:
traffic_df['max_day'] = traffic_df['day']

# группируем данные
traffic_agg = traffic_df.groupby('client_id', as_index=False).aggregate({
    # количество дней активности трафика
    'day': 'count',
    # общее количество вх. звонков
    'call_in_num': 'sum',
    # общее количество исх. звонков
    'call_out_num': 'sum',
    # общая продолжительность вх. звонков
    'call_in_dur': 'sum',
    # общая продолжительность исх. звонков
    'call_out_dur': 'sum',
    # общее количество SMS
    'sms_out_num': 'sum',
    # сумма gprs
    'gprs': 'sum',
    # последний номер дня активности трафика за месяц
    'max_day': 'max'}).rename(columns={
                                    'day':'quantity_days_traffic', 
                                    'call_in_num': 'all_call_in_num',
                                    'call_out_num': 'all_call_out_num',
                                    'call_in_dur': 'all_call_in_dur',
                                    'call_out_dur': 'all_call_out_dur',
                                    'sms_out_num': 'all_out_sms',
                                    'gprs': 'all_gprs',
                                    'max_day': 'last_day_traffic'})

# средняя продолжительность вх. звонков
traffic_agg['mean_call_in_dur'] = traffic_agg['all_call_in_dur'] / traffic_agg['all_call_in_num']
# средняя продолжительность исх. звонков
traffic_agg['mean_call_out_dur'] = traffic_agg['all_call_out_dur'] / traffic_agg['all_call_out_num']
# общее количество звонков
traffic_agg['all_calls_num'] = traffic_agg['all_call_in_num'] + traffic_agg['all_call_out_num']
# общая продолжительность звонков
traffic_agg['all_calls_dur'] = traffic_agg['all_call_in_dur'] + traffic_agg['all_call_out_dur']
# корректированный последний день активности трафика
traffic_agg['last_day_traffic'] = traffic_agg['last_day_traffic'] + 1

# избавляемся от пропусков из-за деления на 0
traffic_agg = traffic_agg.fillna(0)

del traffic_df['max_day']

In [22]:
traffic_agg.head()

Unnamed: 0,client_id,quantity_days_traffic,all_call_in_num,all_call_out_num,all_call_in_dur,all_call_out_dur,all_out_sms,all_gprs,last_day_traffic,mean_call_in_dur,mean_call_out_dur,all_calls_num,all_calls_dur
0,000015c6-94a3-4989-861a-ebd71c848c68,29,102.0,250.0,16542.0,44359.0,13.0,0.0,30,162.176471,177.436,352.0,60901.0
1,00007210-4b79-4845-bdcc-b3621e65e2bf,1,0.0,0.0,0.0,0.0,4.0,0.0,1,0.0,0.0,0.0,0.0
2,0000a503-c107-4578-9410-be301f142013,6,7.0,34.0,586.0,1131.0,4.0,0.0,17,83.714286,33.264706,41.0,1717.0
3,0000d08d-2a6f-4f84-b660-298678c422f7,1,0.0,0.0,0.0,0.0,0.0,14.20827,1,0.0,0.0,0.0,0.0
4,0000e8f2-3741-4196-8d7f-c19ff7a10453,30,192.0,785.0,14098.0,40953.0,19.0,0.0,30,73.427083,52.169427,977.0,55051.0


In [23]:
# Количество уникальных абонентов 
len(traffic_agg[['client_id']].stack().value_counts()) 

240000

### recharge

In [24]:
recharge_df.head()

Unnamed: 0,client_id,day,recharges
0,6cac2aeb-1816-498f-ad36-c1f27ee3044a,17.0,333.704
1,f1929358-fd06-45cf-a1e5-f5546ad3997a,17.0,634.0376
2,f1929358-fd06-45cf-a1e5-f5546ad3997a,0.0,734.1488
3,f1929358-fd06-45cf-a1e5-f5546ad3997a,18.0,333.704
4,db6100d4-5e2a-47f5-9e9c-864b875e4984,0.0,6.67408


Возможные новые признаки пополнений по абоненту:
- количество пополнений
- общая сумма пополнений


- средняя сумма пополнений 
- максимальный номер дня пополнения в месяце

In [25]:
recharge_df['max_day'] = recharge_df['day']
# группируем данные
recharge_agg = recharge_df.groupby('client_id', as_index=False).aggregate({
    # количество пополнений
    'day': 'count',
    # общая сумма пополнений
    'recharges': 'sum',
    # поледний день пополнения
    'max_day': 'max'}).rename(columns={
                                    'day':'quantity_recharge', 
                                    'recharges': 'recharges_sum',
                                    'max_day': 'last_day_recharge'})

# средняя сумма пополнений
recharge_agg['mean_recharges_sum'] = recharge_agg['recharges_sum'] / recharge_agg['quantity_recharge']
# корректированный последний день активности трафика
recharge_agg['last_day_recharge'] = recharge_agg['last_day_recharge'] + 1

# избавляемся от пропусков из-за деления на 0
recharge_agg = recharge_agg.fillna(0)

del recharge_df['max_day']

In [26]:
recharge_agg.head()

Unnamed: 0,client_id,quantity_recharge,recharges_sum,last_day_recharge,mean_recharges_sum
0,000015c6-94a3-4989-861a-ebd71c848c68,1,2002.224,21.0,2002.224
1,0000a503-c107-4578-9410-be301f142013,2,2549.632042,8.0,1274.816021
2,000184d3-beee-432b-aede-453203a223b8,1,7341.488,3.0,7341.488
3,0001c87e-ddf6-43da-b3be-1266ec9faeba,13,9150.897829,30.0,703.915218
4,000266c3-8081-4b84-8f80-1e44afb6d935,9,1915.46096,30.0,212.828996


In [27]:
# Количество уникальных абонентов 
len(recharge_agg[['client_id']].stack().value_counts()) 

153321

In [28]:
len(recharge_agg[['client_id']].stack().value_counts()) - 240000

-86679

86679 абонентов в первый месяц не пополняли баланс

### balance

In [29]:
balance_df.head()

Unnamed: 0,client_id,day,balance
0,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,0.0,3225.805111
1,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,1.0,3114.570222
2,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,2.0,3003.335333
3,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,3.0,2892.100443
4,c602c1cf-71d4-413e-a8fe-c9c76abbb14f,4.0,2780.865554


Возможные новые признаки баланса по абоненту:
- начальный баланс
- конечный быланс
- разница между начальным и конечным балансом
- средний баланс в первый месяц
- ??? отрицательность конечного баланса


- ??? дисперсия баланса
- ??? размах 
- ??? медиана


In [30]:
# начальный баланс
balance_start = balance_df[balance_df['day'] == 0.0]
# конечный баланс
balance_final = balance_df[balance_df['day'] == 29.0]
# средний баданс 
balance_mean = balance_df.groupby('client_id', as_index=False).aggregate({
    'balance': 'mean'}).rename(columns={'balance': 'mean_balance'})

In [31]:
# подсоединяю началтный баланс
balance_mean = pd.merge(balance_mean,
                        balance_start,
                        on=['client_id'])
del balance_mean['day']
balance_mean = balance_mean.rename(columns={'balance': 'start_balance'})

# подсоединяю конечный баланс
balance_mean = pd.merge(balance_mean,
                        balance_final,
                        on=['client_id'])
del balance_mean['day']
balance_mean = balance_mean.rename(columns={'balance': 'final_balance'})

balance_agg = balance_mean

# разница начального и конечного баланса
balance_agg['difference_balance'] = balance_agg['final_balance'] - balance_agg['start_balance']

balance_agg.head()

Unnamed: 0,client_id,mean_balance,start_balance,final_balance,difference_balance
0,000015c6-94a3-4989-861a-ebd71c848c68,948.164299,1268.0752,1041.15648,-226.91872
1,00007210-4b79-4845-bdcc-b3621e65e2bf,-20.044487,96.77416,-37.374848,-134.149008
2,0000a503-c107-4578-9410-be301f142013,2514.250519,3225.849827,1176.106378,-2049.74345
3,0000d08d-2a6f-4f84-b660-298678c422f7,-1668.52,-1668.52,-1668.52,0.0
4,0000e8f2-3741-4196-8d7f-c19ff7a10453,282.091115,310.34472,210.23352,-100.1112


In [32]:
# Количество уникальных абонентов 
len(balance_agg[['client_id']].stack().value_counts()) 

240000

## Объединение данных

In [33]:
df = general_df.copy()
df = df.merge(traffic_agg, how='left', on='client_id')
df = df.merge(recharge_agg, how='left', on='client_id')
df = df.merge(balance_agg, how='left', on='client_id')

In [34]:
df.head()

Unnamed: 0,client_id,date_reg,market,group,archetype,hardware,channel,dealer,phone,type_t,...,all_calls_num,all_calls_dur,quantity_recharge,recharges_sum,last_day_recharge,mean_recharges_sum,mean_balance,start_balance,final_balance,difference_balance
0,09777552-cc75-4f03-9438-11c3a7480f97,2016-05-10,R25,PP1_9,PP2_0,PPT1,C1_22,C2_22,D2254,DT1_6,...,19.0,6506.0,1.0,1001.112,1.0,1001.112,1202.535734,2269.1872,1.334816,-2267.852384
1,5fbe2ea9-0c80-4164-b703-416b85c23d8c,2016-04-19,R25,PP1_9,PP2_0,PPT1,C1_6,C2_0,NAN,NAN,...,546.0,27599.0,2.0,5526.13824,13.0,2763.06912,1006.98519,2378.642112,447.830768,-1930.811344
2,8cbd48c0-8f90-4f08-bef7-8f09d9924e55,2016-05-19,R25,PP1_9,PP2_0,PPT1,C1_6,C2_0,D2594,DT1_6,...,476.0,141308.0,9.0,7014.45808,30.0,779.384231,291.946506,1134.5936,1355.505648,220.912048
3,ffda8d15-cd38-45c3-84ba-29a9811ed79e,2016-05-28,R25,PP1_9,PP2_0,PPT1,C1_22,C2_11,D3131,DT1_6,...,31.0,2931.0,,,,,406.673941,1268.0752,-40.04448,-1308.11968
4,1f9618ad-a980-4110-af32-9741e491a728,2016-05-23,R4,PP1_9,PP2_0,PPT1,C1_28,C2_21,D454,DT1_6,...,9.0,213.0,1.0,6674.08,1.0,6674.08,0.0,0.0,0.0,0.0


In [35]:
df.columns

Index(['client_id', 'date_reg', 'market', 'group', 'archetype', 'hardware',
       'channel', 'dealer', 'phone', 'type_t', 'device', 'churn',
       'quantity_days_traffic', 'all_call_in_num', 'all_call_out_num',
       'all_call_in_dur', 'all_call_out_dur', 'all_out_sms', 'all_gprs',
       'last_day_traffic', 'mean_call_in_dur', 'mean_call_out_dur',
       'all_calls_num', 'all_calls_dur', 'quantity_recharge', 'recharges_sum',
       'last_day_recharge', 'mean_recharges_sum', 'mean_balance',
       'start_balance', 'final_balance', 'difference_balance'],
      dtype='object')

In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 240000 entries, 0 to 239999
Data columns (total 32 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   client_id              240000 non-null  object        
 1   date_reg               240000 non-null  datetime64[ns]
 2   market                 240000 non-null  object        
 3   group                  240000 non-null  object        
 4   archetype              240000 non-null  object        
 5   hardware               240000 non-null  object        
 6   channel                240000 non-null  object        
 7   dealer                 240000 non-null  object        
 8   phone                  240000 non-null  object        
 9   type_t                 240000 non-null  object        
 10  device                 240000 non-null  object        
 11  churn                  240000 non-null  int64         
 12  quantity_days_traffic  240000 non-null  int6

In [38]:
'''
признаки с пропусками
quantity_recharge - количество пополнений            
recharges_sum - сумма пополнени                
last_day_recharge - крайний день пополнения в месяце            
mean_recharges_sum - средняя сумма пополнений

замена пропущенных значений признаков на 0, так как эти пропуски по клиентам, которые е совершали пополнений.
'''
df = df.fillna(0)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 240000 entries, 0 to 239999
Data columns (total 32 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   client_id              240000 non-null  object        
 1   date_reg               240000 non-null  datetime64[ns]
 2   market                 240000 non-null  object        
 3   group                  240000 non-null  object        
 4   archetype              240000 non-null  object        
 5   hardware               240000 non-null  object        
 6   channel                240000 non-null  object        
 7   dealer                 240000 non-null  object        
 8   phone                  240000 non-null  object        
 9   type_t                 240000 non-null  object        
 10  device                 240000 non-null  object        
 11  churn                  240000 non-null  int64         
 12  quantity_days_traffic  240000 non-null  int6

В итоговом df 30 признаков. 
- 10 категориальных 
- 20 числовых

In [39]:
# % оттока пользователей
df['churn'].mean()

0.4093833333333333

# визуализация
# ...

In [40]:
df.head()

Unnamed: 0,client_id,date_reg,market,group,archetype,hardware,channel,dealer,phone,type_t,...,all_calls_num,all_calls_dur,quantity_recharge,recharges_sum,last_day_recharge,mean_recharges_sum,mean_balance,start_balance,final_balance,difference_balance
0,09777552-cc75-4f03-9438-11c3a7480f97,2016-05-10,R25,PP1_9,PP2_0,PPT1,C1_22,C2_22,D2254,DT1_6,...,19.0,6506.0,1.0,1001.112,1.0,1001.112,1202.535734,2269.1872,1.334816,-2267.852384
1,5fbe2ea9-0c80-4164-b703-416b85c23d8c,2016-04-19,R25,PP1_9,PP2_0,PPT1,C1_6,C2_0,NAN,NAN,...,546.0,27599.0,2.0,5526.13824,13.0,2763.06912,1006.98519,2378.642112,447.830768,-1930.811344
2,8cbd48c0-8f90-4f08-bef7-8f09d9924e55,2016-05-19,R25,PP1_9,PP2_0,PPT1,C1_6,C2_0,D2594,DT1_6,...,476.0,141308.0,9.0,7014.45808,30.0,779.384231,291.946506,1134.5936,1355.505648,220.912048
3,ffda8d15-cd38-45c3-84ba-29a9811ed79e,2016-05-28,R25,PP1_9,PP2_0,PPT1,C1_22,C2_11,D3131,DT1_6,...,31.0,2931.0,0.0,0.0,0.0,0.0,406.673941,1268.0752,-40.04448,-1308.11968
4,1f9618ad-a980-4110-af32-9741e491a728,2016-05-23,R4,PP1_9,PP2_0,PPT1,C1_28,C2_21,D454,DT1_6,...,9.0,213.0,1.0,6674.08,1.0,6674.08,0.0,0.0,0.0,0.0


In [43]:
df.to_csv('DataFrame.csv', sep=',', index=False)