In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from dateutil.relativedelta import relativedelta

In [2]:
# Data load
tariff_plans_df = pd.read_csv('Tariff_plans_change.csv')
charges_df = pd.read_csv('Charges.csv')
suspensions_df = pd.read_csv('Suspended.csv')

# String data to datetime + cating some numerical features to categorical
tariff_plans_df['START_DTTM'] = pd.to_datetime(tariff_plans_df['START_DTTM'])
tariff_plans_df['END_DTTM'] = pd.to_datetime(tariff_plans_df['END_DTTM'], errors='coerce')
tariff_plans_df['TARIFF_PLAN_ID'] = tariff_plans_df['TARIFF_PLAN_ID'].astype('category')
charges_df['BILL_MONTH'] = pd.to_datetime(charges_df['BILL_MONTH'])
suspensions_df['START_DT'] = pd.to_datetime(suspensions_df['START_DT'])
suspensions_df['END_DT'] = pd.to_datetime(suspensions_df['END_DT'], errors='coerce')

In [3]:
tariff_plans_df.head(10)

Unnamed: 0,SUBSCRIBER_ID,TARIFF_PLAN_ID,START_DTTM,END_DTTM
0,1,2,2013-11-22 21:10:08,2017-03-11 14:24:17
1,1,3,2017-03-11 14:24:18,NaT
2,2,1,2016-02-05 16:36:34,2017-06-29 12:39:58
3,2,5,2017-06-29 12:39:59,NaT
4,3,1,2015-05-06 19:17:46,2017-04-14 12:00:59
5,3,5,2017-04-14 12:01:00,NaT
6,4,3,2016-01-27 23:03:51,2017-01-09 19:27:50
7,4,5,2017-01-09 19:27:51,NaT
8,5,1,2016-02-06 08:07:15,2017-05-08 12:39:59
9,5,5,2017-05-08 12:40:00,NaT


In [4]:
tariff_plans_df['SUBSCRIBER_ID'].nunique()

5989

In [5]:
charges_df.head(10)

Unnamed: 0,SUBSCRIBER_ID,BILL_MONTH,CHARGES
0,4741,2017-03-01,4.572
1,1754,2017-05-01,4.894
2,381,2017-01-01,6.67
3,5832,2016-12-01,4.299
4,5277,2017-03-01,1.61
5,4512,2017-02-01,3.22
6,1688,2017-05-01,7.391
7,3204,2017-10-01,2.456
8,1277,2016-08-01,11.242
9,5424,2017-04-01,26.986


In [6]:
suspensions_df.head(10)

Unnamed: 0,SUBSCRIBER_ID,START_DT,END_DT,STATUS
0,5195,2017-12-27,2017-12-27,Suspended
1,5198,2017-04-18,2017-04-18,Suspended
2,381,2017-04-18,2017-04-20,Suspended
3,3500,2017-12-21,NaT,Suspended
4,3302,2016-08-23,2016-09-07,Suspended
5,4512,2017-04-14,2017-04-28,Suspended
6,1738,2016-08-11,2017-01-10,Suspended
7,2294,2017-01-27,2017-01-27,Suspended
8,1277,2017-01-27,2017-03-21,Suspended
9,5424,2016-11-18,2016-11-29,Suspended


In [7]:
suspensions_df['SUBSCRIBER_ID'].nunique()

1310

## Tariff migration

In [8]:
# Creating dataset for all of migrations

tariff_migrations = tariff_plans_df.copy()
tariff_migrations['MIGRATION_TARIFF'] = tariff_migrations.groupby('SUBSCRIBER_ID')['TARIFF_PLAN_ID'].shift(-1)
migrations = tariff_migrations.dropna(subset=['MIGRATION_TARIFF']).reset_index()
migration_flows = migrations.groupby(['TARIFF_PLAN_ID', 'MIGRATION_TARIFF']).size().reset_index(name='COUNT')
migration_flows['MIGRATION_TARIFF'] = migration_flows['MIGRATION_TARIFF'].astype('category')

# Drop rows with same previous and migration tariffs
drop_mask = (migration_flows['TARIFF_PLAN_ID'] == migration_flows['MIGRATION_TARIFF'])
migration_flows.drop(migration_flows[drop_mask].index, inplace=True)
migration_flows

  migration_flows = migrations.groupby(['TARIFF_PLAN_ID', 'MIGRATION_TARIFF']).size().reset_index(name='COUNT')


Unnamed: 0,TARIFF_PLAN_ID,MIGRATION_TARIFF,COUNT
1,1,2,5
2,1,3,55
3,1,4,378
4,1,5,1613
5,2,1,13
7,2,3,20
8,2,4,158
9,2,5,417
10,3,1,28
11,3,2,12


In [9]:
# Creating Sankey diagram for migration flows

labels = (pd.concat([migration_flows['TARIFF_PLAN_ID'], migration_flows['MIGRATION_TARIFF']])
          .unique())
unique_labels_amount = len(labels)
labels = list(map(lambda l: f'Tariff {l}', labels))
sankey_labels = labels * 2

source = migration_flows['TARIFF_PLAN_ID'].apply(lambda x: sankey_labels.index(f'Tariff {x}')).to_list()
target = migration_flows['MIGRATION_TARIFF'].apply(lambda x: sankey_labels.index(f'Tariff {x}', unique_labels_amount)).to_list()
value = migration_flows['COUNT']

fig = go.Figure()
fig.add_trace(
    go.Sankey(
        valuesuffix=' Migrations',
        arrangement='snap',
        node=dict(
            label=sankey_labels,
            pad=20,
            thickness=30,  
            
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )
)
fig.update_layout(title='Tariff-to-Tariff migration flows diagram. Left side is emigration tariff. Right side is immigration tariff',
                  height=1080, font=dict(size=20))
fig.show()

Most popular emigration tariff: Tariff 3 (~2.78k migrations)

Least popular emigration tariff: Tariff 5 (209 migrations)

Most popular imigration tariff: Tariff 5 (~4.93k migrations)

Least popular imigration tariff: Tariff 2 (41 migrations)

## Average month charge change

In [10]:
tariff_plans_df[tariff_plans_df['SUBSCRIBER_ID'] == 1]

Unnamed: 0,SUBSCRIBER_ID,TARIFF_PLAN_ID,START_DTTM,END_DTTM
0,1,2,2013-11-22 21:10:08,2017-03-11 14:24:17
1,1,3,2017-03-11 14:24:18,NaT


In [11]:
tariff_plans_df.groupby('SUBSCRIBER_ID').size().value_counts()

2     5718
3      233
4       20
6       12
5        3
8        1
16       1
18       1
Name: count, dtype: int64

In [12]:
charges_df[charges_df['SUBSCRIBER_ID'] == 1].sort_values(by='BILL_MONTH')

Unnamed: 0,SUBSCRIBER_ID,BILL_MONTH,CHARGES
55587,1,2016-06-01,21.137
56269,1,2016-07-01,32.545
56145,1,2016-08-01,30.042
55401,1,2016-09-01,24.964
55897,1,2016-10-01,27.628
55959,1,2016-11-01,22.374
55215,1,2016-12-01,21.132
56331,1,2017-01-01,20.268
55649,1,2017-02-01,27.858
55835,1,2017-03-01,27.66


In [13]:
charges_df.sort_values(by='BILL_MONTH').tail(5)

Unnamed: 0,SUBSCRIBER_ID,BILL_MONTH,CHARGES
58664,2908,2017-12-01,5.124
22109,4749,2017-12-01,9.832
97724,5701,2017-12-01,0.0
58630,3428,2017-12-01,4.964
56297,2842,2017-12-01,5.224


In [14]:
charges_change_df = tariff_plans_df.copy()
charges_change_df['PREV_PERIOD_CHARGE'] = None
charges_change_df['POST_PERIOD_CHARGE'] = None
charges_change_df['MIGRATION_TARIFF'] = None

In [15]:
charges_change_df = charges_change_df.sort_values(by=['SUBSCRIBER_ID', 'START_DTTM'])

In [None]:
# Creating pre- and post- tariff change 3-month period average monthly charge for each of subscribers
for ind, row in charges_change_df.iterrows():
    sub_id, end_date = row['SUBSCRIBER_ID'], row['END_DTTM']
    if end_date is pd.NaT:
        continue

    post_period_start = end_date.replace(day=1).normalize() + relativedelta(months=1)
    post_period_end = end_date.replace(day=1).normalize() + relativedelta(months=3)    
    post_period_mask = (charges_df['SUBSCRIBER_ID'] == sub_id) & (charges_df['BILL_MONTH'] >= post_period_start) & (charges_df['BILL_MONTH'] <= post_period_end)
    post_charge = charges_df[post_period_mask]['CHARGES'].mean()

    pre_period_end = end_date.replace(day=1).normalize() - relativedelta(months=1)
    pre_period_start = end_date.replace(day=1).normalize() - relativedelta(months=3)
    pre_period_mask = (charges_df['SUBSCRIBER_ID'] == sub_id) & (charges_df['BILL_MONTH'] >= pre_period_start) & (charges_df['BILL_MONTH'] <= pre_period_end)
    pre_charge = charges_df[pre_period_mask]['CHARGES'].mean()
    
    charges_change_df.loc[ind, 'PREV_PERIOD_CHARGE'] = pre_charge
    charges_change_df.loc[ind, 'POST_PERIOD_CHARGE'] = post_charge

    
    migration_tariff_mask = (charges_change_df['SUBSCRIBER_ID'] == sub_id) & (charges_change_df['START_DTTM'] >= end_date)
    migration_tariff = charges_change_df[migration_tariff_mask]
    if len(migration_tariff) == 0:            
        charges_change_df.loc[ind, 'MIGRATION_TARIFF'] = -1
    else:
        charges_change_df.loc[ind, 'MIGRATION_TARIFF'] = migration_tariff.iloc[0]['TARIFF_PLAN_ID']    

In [None]:
# Drop useless information from created average monthly charge dataset
charges_change_df.dropna(subset=['END_DTTM'], inplace=True)
tariff_drop_ids = charges_change_df[charges_change_df['MIGRATION_TARIFF'] == -1].index
charges_change_df.drop(tariff_drop_ids, axis=0, inplace=True)
charges_change_df['MIGRATION_TARIFF'] = charges_change_df['MIGRATION_TARIFF'].astype('int16').astype('category')
charges_change_df['PREV_PERIOD_CHARGE'] = charges_change_df['PREV_PERIOD_CHARGE'].astype('float64')
charges_change_df['POST_PERIOD_CHARGE'] = charges_change_df['POST_PERIOD_CHARGE'].astype('float64')

In [18]:
charges_change_df.groupby('SUBSCRIBER_ID').size().value_counts()

1     5718
2      233
3       20
4       15
6        1
14       1
16       1
Name: count, dtype: int64

In [19]:
charges_change_df.head(10)

Unnamed: 0,SUBSCRIBER_ID,TARIFF_PLAN_ID,START_DTTM,END_DTTM,PREV_PERIOD_CHARGE,POST_PERIOD_CHARGE,MIGRATION_TARIFF
0,1,2,2013-11-22 21:10:08,2017-03-11 14:24:17,23.086,25.040333,3
2,2,1,2016-02-05 16:36:34,2017-06-29 12:39:58,5.591667,5.132,5
4,3,1,2015-05-06 19:17:46,2017-04-14 12:00:59,5.193,4.997333,5
6,4,3,2016-01-27 23:03:51,2017-01-09 19:27:50,6.67,4.812,5
8,5,1,2016-02-06 08:07:15,2017-05-08 12:39:59,3.720333,4.788,5
10,6,1,2010-05-20 13:54:35,2017-01-17 14:12:51,7.49,5.481667,5
12,7,3,2015-01-15 15:06:52,2017-05-01 17:45:46,13.984333,7.517333,5
14,8,3,2016-01-15 18:43:18,2017-02-01 14:22:22,5.263,1.559333,4
16,9,3,2010-05-26 11:42:19,2017-02-11 11:19:01,2.075333,2.099333,4
18,10,1,2015-09-08 17:19:55,2017-04-28 18:07:22,5.516667,5.189333,5


In [None]:
# Creating pre- and post- tariff change 3-month period average agregating by migration flows
charges_change_df_agg = charges_change_df.groupby(['TARIFF_PLAN_ID', 'MIGRATION_TARIFF']).agg(
    PREV_PERIOD_MEAN=('PREV_PERIOD_CHARGE', 'mean'),
    POST_PERIOD_MEAN=('POST_PERIOD_CHARGE', 'mean')
)
charges_change_df_agg.reset_index(inplace=True)
same_tariff_drop_mask = charges_change_df_agg['TARIFF_PLAN_ID'] == charges_change_df_agg['MIGRATION_TARIFF']
same_tariff_drop_ids = charges_change_df_agg[same_tariff_drop_mask].index
charges_change_df_agg.drop(same_tariff_drop_ids, axis=0, inplace = True)
charges_change_df_agg['DELTA_CHARGE'] = charges_change_df_agg['POST_PERIOD_MEAN'] - charges_change_df_agg['PREV_PERIOD_MEAN']
charges_change_df_agg





Unnamed: 0,TARIFF_PLAN_ID,MIGRATION_TARIFF,PREV_PERIOD_MEAN,POST_PERIOD_MEAN,DELTA_CHARGE
1,1,2,5.128467,3.9892,-1.139267
2,1,3,11.287133,10.026624,-1.260509
3,1,4,6.443156,4.603508,-1.839649
4,1,5,9.02107,6.964071,-2.056999
5,2,1,7.857,6.910051,-0.946949
7,2,3,12.844017,9.495733,-3.348283
8,2,4,6.013911,4.248722,-1.765189
9,2,5,8.233916,6.599886,-1.63403
10,3,1,8.736103,7.831724,-0.904379
11,3,2,9.583917,12.377389,2.793472


In [None]:
# Bar plot for created average monthly charge change dataset
fig = px.bar(
    charges_change_df_agg,
    x='DELTA_CHARGE',
    y=charges_change_df_agg.apply(lambda x: f"Tariff {int(x['TARIFF_PLAN_ID'])} → Tariff {int(x['MIGRATION_TARIFF'])}", axis=1),
    orientation='h',
    color='DELTA_CHARGE',
    color_continuous_scale=['red', 'yellow', 'green'],
    title='Change in Average Charges After Tariff Migration',
    labels={'DELTA_CHARGE': 'Delta of Average Charges After Migration', 'y': 'Migration Direction'}
)
fig.update_layout(
    xaxis_title='Delta of Average Charges After Migration', 
    yaxis_title='Migration Direction',
    height=1080,
    font=dict(size=20)
)
fig.show()

In [22]:
print(f'Average charge change by all migration flows: {charges_change_df_agg["DELTA_CHARGE"].mean()}')

Average charge change by all migration flows: -0.8765267611645594


## Suspensions analysis

In [23]:
tariff_plans_df[tariff_plans_df['SUBSCRIBER_ID'] == 5]

Unnamed: 0,SUBSCRIBER_ID,TARIFF_PLAN_ID,START_DTTM,END_DTTM
8,5,1,2016-02-06 08:07:15,2017-05-08 12:39:59
9,5,5,2017-05-08 12:40:00,NaT


In [24]:
suspensions_df.sort_values(by='SUBSCRIBER_ID').head(10)

Unnamed: 0,SUBSCRIBER_ID,START_DT,END_DT,STATUS
2768,5,2017-06-16,2017-06-29,Suspended
2718,5,2017-07-31,2017-08-14,Suspended
2961,5,2016-07-25,2016-08-14,Suspended
2817,5,2017-03-23,2017-04-15,Suspended
2866,5,2016-11-22,2016-11-28,Suspended
2914,5,2016-09-19,2016-09-28,Suspended
1771,8,2017-02-23,2017-03-03,Suspended
1832,8,2016-09-27,2016-12-05,Suspended
1893,8,2016-07-25,2016-08-04,Suspended
1954,8,2017-11-21,2017-11-22,Suspended


In [25]:
suspensions_df[suspensions_df['SUBSCRIBER_ID'] == 5].sort_values(by='START_DT')

Unnamed: 0,SUBSCRIBER_ID,START_DT,END_DT,STATUS
2961,5,2016-07-25,2016-08-14,Suspended
2914,5,2016-09-19,2016-09-28,Suspended
2866,5,2016-11-22,2016-11-28,Suspended
2817,5,2017-03-23,2017-04-15,Suspended
2768,5,2017-06-16,2017-06-29,Suspended
2718,5,2017-07-31,2017-08-14,Suspended


In [None]:
# Creating basis for future statistics of suspensions change rate
# It is not needed to count subscribers that have never been in block
suspensions_change_df = tariff_plans_df[tariff_plans_df['SUBSCRIBER_ID'].isin(suspensions_df['SUBSCRIBER_ID'])].copy()
suspensions_change_df['PREV_PERIOD_SUSPENSIONS'] = None
suspensions_change_df['POST_PERIOD_SUSPENSIONS'] = None
suspensions_change_df['MIGRATION_TARIFF'] = None
suspensions_change_df = suspensions_change_df.sort_values(by=['SUBSCRIBER_ID', 'START_DTTM'])

In [None]:
# Creating pre- and post- tariff migration total amount of suspensions of 3-month period for each of subscribers
for ind, row in suspensions_change_df.iterrows():
    sub_id, end_date = row['SUBSCRIBER_ID'], row['END_DTTM']
    if end_date is pd.NaT:
        continue

    post_period_start = end_date.replace(day=1).normalize() + relativedelta(months=1)
    post_period_end = end_date.replace(day=1).normalize() + relativedelta(months=3)    
    post_period_mask = ((suspensions_df['SUBSCRIBER_ID'] == sub_id) & 
                        (suspensions_df['START_DT'] >= post_period_start) & 
                        (suspensions_df['START_DT'] <= post_period_end))
    post_charge = len(suspensions_df[post_period_mask])

    pre_period_end = end_date.replace(day=1).normalize() - relativedelta(months=1)
    pre_period_start = end_date.replace(day=1).normalize() - relativedelta(months=3)
    pre_period_mask = ((suspensions_df['SUBSCRIBER_ID'] == sub_id) & 
                       (suspensions_df['START_DT'] >= pre_period_start) & 
                       (suspensions_df['START_DT'] <= pre_period_end))
    pre_charge = len(suspensions_df[pre_period_mask])
    
    suspensions_change_df.loc[ind, 'PREV_PERIOD_SUSPENSIONS'] = pre_charge
    suspensions_change_df.loc[ind, 'POST_PERIOD_SUSPENSIONS'] = post_charge

    
    migration_tariff_mask = (suspensions_change_df['SUBSCRIBER_ID'] == sub_id) & (suspensions_change_df['START_DTTM'] >= end_date)
    migration_tariff = suspensions_change_df[migration_tariff_mask]
    if len(migration_tariff) == 0:            
        suspensions_change_df.loc[ind, 'MIGRATION_TARIFF'] = -1
    else:
        suspensions_change_df.loc[ind, 'MIGRATION_TARIFF'] = migration_tariff.iloc[0]['TARIFF_PLAN_ID']   

In [28]:
suspensions_change_df.head()

Unnamed: 0,SUBSCRIBER_ID,TARIFF_PLAN_ID,START_DTTM,END_DTTM,PREV_PERIOD_SUSPENSIONS,POST_PERIOD_SUSPENSIONS,MIGRATION_TARIFF
8,5,1,2016-02-06 08:07:15,2017-05-08 12:39:59,1.0,2.0,5.0
9,5,5,2017-05-08 12:40:00,NaT,,,
14,8,3,2016-01-15 18:43:18,2017-02-01 14:22:22,0.0,1.0,4.0
15,8,4,2017-02-01 14:22:23,NaT,,,
16,9,3,2010-05-26 11:42:19,2017-02-11 11:19:01,1.0,1.0,4.0


In [29]:
suspensions_change_df.dropna(subset=['END_DTTM'], inplace=True)
tariff_drop_ids = suspensions_change_df[suspensions_change_df['MIGRATION_TARIFF'] == -1].index
suspensions_change_df.drop(tariff_drop_ids, axis=0, inplace=True)
suspensions_change_df['MIGRATION_TARIFF'] = suspensions_change_df['MIGRATION_TARIFF'].astype('int16').astype('category')
suspensions_change_df['PREV_PERIOD_SUSPENSIONS'] = suspensions_change_df['PREV_PERIOD_SUSPENSIONS'].astype('int16')
suspensions_change_df['POST_PERIOD_SUSPENSIONS'] = suspensions_change_df['POST_PERIOD_SUSPENSIONS'].astype('int16')

In [30]:
suspensions_change_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1371 entries, 8 to 12340
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   SUBSCRIBER_ID            1371 non-null   int64         
 1   TARIFF_PLAN_ID           1371 non-null   category      
 2   START_DTTM               1371 non-null   datetime64[ns]
 3   END_DTTM                 1371 non-null   datetime64[ns]
 4   PREV_PERIOD_SUSPENSIONS  1371 non-null   int16         
 5   POST_PERIOD_SUSPENSIONS  1371 non-null   int16         
 6   MIGRATION_TARIFF         1371 non-null   category      
dtypes: category(2), datetime64[ns](2), int16(2), int64(1)
memory usage: 51.2 KB


In [31]:
suspensions_change_df.head(10)

Unnamed: 0,SUBSCRIBER_ID,TARIFF_PLAN_ID,START_DTTM,END_DTTM,PREV_PERIOD_SUSPENSIONS,POST_PERIOD_SUSPENSIONS,MIGRATION_TARIFF
8,5,1,2016-02-06 08:07:15,2017-05-08 12:39:59,1,2,5
14,8,3,2016-01-15 18:43:18,2017-02-01 14:22:22,0,1,4
16,9,3,2010-05-26 11:42:19,2017-02-11 11:19:01,1,1,4
26,14,3,2015-07-22 16:35:46,2017-04-04 22:56:51,0,0,5
34,18,3,2010-06-01 14:48:33,2017-04-08 07:54:56,0,0,5
38,20,2,2010-06-07 15:47:17,2017-02-23 10:50:41,0,0,4
51,26,3,2015-04-16 12:19:17,2017-03-09 12:11:04,0,0,5
57,29,1,2010-06-11 17:01:27,2017-05-07 17:05:19,1,0,5
61,31,5,2016-12-22 23:24:09,2017-01-12 14:20:14,0,0,4
63,32,3,2010-06-12 18:37:07,2017-06-25 19:11:30,0,0,5


In [None]:
# Creating pre- and post- tariff migration total amount of suspensions of 3-month period aggragating by migration flows
suspensions_change_df_agg = suspensions_change_df.groupby(['TARIFF_PLAN_ID', 'MIGRATION_TARIFF']).agg(
    PREV_PERIOD_SUM=('PREV_PERIOD_SUSPENSIONS', 'sum'),
    POST_PERIOD_SUM=('POST_PERIOD_SUSPENSIONS', 'sum')
)
suspensions_change_df_agg.reset_index(inplace=True)
same_tariff_drop_mask = suspensions_change_df_agg['TARIFF_PLAN_ID'] == suspensions_change_df_agg['MIGRATION_TARIFF']
same_tariff_drop_ids = suspensions_change_df_agg[same_tariff_drop_mask].index
suspensions_change_df_agg.drop(same_tariff_drop_ids, axis=0, inplace = True)
suspensions_change_df_agg['DELTA_SUSPENSIONS'] = suspensions_change_df_agg['POST_PERIOD_SUM'] - suspensions_change_df_agg['PREV_PERIOD_SUM']
suspensions_change_df_agg





Unnamed: 0,TARIFF_PLAN_ID,MIGRATION_TARIFF,PREV_PERIOD_SUM,POST_PERIOD_SUM,DELTA_SUSPENSIONS
1,1,2,0,0,0
2,1,3,2,1,-1
3,1,4,42,33,-9
4,1,5,111,92,-19
5,2,1,0,3,3
7,2,3,0,1,1
8,2,4,3,24,21
9,2,5,4,19,15
10,3,1,3,1,-2
11,3,2,7,1,-6


In [None]:
# Bar plot for created suspensions change rate dataset
fig = px.bar(
    suspensions_change_df_agg,
    x='DELTA_SUSPENSIONS',
    y=suspensions_change_df_agg.apply(lambda x: f"Tariff {int(x['TARIFF_PLAN_ID'])} → Tariff {int(x['MIGRATION_TARIFF'])}", axis=1),
    orientation='h',
    color='DELTA_SUSPENSIONS',
    color_continuous_scale=['green', 'yellow', 'red'],
    title='Change in Total Suspensions After Tariff Migration (Absolute Numbers)',
    labels={'DELTA_SUSPENSIONS': 'Delta of Total Suspensions After Migration', 'y': 'Migration Direction'}
)
fig.update_layout(
    xaxis_title='Delta of Total Suspensions After Migration', 
    yaxis_title='Migration Direction',
    height=1080,
    font=dict(size=20)
)
fig.show()

In [34]:
print(f'Average suspensions amount change by all migration flows: {suspensions_change_df_agg["DELTA_SUSPENSIONS"].mean()}')

Average suspensions amount change by all migration flows: -3.65
