In [1]:
# Core analysis: Value based pricing
# Function: RMS, market share, HHI
# Purpose: create market shares, HHI, and RMS for markets
# Creator: Alex Deshowitz
# Modified by: Alex Deshowitz
# Customer: LATAM Team
# Date created: 3/7/2019 
# Last updated: 3/12/2019 
# Last update comments: Added logic to group all LATAM cxrs

############################################################

# Data sources required:

# MIDT

#### DISCLAIMER ####

"""THIS MODEL IS BEING DELIVERED TO YOU AS IS, WITHOUT ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, SATISFACTORY QUALITY, FITNESS FOR A PARTICULAR PURPOSE.

In addition, McKinsey does not warrant any results obtained or conclusions drawn from the use of the model, and any opinions or estimates reflected in the model constitute our subjective judgment, with no guarantees made regarding accuracy or completeness.

McKinsey shall not be obligated to maintain, update or correct the model, and McKinsey shall not be liable to you for any damage or loss howsoever caused by the use of the model, including without limitation lost profits or consequential damages, even if you have told us of the possibility of such damages.

The model is being delivered to you subject to the same limitations on use and disclosure as our paper reports; that is, it is given to you for your internal use only within your organization and is not to be disclosed in whole or in part outside of your organization without our prior written consent."""



In [3]:
# import libraries used by functions:

import pandas as pd
import numpy as np
import glob as glob
import os as os
import time as time

In [2]:
# read in the dataset:

path = r'MIDT_data/Combined_data/MIDT_combined_aggregated.csv'

df = pd.read_csv(path, header = 6)


NameError: name 'pd' is not defined

In [33]:
# show head of data so user understands the inputs:

df.head()

Unnamed: 0,flight_date,operating_carrier,marketing_carrier,od_origin,od_destination,od_pax,F_od_pax,J_od_pax,W_od_pax,Y_od_pax
0,2018-01-01,2I,2I,CUZ,LIM,50,0,0,0,50
1,2018-01-01,2I,2I,CUZ,PEM,2,0,0,0,2
2,2018-01-01,2I,2I,HUU,LIM,3,0,0,0,3
3,2018-01-01,2I,2I,IQT,LIM,4,0,0,0,4
4,2018-01-01,2I,2I,IQT,PCL,9,0,0,0,9


In [101]:
# this code assumes that the user is aggregating to the overall date range in the dataset:

def create_rt_market (origin_column, destination_column) : 
    
    """Function for generating the half-alpha market column"""
    markets = np.where(origin_column < destination_column, 
                      origin_column + "-" + destination_column,
                      destination_column + "-" + origin_column)
    return markets

def aggregate_dataframe(dataframe, groupby_cols, agg_col):
    
    """Function to aggregate the dataframe prior to making calculations"""
    agg_df = dataframe.groupby(by = groupby_cols)[agg_col].sum().reset_index()
    
    return agg_df

def total_market_size(dataframe, level_of_detail, market_measure):
    
    """Function that generates the total market size by a level of detail specified"""
    total_market = dataframe.groupby(by = level_of_detail)[market_measure].transform(sum)
    
    return total_market

def market_share (dataframe, level_of_detail, market_measure): 
    
    """Function that calculates and returns the market share by carrier of each market"""
    
    total_market = dataframe.groupby(by = level_of_detail)[market_measure].transform(sum)
    market_share = dataframe[market_measure] / total_market
    
    
    return market_share

def HHI (dataframe, level_of_detail, market_measure): 
    
    """Function that calculates and returns the HHI of each market -- NOTE THAT THIS ONLY PERTAINS TO OVERALL MARKET HHI - NO ADDL DIMENSIONALITY"""
    
    total_market = dataframe.groupby(by = level_of_detail)[market_measure].transform(sum)
    market_share = (((dataframe[market_measure] / total_market) * 100)**2) 
    
    subframe = pd.concat([dataframe[level_of_detail], market_share], axis = 1, ignore_index = True )
    
    subframe.columns = [level_of_detail, 'market_share']
    
    hhi = subframe.groupby(by = level_of_detail)['market_share'].transform(sum)
    
    return hhi

def RMS (dataframe, level_of_detail, market_measure):
    
    """Function that creates relative market share for each carrier in a specific market
    **** NOTE: THIS CODE RETURNS A DATAFRAME FOR EASE OF UNDERSTANDING THE CALCULATION **** """
    
    # create market share:
    
    total_market = dataframe.groupby(by = level_of_detail)[market_measure].transform(sum)
    market_share = dataframe[market_measure] / total_market
    subframe = pd.concat([dataframe[level_of_detail], market_share], axis = 1, ignore_index = True )
    subframe.columns = [level_of_detail, 'market_share']
    
    subframe = subframe.assign(share_rank = subframe.sort_values(['market_share'], ascending = False)
          .groupby(by = [level_of_detail])
          .cumcount() + 1)
    
    #return subframe['share_rank']
    
    df_first = subframe[subframe['share_rank'] == 1][[level_of_detail, 'market_share']]
    df_first.columns = [level_of_detail, 'first_market_share']

    df_second = subframe[subframe['share_rank'] == 2][[level_of_detail, 'market_share']]
    df_second.columns = [level_of_detail, 'second_market_share']
    
    subframe = subframe.merge(right = df_first, on = [level_of_detail], how = 'inner')
    subframe = subframe.merge(right = df_second, on = [level_of_detail], how = 'inner')

    subframe['rms_compare'] = np.where(subframe['share_rank'] == 1,
                                subframe['second_market_share'],
                                subframe['first_market_share'])
    

    subframe['RMS_calc'] = subframe['market_share'] / subframe['rms_compare']
    
    return subframe
    #print(subframe[subframe['rt_market'] == 'AGP-BOD'])
    


In [35]:
# USAGE EXAMPLES - rt_market:

df['rt_market'] = create_rt_market(df['od_origin'], df['od_destination'])

In [36]:
# USAGE EXAMPLES - run aggregation (if needed):
groupby_cols = ['marketing_carrier', 'rt_market']
agg_col = 'od_pax'

df_agg = aggregate_dataframe(dataframe = df, groupby_cols = groupby_cols, agg_col = agg_col)

In [37]:
# USAGE EXAMPLES - calculate total market size:

level_of_detail = 'rt_market'
market_measure = 'od_pax'


df_agg['total_market_size'] = total_market_size(dataframe = df_agg, level_of_detail = level_of_detail, market_measure = market_measure)

print(df_agg[df_agg['rt_market'] == 'AGP-BOD'])


In [50]:
# USAGE EXAMPLES - calculate total market size:

level_of_detail = 'rt_market'
market_measure = 'od_pax'


df_agg['market_share'] = market_share(dataframe = df_agg, level_of_detail = level_of_detail, market_measure = market_measure)

print(df_agg[df_agg['rt_market'] == 'AGP-BOD'])

       marketing_carrier rt_market  od_pax  total_market_size  market_share
0                     0B   AGP-BOD       1               7768      0.000129
61418                 AF   AGP-BOD     425               7768      0.054712
124022                BA   AGP-BOD       4               7768      0.000515
208780                FR   AGP-BOD       6               7768      0.000772
215506                IB   AGP-BOD    1287               7768      0.165680
243024                KL   AGP-BOD      30               7768      0.003862
299545                LX   AGP-BOD       2               7768      0.000257
345980                SN   AGP-BOD       5               7768      0.000644
358839                TP   AGP-BOD     471               7768      0.060633
400496                UX   AGP-BOD       1               7768      0.000129
405807                V7   AGP-BOD    3390               7768      0.436406
409640                VY   AGP-BOD    2146               7768      0.276262


In [71]:
# USAGE EXAMPLES - calculate total HHI:

level_of_detail = 'rt_market'
market_measure = 'od_pax'


df_agg['hhi'] = HHI(dataframe = df_agg, level_of_detail = level_of_detail, market_measure = market_measure)

print(df_agg[df_agg['rt_market'] == 'AGP-BOD'])

       marketing_carrier rt_market  od_pax  total_market_size  market_share  \
0                     0B   AGP-BOD       1               7768      0.000129   
61418                 AF   AGP-BOD     425               7768      0.054712   
124022                BA   AGP-BOD       4               7768      0.000515   
208780                FR   AGP-BOD       6               7768      0.000772   
215506                IB   AGP-BOD    1287               7768      0.165680   
243024                KL   AGP-BOD      30               7768      0.003862   
299545                LX   AGP-BOD       2               7768      0.000257   
345980                SN   AGP-BOD       5               7768      0.000644   
358839                TP   AGP-BOD     471               7768      0.060633   
400496                UX   AGP-BOD       1               7768      0.000129   
405807                V7   AGP-BOD    3390               7768      0.436406   
409640                VY   AGP-BOD    2146          

In [104]:
# USAGE EXAMPLES - calculate total RMS:

level_of_detail = 'rt_market'
market_measure = 'od_pax'

rms_df = RMS(dataframe = df_agg, level_of_detail = level_of_detail, market_measure = market_measure)

#print(rms_df[rms_df['rt_market'] == 'AGP-BOD'])

print(rms_df.head())

  rt_market  market_share  share_rank  first_market_share  \
0   AGP-BOD      0.000129          12            0.436406   
1   AGP-BOD      0.054712           5            0.436406   
2   AGP-BOD      0.000515           9            0.436406   
3   AGP-BOD      0.000772           7            0.436406   
4   AGP-BOD      0.165680           3            0.436406   

   second_market_share  rms_compare  RMS_calc  
0             0.276262     0.436406  0.000295  
1             0.276262     0.436406  0.125369  
2             0.276262     0.436406  0.001180  
3             0.276262     0.436406  0.001770  
4             0.276262     0.436406  0.379646  
