<a href="https://colab.research.google.com/github/JSunkel/Forecasting-Models/blob/master/Line_Disconnects_Device_Group_Prophet_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
from fbprophet import Prophet

In [0]:
# Load the full dataset
fact_table = pd.read_csv('drive/My Drive/Churn/discos_device_grp_fact_table_mar.csv')

In [139]:
# Check the full dataset format (testing only)
fact_table.head()

Unnamed: 0,ds,basic_cdma,basic_non_cdma,sp_android,sp_apple,sp_other,tablet,mbb,hpc,hum,gizmo,watch,other
0,1/1/2018,2740,7,8543,11502,276,8263,2294,478,417,361,254,90
1,1/2/2018,3844,13,10269,14177,329,10462,2844,691,601,466,421,128
2,1/3/2018,3636,9,7825,11311,325,9030,2567,631,532,384,314,101
3,1/4/2018,4277,39,9204,12242,364,9761,2678,640,576,388,350,127
4,1/5/2018,1323,12,3271,4196,50,2335,766,257,203,144,188,83


In [0]:
# Split out indivudal datasets for forecasting
basic_cdma = fact_table[['ds', 'basic_cdma']].rename(columns = {'basic_cdma': 'y'})
basic_non_cdma = fact_table[['ds', 'basic_non_cdma']].rename(columns = {'basic_non_cdma': 'y'})
sp_android = fact_table[['ds', 'sp_android']].rename(columns = {'sp_android': 'y'})
sp_apple = fact_table[['ds', 'sp_apple']].rename(columns = {'sp_apple': 'y'})
sp_other = fact_table[['ds', 'sp_other']].rename(columns = {'sp_other': 'y'})
tablet = fact_table[['ds', 'tablet']].rename(columns = {'tablet': 'y'})
mbb = fact_table[['ds', 'mbb']].rename(columns = {'mbb': 'y'})
hpc = fact_table[['ds', 'hpc']].rename(columns = {'hpc': 'y'})
hum = fact_table[['ds', 'hum']].rename(columns = {'hum': 'y'})
gizmo = fact_table[['ds', 'gizmo']].rename(columns = {'gizmo': 'y'})
watch = fact_table[['ds', 'watch']].rename(columns = {'watch': 'y'})
other = fact_table[['ds', 'other']].rename(columns = {'other': 'y'})

In [141]:
# Check the individual dataset format (testing only)
basic_cdma.head()

Unnamed: 0,ds,y
0,1/1/2018,2740
1,1/2/2018,3844
2,1/3/2018,3636
3,1/4/2018,4277
4,1/5/2018,1323


In [0]:
# Define the holidays
jan_bill_cycle = pd.DataFrame({
    'holiday': 'jan_bill_cycle',
    'ds': pd.to_datetime(['2018-01-05',
                          '2018-01-11',
                          '2018-01-14',
                          '2018-01-17',
                          '2018-01-24',
                          '2018-01-27',
                          '2019-01-29',
                          '2018-01-30',
                          '2018-01-31',
                          '2019-01-05',
                          '2019-01-11',
                          '2019-01-14',
                          '2019-01-17',
                          '2019-01-24',
                          '2019-01-27',
                          '2019-01-29',
                          '2019-01-30',
                          '2019-01-31',
                          '2020-01-05',
                          '2020-01-11',
                          '2020-01-14',
                          '2020-01-17',
                          '2020-01-24',
                          '2020-01-27',
                          '2020-01-29',
                          '2020-01-30',
                          '2020-01-31']),
    'lower_window': 0,
    'upper_window': 0,
})

feb_bill_cycle = pd.DataFrame({
    'holiday': 'feb_bill_cycle',
    'ds': pd.to_datetime(['2018-02-05',
                          '2018-02-11',
                          '2018-02-14',
                          '2018-02-17',
                          '2018-02-24',
                          '2018-02-27',
                          '2019-02-05',
                          '2019-02-11',
                          '2019-02-14',
                          '2019-02-17',
                          '2019-02-24',
                          '2019-02-27',
                          '2020-02-05',
                          '2020-02-11',
                          '2020-02-14',
                          '2020-02-17',
                          '2020-02-24',
                          '2020-02-27',
                          '2020-02-29']),
    'lower_window': 0,
    'upper_window': 0,
})

mar_bill_cycle = pd.DataFrame({
    'holiday': 'mar_bill_cycle',
    'ds': pd.to_datetime(['2018-03-05',
                          '2018-03-11',
                          '2018-03-14',
                          '2018-03-17',
                          '2018-03-24',
                          '2018-03-27',
                          '2019-03-29',
                          '2018-03-30',
                          '2018-03-31',
                          '2019-03-05',
                          '2019-03-11',
                          '2019-03-14',
                          '2019-03-17',
                          '2019-03-24',
                          '2019-03-27',
                          '2019-03-29',
                          '2019-03-30',
                          '2019-03-31',
                          '2020-03-05',
                          '2020-03-11',
                          '2020-03-14',
                          '2020-03-17',
                          '2020-03-24',
                          '2020-03-27',
                          '2020-03-29',
                          '2020-03-30',
                          '2020-03-31']),
    'lower_window': 0,
    'upper_window': 0,
})

apr_bill_cycle = pd.DataFrame({
    'holiday': 'apr_bill_cycle',
    'ds': pd.to_datetime(['2018-04-05',
                          '2018-04-11',
                          '2018-04-14',
                          '2018-04-17',
                          '2018-04-24',
                          '2018-04-27',
                          '2019-04-29',
                          '2018-04-30',
                          '2019-04-05',
                          '2019-04-11',
                          '2019-04-14',
                          '2019-04-17',
                          '2019-04-24',
                          '2019-04-27',
                          '2019-04-29',
                          '2019-04-30',
                          '2020-04-05',
                          '2020-04-11',
                          '2020-04-14',
                          '2020-04-17',
                          '2020-04-24',
                          '2020-04-27',
                          '2020-04-29',
                          '2020-04-30']),
    'lower_window': 0,
    'upper_window': 0,
})

may_bill_cycle = pd.DataFrame({
    'holiday': 'may_bill_cycle',
    'ds': pd.to_datetime(['2018-05-05',
                          '2018-05-11',
                          '2018-05-14',
                          '2018-05-17',
                          '2018-05-24',
                          '2018-05-27',
                          '2019-05-29',
                          '2018-05-30',
                          '2018-05-31',
                          '2019-05-05',
                          '2019-05-11',
                          '2019-05-14',
                          '2019-05-17',
                          '2019-05-24',
                          '2019-05-27',
                          '2019-05-29',
                          '2019-05-30',
                          '2019-05-31',
                          '2020-05-05',
                          '2020-05-11',
                          '2020-05-14',
                          '2020-05-17',
                          '2020-05-24',
                          '2020-05-27',
                          '2020-05-29',
                          '2020-05-30',
                          '2020-05-31']),
    'lower_window': 0,
    'upper_window': 0,
})

jun_bill_cycle = pd.DataFrame({
    'holiday': 'jun_bill_cycle',
    'ds': pd.to_datetime(['2018-06-05',
                          '2018-06-11',
                          '2018-06-14',
                          '2018-06-17',
                          '2018-06-24',
                          '2018-06-27',
                          '2019-06-29',
                          '2018-06-30',
                          '2019-06-05',
                          '2019-06-11',
                          '2019-06-14',
                          '2019-06-17',
                          '2019-06-24',
                          '2019-06-27',
                          '2019-06-29',
                          '2019-06-30',
                          '2020-06-05',
                          '2020-06-11',
                          '2020-06-14',
                          '2020-06-17',
                          '2020-06-24',
                          '2020-06-27',
                          '2020-06-29',
                          '2020-06-30']),
    'lower_window': 0,
    'upper_window': 0,
})

jul_bill_cycle = pd.DataFrame({
    'holiday': 'jul_bill_cycle',
    'ds': pd.to_datetime(['2018-07-05',
                          '2018-07-11',
                          '2018-07-14',
                          '2018-07-17',
                          '2018-07-24',
                          '2018-07-27',
                          '2019-07-29',
                          '2018-07-30',
                          '2018-07-31',
                          '2019-07-05',
                          '2019-07-11',
                          '2019-07-14',
                          '2019-07-17',
                          '2019-07-24',
                          '2019-07-27',
                          '2019-07-29',
                          '2019-07-30',
                          '2019-07-31',
                          '2020-07-05',
                          '2020-07-11',
                          '2020-07-14',
                          '2020-07-17',
                          '2020-07-24',
                          '2020-07-27',
                          '2020-07-29',
                          '2020-07-30',
                          '2020-07-31']),
    'lower_window': 0,
    'upper_window': 0,
})

aug_bill_cycle = pd.DataFrame({
    'holiday': 'aug_bill_cycle',
    'ds': pd.to_datetime(['2018-08-05',
                          '2018-08-11',
                          '2018-08-14',
                          '2018-08-17',
                          '2018-08-24',
                          '2018-08-27',
                          '2019-08-29',
                          '2018-08-30',
                          '2018-08-31',
                          '2019-08-05',
                          '2019-08-11',
                          '2019-08-14',
                          '2019-08-17',
                          '2019-08-24',
                          '2019-08-27',
                          '2019-08-29',
                          '2019-08-30',
                          '2019-08-31',
                          '2020-08-05',
                          '2020-08-11',
                          '2020-08-14',
                          '2020-08-17',
                          '2020-08-24',
                          '2020-08-27',
                          '2020-08-29',
                          '2020-08-30',
                          '2020-08-31']),
    'lower_window': 0,
    'upper_window': 0,
})

sep_bill_cycle = pd.DataFrame({
    'holiday': 'sep_bill_cycle',
    'ds': pd.to_datetime(['2018-09-05',
                          '2018-09-11',
                          '2018-09-14',
                          '2018-09-17',
                          '2018-09-24',
                          '2018-09-27',
                          '2019-09-29',
                          '2018-09-30',
                          '2019-09-05',
                          '2019-09-11',
                          '2019-09-14',
                          '2019-09-17',
                          '2019-09-24',
                          '2019-09-27',
                          '2019-09-29',
                          '2019-09-30',
                          '2020-09-05',
                          '2020-09-11',
                          '2020-09-14',
                          '2020-09-17',
                          '2020-09-24',
                          '2020-09-27',
                          '2020-09-29',
                          '2020-09-30']),
    'lower_window': 0,
    'upper_window': 0,
})

oct_bill_cycle = pd.DataFrame({
    'holiday': 'oct_bill_cycle',
    'ds': pd.to_datetime(['2018-10-05',
                          '2018-10-11',
                          '2018-10-14',
                          '2018-10-17',
                          '2018-10-24',
                          '2018-10-27',
                          '2019-10-29',
                          '2018-10-30',
                          '2018-10-31',
                          '2019-10-05',
                          '2019-10-11',
                          '2019-10-14',
                          '2019-10-17',
                          '2019-10-24',
                          '2019-10-27',
                          '2019-10-29',
                          '2019-10-30',
                          '2019-10-31',
                          '2020-10-05',
                          '2020-10-11',
                          '2020-10-14',
                          '2020-10-17',
                          '2020-10-24',
                          '2020-10-27',
                          '2020-10-29',
                          '2020-10-30',
                          '2020-10-31']),
    'lower_window': 0,
    'upper_window': 0,
})

nov_bill_cycle = pd.DataFrame({
    'holiday': 'nov_bill_cycle',
    'ds': pd.to_datetime(['2018-11-05',
                          '2018-11-11',
                          '2018-11-14',
                          '2018-11-17',
                          '2018-11-24',
                          '2018-11-27',
                          '2019-11-29',
                          '2018-11-30',
                          '2019-11-05',
                          '2019-11-11',
                          '2019-11-14',
                          '2019-11-17',
                          '2019-11-24',
                          '2019-11-27',
                          '2019-11-29',
                          '2019-11-30',
                          '2020-11-05',
                          '2020-11-11',
                          '2020-11-14',
                          '2020-11-17',
                          '2020-11-24',
                          '2020-11-27',
                          '2020-11-29',
                          '2020-11-30']),
    'lower_window': 0,
    'upper_window': 0,
})

dec_bill_cycle = pd.DataFrame({
    'holiday': 'dec_bill_cycle',
    'ds': pd.to_datetime(['2018-12-05',
                          '2018-12-11',
                          '2018-12-14',
                          '2018-12-17',
                          '2018-12-24',
                          '2018-12-27',
                          '2019-12-29',
                          '2018-12-30',
                          '2018-12-31',
                          '2019-12-05',
                          '2019-12-11',
                          '2019-12-14',
                          '2019-12-17',
                          '2019-12-24',
                          '2019-12-27',
                          '2019-12-29',
                          '2019-12-30',
                          '2019-12-31',
                          '2020-12-05',
                          '2020-12-11',
                          '2020-12-14',
                          '2020-12-17',
                          '2020-12-24',
                          '2020-12-27',
                          '2020-12-29',
                          '2020-12-30',
                          '2020-12-31']),
    'lower_window': 0,
    'upper_window': 0,
})

In [0]:
holidays = pd.concat((jan_bill_cycle,
                      feb_bill_cycle,
                      mar_bill_cycle,
                      apr_bill_cycle,
                      may_bill_cycle,
                      jun_bill_cycle,
                      jul_bill_cycle,
                      aug_bill_cycle,
                      sep_bill_cycle,
                      oct_bill_cycle,
                      nov_bill_cycle,
                      dec_bill_cycle))

In [144]:
# Define and fit the models
basic_cdma_model = Prophet(holidays = holidays).fit(basic_cdma)
basic_non_cdma_model = Prophet(holidays = holidays).fit(basic_non_cdma)
sp_android_model = Prophet(holidays = holidays).fit(sp_android)
sp_apple_model = Prophet(holidays = holidays).fit(sp_apple)
sp_other_model = Prophet(holidays = holidays).fit(sp_other)
tablet_model = Prophet(holidays = holidays).fit(tablet)
mbb_model = Prophet(holidays = holidays).fit(mbb)
hpc_model = Prophet(holidays = holidays).fit(hpc)
hum_model = Prophet(holidays = holidays).fit(hum)
gizmo_model = Prophet(holidays = holidays).fit(gizmo)
watch_model = Prophet(holidays = holidays).fit(watch)
other_model = Prophet(holidays = holidays).fit(other)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=Tru

In [0]:
# NUmber of periods into the future to forecast
n = 30

In [0]:
# Create dataframes to hold predictions
basic_cdma_future = basic_cdma_model.make_future_dataframe(periods = n)
basic_non_cdma_future = basic_non_cdma_model.make_future_dataframe(periods = n)
sp_android_future = sp_android_model.make_future_dataframe(periods = n)
sp_apple_future = sp_apple_model.make_future_dataframe(periods = n)
sp_other_future = sp_other_model.make_future_dataframe(periods = n)
tablet_future = tablet_model.make_future_dataframe(periods = n)
mbb_future = mbb_model.make_future_dataframe(periods = n)
hpc_future = hpc_model.make_future_dataframe(periods = n)
hum_future = hum_model.make_future_dataframe(periods = n)
gizmo_future = gizmo_model.make_future_dataframe(periods = n)
watch_future = watch_model.make_future_dataframe(periods = n)
other_future = other_model.make_future_dataframe(periods = n)

In [0]:
# Make predictions
basic_cdma_forecast = basic_cdma_model.predict(basic_cdma_future)
basic_non_cdma_forecast = basic_non_cdma_model.predict(basic_non_cdma_future)
sp_android_forecast = sp_android_model.predict(sp_android_future)
sp_apple_forecast = sp_apple_model.predict(sp_apple_future)
sp_other_forecast = sp_other_model.predict(sp_other_future)
tablet_forecast = tablet_model.predict(tablet_future)
mbb_forecast = mbb_model.predict(mbb_future)
hpc_forecast = hpc_model.predict(hpc_future)
hum_forecast = hum_model.predict(hum_future)
gizmo_forecast = gizmo_model.predict(gizmo_future)
watch_forecast = watch_model.predict(watch_future)
other_forecast = other_model.predict(other_future)

In [0]:
# Get the datestamp and prediction columns
basic_cdma_forecast_subset = basic_cdma_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Basic_CDMA'})
basic_non_cdma_forecast_subset = basic_non_cdma_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Basic_non_CDMA'})
sp_android_forecast_subset = sp_android_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'SP_Android'})
sp_apple_forecast_subset = sp_apple_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'SP_Apple'})
sp_other_forecast_subset = sp_other_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'SP_Other'})
tablet_forecast_subset = tablet_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Tablet'})
mbb_forecast_subset = mbb_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'MBB'})
hpc_forecast_subset = hpc_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'HPC'})
hum_forecast_subset = hum_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Hum'})
gizmo_forecast_subset = gizmo_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Gizmo'})
watch_forecast_subset = watch_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Watch'})
other_forecast_subset = other_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Other'})

In [149]:
# Check the format of the subsetted forecasts (testing only)
basic_cdma_forecast_subset.tail()

Unnamed: 0,ds,Basic_CDMA
846,2020-04-26,1158.347995
847,2020-04-27,191.030689
848,2020-04-28,1662.477567
849,2020-04-29,139.958376
850,2020-04-30,142.396024


In [0]:
# Growth multiplier
gm = 0.77

In [0]:
# Replace any negative values and scale the forecast using the growth multiplier 
basic_cdma_forecast_subset['Basic_CDMA'] = basic_cdma_forecast_subset['Basic_CDMA'].abs() * gm
basic_non_cdma_forecast_subset['Basic_non_CDMA'] = basic_non_cdma_forecast_subset['Basic_non_CDMA'].abs() * gm
sp_android_forecast_subset['SP_Android'] = sp_android_forecast_subset['SP_Android'].abs() * gm
sp_apple_forecast_subset['SP_Apple'] = sp_apple_forecast_subset['SP_Apple'].abs() * gm
sp_other_forecast_subset['SP_Other'] = sp_other_forecast_subset['SP_Other'].abs() * gm
tablet_forecast_subset['Tablet'] = tablet_forecast_subset['Tablet'].abs() * gm
mbb_forecast_subset['MBB'] = mbb_forecast_subset['MBB'].abs() * gm
hpc_forecast_subset['HPC'] = hpc_forecast_subset['HPC'].abs() * gm
hum_forecast_subset['Hum'] = hum_forecast_subset['Hum'].abs() * gm
gizmo_forecast_subset['Gizmo'] = gizmo_forecast_subset['Gizmo'].abs() * gm
watch_forecast_subset['Watch'] = watch_forecast_subset['Watch'].abs() * gm
other_forecast_subset['Other'] = other_forecast_subset['Other'].abs() * gm

In [152]:
# Check the format of the adjusted forecasts (testing only)
sp_other_forecast_subset.tail()

Unnamed: 0,ds,SP_Other
846,2020-04-26,38.336202
847,2020-04-27,43.146235
848,2020-04-28,41.258416
849,2020-04-29,43.598797
850,2020-04-30,43.245406


In [0]:
# Combine the forecasts into one table
discos_device_grp_pred_table = basic_cdma_forecast_subset
discos_device_grp_pred_table.insert(2, 'Basic_non_CDMA', basic_non_cdma_forecast_subset['Basic_non_CDMA'])
discos_device_grp_pred_table.insert(3, 'SP_Android', sp_android_forecast_subset['SP_Android'])
discos_device_grp_pred_table.insert(4, 'SP_Apple', sp_apple_forecast_subset['SP_Apple'])
discos_device_grp_pred_table.insert(5, 'SP_Other', sp_other_forecast_subset['SP_Other'])
discos_device_grp_pred_table.insert(6, 'Tablet', tablet_forecast_subset['Tablet'])
discos_device_grp_pred_table.insert(7, 'MBB', mbb_forecast_subset['MBB'])
discos_device_grp_pred_table.insert(8, 'HPC', hpc_forecast_subset['HPC'])
discos_device_grp_pred_table.insert(9, 'Hum', hum_forecast_subset['Hum'])
discos_device_grp_pred_table.insert(10, 'Gizmo', gizmo_forecast_subset['Gizmo'])
discos_device_grp_pred_table.insert(11, 'Watch', watch_forecast_subset['Watch'])
discos_device_grp_pred_table.insert(12, 'Other', other_forecast_subset['Other'])

In [154]:
# Check the format of the final predictions table (testing only)
discos_device_grp_pred_table.tail()

Unnamed: 0,ds,Basic_CDMA,Basic_non_CDMA,SP_Android,SP_Apple,SP_Other,Tablet,MBB,HPC,Hum,Gizmo,Watch,Other
846,2020-04-26,891.927956,278.561151,4889.102282,6411.479922,38.336202,4097.295107,1228.059389,297.406886,655.088321,358.220888,1115.526373,50.216186
847,2020-04-27,147.093631,301.138955,1913.199086,2802.016564,43.146235,775.742468,308.849639,138.819382,236.901826,191.07576,859.255179,22.103879
848,2020-04-28,1280.107726,338.826026,5439.986183,7148.931992,41.258416,4725.791074,1452.824466,386.586063,820.255272,406.101542,1220.475186,66.114455
849,2020-04-29,107.76795,302.362241,2194.542004,3143.217041,43.598797,866.006955,322.873727,136.491461,252.213579,203.655998,905.402209,23.872291
850,2020-04-30,109.644938,307.129495,2207.738259,3145.049443,43.245406,874.702335,329.387544,144.656513,252.987504,207.873122,905.046877,24.962436


In [0]:
# Save the predictions table to a csv file
discos_device_grp_pred_table.to_csv('drive/My Drive/Churn/discos_device_grp_pred_table.csv')