<a href="https://colab.research.google.com/github/JSunkel/Forecasting-Models/blob/master/Line_Disconnects_Apple_Vol_Port_Carriers_Same_Day_Prophet_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from fbprophet import Prophet

In [3]:
# Read a comma-separated values (csv) file into a Pandas DataFrame.
fact_table = pd.read_csv('drive/My Drive/Churn/discos_fact_table_apple_vol_port_carriers_same_day_may.csv')

In [4]:
# Check the full dataset format (testing only)
fact_table.head()

Unnamed: 0,ds,vol_port_attonly,vol_port_attcc,vol_port_sprint,vol_port_tmo_only,vol_port_metro,vol_port_spectrum,vol_port_xfinity,vol_port_reseller,vol_port_other
0,12/1/2016,318,67,242,264,44,0,0,80,18
1,12/2/2016,401,69,247,288,41,0,0,47,24
2,12/3/2016,625,63,378,364,45,0,0,61,30
3,12/4/2016,480,56,326,372,57,0,0,52,13
4,12/5/2016,426,55,281,287,42,0,0,45,18


In [5]:
# Split out indivudal datasets for forecasting.
# AT&T
vol_port_attonly = fact_table[['ds', 'vol_port_attonly']].rename(columns = {'vol_port_attonly': 'y'})
vol_port_attcc = fact_table[['ds', 'vol_port_attcc']].rename(columns = {'vol_port_attcc': 'y'})

# Sprint
vol_port_sprint = fact_table[['ds', 'vol_port_sprint']].rename(columns = {'vol_port_sprint': 'y'})

# T-Mobile
vol_port_tmo_only = fact_table[['ds', 'vol_port_tmo_only']].rename(columns = {'vol_port_tmo_only': 'y'})
vol_port_metro = fact_table[['ds', 'vol_port_metro']].rename(columns = {'vol_port_metro': 'y'})

# Spectrum
vol_port_spectrum = fact_table[['ds', 'vol_port_spectrum']].rename(columns = {'vol_port_spectrum': 'y'})

# Xfinity
vol_port_xfinity = fact_table[['ds', 'vol_port_xfinity']].rename(columns = {'vol_port_xfinity': 'y'})

# Reseller
vol_port_reseller = fact_table[['ds', 'vol_port_reseller']].rename(columns = {'vol_port_reseller': 'y'})

# Other
vol_port_other = fact_table[['ds', 'vol_port_other']].rename(columns = {'vol_port_other': 'y'})

In [6]:
# Check the individual dataset format (testing only)
vol_port_sprint.head()

Unnamed: 0,ds,y
0,12/1/2016,242
1,12/2/2016,247
2,12/3/2016,378
3,12/4/2016,326
4,12/5/2016,281


In [7]:
# Define the holidays
new_years_day = pd.DataFrame({
    'holiday': 'new_years_day',
    'ds': pd.to_datetime(['2017-01-01',
                          '2018-01-01',
                          '2019-01-01',
                          '2020-01-01']),
    'lower_window': 0,
    'upper_window': 0,
})

mlk_day = pd.DataFrame({
    'holiday': 'mlk_day',
    'ds': pd.to_datetime(['2017-01-16',
                          '2018-01-15',
                          '2019-01-21',
                          '2020-01-20']),
    'lower_window': 0,
    'upper_window': 0,
})

easter_sunday = pd.DataFrame({
    'holiday': 'easter_sunday',
    'ds': pd.to_datetime(['2017-04-16',
                          '2018-04-01',
                          '2019-04-21',
                          '2020-04-12']),
    'lower_window': 0,
    'upper_window': 0,
})

memorial_day = pd.DataFrame({
    'holiday': 'memorial_day',
    'ds': pd.to_datetime(['2017-05-29',
                          '2018-05-28',
                          '2019-05-27',
                          '2020-05-25']),
    'lower_window': 0,
    'upper_window': 0,
})

independence_day = pd.DataFrame({
    'holiday': 'independence_day',
    'ds': pd.to_datetime(['2017-07-04',
                          '2018-07-04',
                          '2019-07-04',
                          '2020-07-04']),
    'lower_window': 0,
    'upper_window': 0,
})

labor_day = pd.DataFrame({
    'holiday': 'labor_day',
    'ds': pd.to_datetime(['2017-09-04',
                          '2018-09-03',
                          '2019-09-02',
                          '2020-09-07']),
    'lower_window': 0,
    'upper_window': 0,
})

thanksgiving_day = pd.DataFrame({
    'holiday': 'thanksgiving_day',
    'ds': pd.to_datetime(['2017-11-23',
                          '2018-11-22',
                          '2019-11-28',
                          '2020-11-26']),
    'lower_window': 0,
    'upper_window': 0,
})

black_friday = pd.DataFrame({
    'holiday': 'black_friday',
    'ds': pd.to_datetime(['2017-11-24',
                          '2018-11-23',
                          '2019-11-29',
                          '2020-11-27']),
    'lower_window': 0,
    'upper_window': 0,
})

cyber_monday = pd.DataFrame({
    'holiday': 'cyber_monday',
    'ds': pd.to_datetime(['2017-11-27',
                          '2018-11-26',
                          '2019-12-02',
                          '2020-11-30']),
    'lower_window': 0,
    'upper_window': 0,
})

christmas_day = pd.DataFrame({
    'holiday': 'christmas_day',
    'ds': pd.to_datetime(['2017-12-25',
                          '2018-12-25',
                          '2019-12-25',
                          '2020-12-25']),
    'lower_window': 0,
    'upper_window': 0,
})

In [8]:
holidays = pd.concat((new_years_day,
                      mlk_day,
                      easter_sunday,
                      memorial_day,
                      independence_day,
                      labor_day,
                      thanksgiving_day,
                      black_friday,
                      cyber_monday,
                      christmas_day))

In [31]:
# Define the models
# AT&T
vol_port_att_only_model = Prophet(holidays = holidays)
vol_port_att_cc_model = Prophet(holidays = holidays)

# Sprint
vol_port_sprint_model = Prophet(holidays = holidays)

# T-Mobile
vol_port_tmo_only_model = Prophet(holidays = holidays)
vol_port_metro_model = Prophet(holidays = holidays)

# Spectrum
vol_port_spectrum_model = Prophet(holidays = holidays)

# Xfinity
vol_port_xfinity_model = Prophet(holidays = holidays)

# Reseller
vol_port_reseller_model = Prophet(holidays = holidays)

# Other
vol_port_other_model = Prophet(holidays = holidays)

In [32]:
# Fit the models
# AT&T
vol_port_att_only_model.fit(vol_port_attonly)
vol_port_att_cc_model.fit(vol_port_attcc)

# Sprint
vol_port_sprint_model.fit(vol_port_sprint)

# T-Mobile
vol_port_tmo_only_model.fit(vol_port_tmo_only)
vol_port_metro_model.fit(vol_port_metro)

# Spectrum
vol_port_spectrum_model.fit(vol_port_spectrum)

# Xfinity
vol_port_xfinity_model.fit(vol_port_xfinity)

# Reseller
vol_port_reseller_model.fit(vol_port_reseller)

# Other
vol_port_other_model.fit(vol_port_other)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x7fb8d1bc44e0>

In [33]:
# Number of periods into the future to forecast
n = 214

In [34]:
# Create dataframes to hold predictions
# AT&T
vol_port_att_only_future = vol_port_attonly_model.make_future_dataframe(periods = n)
vol_port_att_cc_future = vol_port_attcc_model.make_future_dataframe(periods = n)

# Sprint
vol_port_sprint_future = vol_port_sprint_model.make_future_dataframe(periods = n)

# T-Mobile
vol_port_tmo_only_future = vol_port_tmo_only_model.make_future_dataframe(periods = n)
vol_port_metro_future = vol_port_metro_model.make_future_dataframe(periods = n)

# Spectrum
vol_port_spectrum_future = vol_port_spectrum_model.make_future_dataframe(periods = n)

# Xfinity
vol_port_xfinity_future = vol_port_xfinity_model.make_future_dataframe(periods = n)

# Reseller
vol_port_reseller_future = vol_port_reseller_model.make_future_dataframe(periods = n)

# Other
vol_port_other_future = vol_port_other_model.make_future_dataframe(periods = n)

In [35]:
# Make predictions
# AT&T
vol_port_att_only_forecast = vol_port_att_only_model.predict(vol_port_att_only_future)
vol_port_att_cc_forecast = vol_port_attcc_model.predict(vol_port_attcc_future)

# Sprint
vol_port_sprint_forecast = vol_port_sprint_model.predict(vol_port_sprint_future)

# T-Mobile
vol_port_tmo_only_forecast = vol_port_tmo_only_model.predict(vol_port_tmo_only_future)
vol_port_metro_forecast = vol_port_metro_model.predict(vol_port_metro_future)

# Spectrum
vol_port_spectrum_forecast = vol_port_spectrum_model.predict(vol_port_spectrum_future)

# Xfinity
vol_port_xfinity_forecast = vol_port_xfinity_model.predict(vol_port_xfinity_future)

# Reseller
vol_port_reseller_forecast = vol_port_reseller_model.predict(vol_port_reseller_future)

# Other
vol_port_other_forecast = vol_port_other_model.predict(vol_port_other_future)

In [36]:
# Get the datestamp and prediction columns
# AT&T
vol_port_att_only_forecast_subset = vol_port_att_only_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Vol_Port_ATT_Only'})
vol_port_att_cc_forecast_subset = vol_port_att_cc_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Vol_Port_ATT_CC'})

# Sprint
vol_port_sprint_forecast_subset = vol_port_sprint_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Vol_Port_Sprint'})

# T-Mobile
vol_port_tmo_only_forecast_subset = vol_port_tmo_only_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Vol_Port_TMO_Only'})
vol_port_metro_forecast_subset = vol_port_metro_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Vol_Port_Metro'})

# Spectrum
vol_port_spectrum_forecast_subset = vol_port_spectrum_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Vol_Port_Spectrum'})

# Xfinity
vol_port_xfinity_forecast_subset = vol_port_xfinity_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Vol_Port_Xfinity'})

# Reseller
vol_port_reseller_forecast_subset = vol_port_reseller_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Vol_Port_Reseller'})

# Other
vol_port_other_forecast_subset = vol_port_other_forecast[['ds', 'yhat']].rename(columns = {'yhat': 'Vol_Port_Other'})

In [37]:
# Check the format of the subsetted forecasts (testing only)
vol_port_reseller_forecast_subset.tail()

Unnamed: 0,ds,Vol_Port_Reseller
1487,2020-12-27,27.766047
1488,2020-12-28,37.036351
1489,2020-12-29,35.791833
1490,2020-12-30,37.127242
1491,2020-12-31,38.273238


In [38]:
# Growth multiplier
gm = 1.00

In [39]:
# Replace any negative values and scale the forecast using the growth multiplier
# AT&T 
vol_port_att_only_forecast_subset['Vol_Port_ATT_Only'] = vol_port_att_only_forecast_subset['Vol_Port_ATT_Only'].abs() * gm
vol_port_att_cc_forecast_subset['Vol_Port_ATT_CC'] = vol_port_att_cc_forecast_subset['Vol_Port_ATT_CC'].abs() * gm

# Sprint
vol_port_sprint_forecast_subset['Vol_Port_Sprint'] = vol_port_sprint_forecast_subset['Vol_Port_Sprint'].abs() * gm

# T-Mobile
vol_port_tmo_only_forecast_subset['Vol_Port_TMO_Only'] = vol_port_tmo_only_forecast_subset['Vol_Port_TMO_Only'].abs() * gm
vol_port_metro_forecast_subset['Vol_Port_Metro'] = vol_port_metro_forecast_subset['Vol_Port_Metro'].abs() * gm

# Spectrum
vol_port_spectrum_forecast_subset['Vol_Port_Spectrum'] = vol_port_spectrum_forecast_subset['Vol_Port_Spectrum'].abs() * gm

# Xfinity
vol_port_xfinity_forecast_subset['Vol_Port_Xfinity'] = vol_port_xfinity_forecast_subset['Vol_Port_Xfinity'].abs() * gm

# Reseller
vol_port_reseller_forecast_subset['Vol_Port_Reseller'] = vol_port_reseller_forecast_subset['Vol_Port_Reseller'].abs() * gm

# Other
vol_port_other_forecast_subset['Vol_Port_Other'] = vol_port_other_forecast_subset['Vol_Port_Other'].abs() * gm

In [40]:
# Check the format of the adjusted forecasts (testing only)
vol_port_xfinity_forecast_subset.tail()

Unnamed: 0,ds,Vol_Port_Xfinity
1487,2020-12-27,48.137522
1488,2020-12-28,59.484934
1489,2020-12-29,56.066457
1490,2020-12-30,67.572325
1491,2020-12-31,63.825316


In [41]:
# Combine the forecasts into one table
# AT&T
discos_apple_carrier_vol_port_pred_table = vol_port_att_only_forecast_subset
discos_apple_carrier_vol_port_pred_table.insert(2, 'Vol_Port_ATT_CC', vol_port_att_cc_forecast_subset['Vol_Port_ATT_CC'])

# Sprint
discos_apple_carrier_vol_port_pred_table.insert(3, 'Vol_Port_Sprint', vol_port_sprint_forecast_subset['Vol_Port_Sprint'])

# T-Mobile
discos_apple_carrier_vol_port_pred_table.insert(4, 'Vol_Port_TMO_Only', vol_port_tmo_only_forecast_subset['Vol_Port_TMO_Only'])
discos_apple_carrier_vol_port_pred_table.insert(5, 'Vol_Port_Metro', vol_port_metro_forecast_subset['Vol_Port_Metro'])

# Spectrum
discos_apple_carrier_vol_port_pred_table.insert(6, 'Vol_Port_Spectrum', vol_port_spectrum_forecast_subset['Vol_Port_Spectrum'])

# Xfinity
discos_apple_carrier_vol_port_pred_table.insert(7, 'Vol_Port_Xfinity', vol_port_xfinity_forecast_subset['Vol_Port_Xfinity'])

# Reseller
discos_apple_carrier_vol_port_pred_table.insert(8, 'Vol_Port_Reseller', vol_port_reseller_forecast_subset['Vol_Port_Reseller'])

# Other
discos_apple_carrier_vol_port_pred_table.insert(9, 'Vol_Port_Other', vol_port_other_forecast_subset['Vol_Port_Other'])

In [42]:
# Check the format of the final predictions table (testing only)
discos_apple_carrier_vol_port_pred_table.tail()

Unnamed: 0,ds,Vol_Port_ATT_Only,Vol_Port_ATT_CC,Vol_Port_Sprint,Vol_Port_TMO_Only,Vol_Port_Metro,Vol_Port_Spectrum,Vol_Port_Xfinity,Vol_Port_Reseller,Vol_Port_Other
1487,2020-12-27,248.086328,24.145079,113.216837,155.127749,8.870192,81.137025,48.137522,27.766047,27.238617
1488,2020-12-28,301.525871,42.203088,138.084991,161.435277,19.474801,97.336973,59.484934,37.036351,37.743513
1489,2020-12-29,287.574256,39.043216,127.945452,146.31828,17.586608,96.392813,56.066457,35.791833,38.395313
1490,2020-12-30,286.379137,40.309363,129.472135,150.848661,19.106093,103.969226,67.572325,37.127242,39.470873
1491,2020-12-31,282.911129,41.481593,127.289567,143.887507,20.480294,101.426072,63.825316,38.273238,37.881649


In [46]:
# Calculate the root mean squared error (testing only)
print("Vol Port ATT Only RMSE: %f" % np.sqrt(np.mean((discos_apple_carrier_vol_port_pred_table.loc[:1307, 'Vol_Port_ATT_Only'] - fact_table['vol_port_attonly']) ** 2)))
print("Vol Port ATT CC RMSE: %f" % np.sqrt(np.mean((discos_apple_carrier_vol_port_pred_table.loc[:1307, 'Vol_Port_ATT_CC'] - fact_table['vol_port_attcc']) ** 2)))
print("Vol Port Sprint RMSE: %f" % np.sqrt(np.mean((discos_apple_carrier_vol_port_pred_table.loc[:1307, 'Vol_Port_Sprint'] - fact_table['vol_port_sprint']) ** 2)))
print("Vol Port TMO Only RMSE: %f" % np.sqrt(np.mean((discos_apple_carrier_vol_port_pred_table.loc[:1307, 'Vol_Port_TMO_Only'] - fact_table['vol_port_tmo_only']) ** 2)))
print("Vol Port Metro RMSE: %f" % np.sqrt(np.mean((discos_apple_carrier_vol_port_pred_table.loc[:1307, 'Vol_Port_Metro'] - fact_table['vol_port_metro']) ** 2)))
print("Vol Port Spectrum RMSE: %f" % np.sqrt(np.mean((discos_apple_carrier_vol_port_pred_table.loc[:1307, 'Vol_Port_Spectrum'] - fact_table['vol_port_spectrum']) ** 2)))
print("Vol Port Xfinity RMSE: %f" % np.sqrt(np.mean((discos_apple_carrier_vol_port_pred_table.loc[:1307, 'Vol_Port_Xfinity'] - fact_table['vol_port_xfinity']) ** 2)))
print("Vol Port Reseller RMSE: %f" % np.sqrt(np.mean((discos_apple_carrier_vol_port_pred_table.loc[:1307, 'Vol_Port_Reseller'] - fact_table['vol_port_reseller']) ** 2)))
print("Vol Port Other RMSE: %f" % np.sqrt(np.mean((discos_apple_carrier_vol_port_pred_table.loc[:1307, 'Vol_Port_Other'] - fact_table['vol_port_other']) ** 2)))

Vol Port ATT Only RMSE: 69.190961
Vol Port ATT CC RMSE: 11.360294
Vol Port Sprint RMSE: 44.034370
Vol Port TMO Only RMSE: 66.291213
Vol Port Metro RMSE: 8.415330
Vol Port Spectrum RMSE: 14.121824
Vol Port Xfinity RMSE: 22.175810
Vol Port Reseller RMSE: 11.038366
Vol Port Other RMSE: 8.644118


In [47]:
# Save the predictions table to a csv file
discos_apple_carrier_vol_port_pred_table.to_csv('drive/My Drive/Churn/discos_apple_carrier_vol_port_pred_table.csv')