# Predicting Sales - Data Preparation

To do:
 * Don't include any rebalanced (upsampled) wins in the test/validation data
 * Range validation: throw exceptions on out-of-range values

Reference on stratified sampling in pandas: https://www.geeksforgeeks.org/stratified-sampling-in-pandas/

In [64]:
# Disable pylint checks that don't apply when this Jupyter Notebook
#   is exported to a .py file
# pylint: disable=pointless-statement
# pylint: disable=fixme
# pylint: disable=expression-not-assigned
# pylint: disable=missing-module-docstring
# pylint: disable=invalid-name

import os
import importlib

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


import saleslib # my custom one
# Re-import the local helper module so edits made to saleslib.py are picked up
# without restarting the kernel.
importlib.reload(saleslib)

# Display settings: show up to 100 rows and up to 999 columns when rendering frames.
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 999)

# Use seaborn's dark-grid theme for plots.
sns.set_theme(style="darkgrid")

# Optional performance enhancements, don't always work in Lambda Labs instances
# %load_ext Cython
# import numba
# numba.set_num_threads(4)

In [65]:
# Path of the raw CRM opportunity export (dummy data), relative to the working directory.
input_filepath = os.path.join('data', 'raw_CRM_opps_export-dummydata.csv')

# Load the export into the raw frame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=input_filepath)
df.head(n=5)

Unnamed: 0,Age of opp in days,CRM Identifier,Deal size (USD),Industry,Num times opp pushed,Sales Rep ID,Sales team name,Won,array_of_sfdc_formulas
0,502.0,opp_4f70539,82500.0,Technology,1,rep_9c52b45,Central - South 1,0,"{ ""quarter_created"": 2, ""quarter_closed"": 4, ""..."
1,214.0,opp_3ff4389,5000.0,Finance,2,rep_9c52b45,Central - South 1,0,"{ ""quarter_created"": 1, ""quarter_closed"": 4, ""..."
2,262.0,opp_2819115,12736.0,Finance,3,rep_9c52b45,Central - South 1,1,"{ ""quarter_created"": 1, ""quarter_closed"": 4, ""..."
3,69.0,opp_72dcc7b,57867.2,Technology,1,rep_9c52b45,Central - North,1,"{ ""quarter_created"": 3, ""quarter_closed"": 4, ""..."
4,134.0,opp_08a2884,30000.0,Finance,4,rep_9c52b45,Central - South 1,0,"{ ""quarter_created"": 2, ""quarter_closed"": 4, ""..."


# Pipelines

In [66]:
def start_pipeline(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` for use as the first step of a ``.pipe`` chain.

    Working on a copy means that later pipeline steps which modify the frame
    in place do not affect the frame that was passed in.

    Args:
        df: The frame to copy.

    Returns:
        A deep copy of ``df``.
    """
    working_copy = df.copy(deep=True)
    return working_copy

#print(df.shape)

# Phase 1: run the raw frame through the first group of saleslib steps.
# start_pipeline comes first so every later step operates on a copy of `df`.
df_phase1 = (
    df
    .pipe(start_pipeline)
    .pipe(saleslib.convert_json_to_features)
    .pipe(saleslib.set_datatypes)
    .pipe(saleslib.drop_unused_columns)
    .pipe(saleslib.add_features_sales_type)
)

# Row count of the raw input, kept for the before/after comparison made later.
orig_num_rows = len(df)

df_phase1.describe()

[Set Datatypes] Recast datatypes
[Drop Unused Columns] Dropped -1 columns


Unnamed: 0,Age of opp in days,Deal size (USD),Num times opp pushed,Won,quarter_created,quarter_closed,partner_involved
count,9463.0,9463.0,9463.0,9463.0,9463.0,9463.0,9463.0
mean,126.369862,44169.44394,1.824157,0.261545,2.436225,2.512417,0.371552
std,134.746506,44604.121307,1.93012,0.439499,1.107098,1.139419,0.483245
min,0.0,0.0,0.0,0.0,1.0,1.0,0.0
25%,37.0,11264.0,0.0,0.0,1.0,1.0,0.0
50%,84.0,30000.0,1.0,0.0,2.0,2.0,0.0
75%,168.0,65000.0,3.0,1.0,3.0,4.0,1.0
max,1463.0,424287.0,16.0,1.0,4.0,4.0,1.0


In [67]:
# Print the distinct values of each categorical column so that unexpected
# entries (for example None or empty strings) are easy to spot.
for column_name in ('positioning_category', 'product_family', 'hosting_location'):
    print(df_phase1[column_name].unique())

# Preview the first rows. This has to be the cell's final expression: Jupyter
# only renders the value of the last expression in a cell, so when this line
# came first in the cell its result was silently discarded.
df_phase1.head(20)

['product' 'managed_service' 'limited_service_engagement' None 'unclear']
['C' 'B' 'D' 'A' 'W' 'E' 'Q' None 'F']
['saas_platform' '' 'on_prem' None 'legacy_acquisition']


In [68]:
# Show the rows whose hosting_location is an empty string.
df_phase1.loc[df_phase1['hosting_location'].eq('')]

Unnamed: 0,Age of opp in days,Deal size (USD),Industry,Num times opp pushed,Sales Rep ID,Sales team name,Won,quarter_created,quarter_closed,partner_involved,positioning_category,product_family,hosting_location
15,343,12000,Healthcare,6,rep_9c52b45,Central - North,0,1,1,1,managed_service,C,
20,126,30000,,0,rep_9c52b45,Central - North,0,4,1,0,managed_service,A,
24,0,259606,Finance,0,rep_9c52b45,Central - South 1,1,1,1,1,limited_service_engagement,W,
26,154,60000,Services,2,rep_9c52b45,Central - North,0,4,2,0,managed_service,C,
28,112,20000,Manufacturing,1,rep_9c52b45,Central - North,0,1,2,1,managed_service,C,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9421,209,89000,Manufacturing,3,rep_a04127f,West - Rockies,0,4,2,1,managed_service,C,
9444,30,30000,Services,3,rep_e390d7d,East - North,0,3,4,1,managed_service,A,
9445,96,70125,Technology,4,rep_e390d7d,East - North,1,3,4,1,managed_service,C,
9449,257,15000,Finance,3,rep_e390d7d,East - North,0,1,4,0,managed_service,B,


In [69]:
# Phase 2: drop rows whose values fall outside the ranges saleslib accepts,
# then drop rows with missing values in the columns saleslib checks.
df_phase2 = (
    df_phase1
    .pipe(saleslib.drop_rows_outside_ranges)
    .pipe(saleslib.drop_rows_missing_values_in_columns)
)

df_phase2.describe()

[Drop Rows outside range] Dropping 867 rows (9.16%) from Age of opp in days because value was < 14
[Drop Rows outside range] Dropping 46 rows (0.49%) from Deal size (USD) because value was < 1500
[Drop Rows outside range] Dropping 64 rows (0.68%) from Age of opp in days because value was > 720
[Drop Rows outside range] Dropped a total of 977 rows (10.32%) because values were outside ranges


Unnamed: 0,Age of opp in days,Deal size (USD),Num times opp pushed,Won,quarter_created,quarter_closed,partner_involved
count,8486.0,8486.0,8486.0,8486.0,8486.0,8486.0,8486.0
mean,133.168277,46014.863186,1.952864,0.234268,2.430592,2.514612,0.377681
std,119.560638,44730.53191,1.893298,0.423566,1.101914,1.135265,0.484836
min,14.0,1500.0,0.0,0.0,1.0,1.0,0.0
25%,48.0,12375.0,1.0,0.0,1.0,1.0,0.0
50%,96.0,31468.0,1.0,0.0,2.0,2.0,0.0
75%,178.0,66000.0,3.0,0.0,3.0,4.0,1.0
max,720.0,424287.0,14.0,1.0,4.0,4.0,1.0


In [70]:
# Phase 3: winsorize the numeric columns handled by saleslib.
df_phase3 = df_phase2.pipe(saleslib.winsorize_cols)

df_phase3.describe()

[Winsorized Low End] Winsorized 1030 rows (12.14%) in feature "Age of opp in days" because value was < 30
[Winsorized Low End] Winsorized 1453 rows (17.12%) in feature "Deal size (USD)" because value was < 10000
[Winsorized High End] Winsorized 478 rows (5.63%) in feature "Age of opp in days" because value was > 365
[Winsorized High End] Winsorized 164 rows (1.93%) in feature "Deal size (USD)" because value was > 180000
[Winsorized High End] Winsorized 269 rows (3.17%) in feature "Num times opp pushed" because value was > 6


Unnamed: 0,Age of opp in days,Deal size (USD),Num times opp pushed,Won,quarter_created,quarter_closed,partner_involved
count,8486.0,8486.0,8486.0,8486.0,8486.0,8486.0,8486.0
mean,127.51367,45745.887344,1.885812,0.234268,2.430592,2.514612,0.377681
std,98.470925,40215.369943,1.683724,0.423566,1.101914,1.135265,0.484836
min,30.0,10000.0,0.0,0.0,1.0,1.0,0.0
25%,48.0,12375.0,1.0,0.0,1.0,1.0,0.0
50%,96.0,31468.0,1.0,0.0,2.0,2.0,0.0
75%,178.0,66000.0,3.0,0.0,3.0,4.0,1.0
max,365.0,180000.0,6.0,1.0,4.0,4.0,1.0


In [71]:
# Phase 4: normalize the numeric columns to a fixed range, then add the
# sales-team feature.
df_phase4 = (
    df_phase3
    .pipe(saleslib.normalize_cols_to_fixed_range)
    .pipe(saleslib.add_feature_sales_team)
)

df_phase4.describe()

[Normalized Absolute] Normalized range of feature "Age of opp in days" from [30,365] to [0.0,1.0] using absolute range of [30,365]
[Normalized Absolute] Normalized range of feature "Deal size (USD)" from [10000,180000] to [0.0,1.0] using absolute range of [10000,180000]
[Normalized Absolute] Normalized range of feature "Num times opp pushed" from [0,6] to [0.0,1.0] using absolute range of [0,6]
[Normalized Absolute] Normalized range of feature "quarter_created" from [1,4] to [0.0,1.0] using absolute range of [1,4]
[Normalized Absolute] Normalized range of feature "quarter_closed" from [1,4] to [0.0,1.0] using absolute range of [1,4]


Unnamed: 0,Age of opp in days,Deal size (USD),Num times opp pushed,Won,quarter_created,quarter_closed,partner_involved
count,8486.0,8486.0,8486.0,8486.0,8486.0,8486.0,8486.0
mean,0.291086,0.21027,0.314302,0.234268,0.476864,0.504871,0.377681
std,0.293943,0.236561,0.280621,0.423566,0.367305,0.378422,0.484836
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.053731,0.013971,0.166667,0.0,0.0,0.0,0.0
50%,0.197015,0.126282,0.166667,0.0,0.333333,0.333333,0.0
75%,0.441791,0.329412,0.5,0.0,0.666667,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [72]:
# Optional (disabled): view unique values for string columns. Uncomment to run:
# import json
# xyz = saleslib.list_unique_string_values(df_phase4)
# print(xyz)

In [73]:
# Preview the first 20 rows of the phase-4 frame.
df_phase4.head(n=20)

Unnamed: 0,Age of opp in days,Deal size (USD),Industry,Num times opp pushed,Sales Rep ID,Sales team name,Won,quarter_created,quarter_closed,partner_involved,positioning_category,product_family,hosting_location,sales_territory
0,1.0,0.426471,Technology,0.166667,rep_9c52b45,Central - South 1,0,0.333333,1.0,0,product,C,saas_platform,Central
1,0.549254,0.0,Finance,0.333333,rep_9c52b45,Central - South 1,0,0.0,1.0,1,product,B,saas_platform,Central
2,0.692537,0.016094,Finance,0.5,rep_9c52b45,Central - South 1,1,0.0,1.0,0,product,B,saas_platform,Central
3,0.116418,0.281571,Technology,0.166667,rep_9c52b45,Central - North,1,0.666667,1.0,1,product,C,saas_platform,Central
4,0.310448,0.117647,Finance,0.666667,rep_9c52b45,Central - South 1,0,0.333333,1.0,1,product,B,saas_platform,Central
6,0.026866,0.088235,Utilities & Energy,0.0,rep_9c52b45,Central - South 1,0,1.0,1.0,1,product,C,saas_platform,Central
7,1.0,0.294118,Finance,0.5,rep_9c52b45,Central - North,0,0.333333,1.0,1,product,B,saas_platform,Central
8,0.035821,0.007859,Finance,0.166667,rep_9c52b45,Central - South 1,1,1.0,1.0,0,product,B,saas_platform,Central
9,0.447761,0.058824,Finance,0.833333,rep_9c52b45,Central - South 1,0,0.333333,1.0,1,product,B,saas_platform,Central
11,1.0,0.007435,Finance,0.833333,rep_9c52b45,Central - North,0,0.0,0.0,0,product,B,saas_platform,Central


In [74]:
# Phase 5: final encoding and balancing steps before training.
# Ordering notes carried over from the original author: upsampling belongs near
# the end, renaming is next to last, and the datatype assertion should
# probably stay near the end.
# NOTE(review): wins are upsampled here before any train/test split visible in
# this notebook -- see the to-do at the top about keeping rebalanced wins out
# of the test/validation data.
df_phase5 = (
    df_phase4
    .pipe(saleslib.onehotencode_string_columns)
    .pipe(saleslib.convert_all_boolean_cols_to_int)
    .pipe(saleslib.upsample_wins)
    .pipe(saleslib.rename_features)
    .pipe(saleslib.assert_datatypes_ready_for_training)
)

df_phase5.describe()

[One Hot Encode] Encoded feature "Industry", added 13 new columns. Dropped original.
[One Hot Encode] Encoded feature "Sales Rep ID", added 271 new columns. Dropped original.
[One Hot Encode] Encoded feature "Sales team name", added 13 new columns. Dropped original.
[One Hot Encode] Encoded feature "positioning_category", added 4 new columns. Dropped original.
[One Hot Encode] Encoded feature "product_family", added 8 new columns. Dropped original.
[One Hot Encode] Encoded feature "hosting_location", added 4 new columns. Dropped original.
[One Hot Encode] Encoded feature "sales_territory", added 3 new columns. Dropped original.
[Convert] Converted 316 columns from Boolean to uint8[pyarrow] for XGBoost
[WinLossInfo] Wins: 1988 23.4%  Losses: 6498  Total: 8486
[Upsampling Wins] Resampling wins from 1988 opps to 3180 opps...
[WinLossInfo] Wins: 3180 32.9%  Losses: 6498  Total: 9678
[Rename Features] Renamed column names


Unnamed: 0,age,revenue,pushes,Won,quarter_created,quarter_closed,partner,Industry_Communications & Media,Industry_Education,Industry_Finance,Industry_Government,Industry_Healthcare,Industry_Manufacturing,Industry_Other Industry,Industry_Real Estate,Industry_Retail,Industry_Services,Industry_Technology,Industry_Transportation,Industry_Utilities & Energy,Sales Rep ID_rep_013a112,Sales Rep ID_rep_037508f,Sales Rep ID_rep_04069ca,Sales Rep ID_rep_05fbad0,Sales Rep ID_rep_06c9522,Sales Rep ID_rep_071e409,Sales Rep ID_rep_074d4cc,Sales Rep ID_rep_0821b39,Sales Rep ID_rep_0a05cf9,Sales Rep ID_rep_0bd3379,Sales Rep ID_rep_0bfa997,Sales Rep ID_rep_0d79d17,Sales Rep ID_rep_0f46ca3,Sales Rep ID_rep_103e9e7,Sales Rep ID_rep_114b3c7,Sales Rep ID_rep_1199816,Sales Rep ID_rep_11aa01a,Sales Rep ID_rep_1221463,Sales Rep ID_rep_12c8fa0,Sales Rep ID_rep_12d263e,Sales Rep ID_rep_1366a31,Sales Rep ID_rep_136ab1d,Sales Rep ID_rep_139be21,Sales Rep ID_rep_13eec1f,Sales Rep ID_rep_1911baf,Sales Rep ID_rep_198b0be,Sales Rep ID_rep_1a362bd,Sales Rep ID_rep_1c0c915,Sales Rep ID_rep_1d0c578,Sales Rep ID_rep_1dbb64c,Sales Rep ID_rep_1e1b6b1,Sales Rep ID_rep_1ef511c,Sales Rep ID_rep_1f7feea,Sales Rep ID_rep_201e738,Sales Rep ID_rep_20c37f1,Sales Rep ID_rep_228133a,Sales Rep ID_rep_22d1012,Sales Rep ID_rep_2315b4b,Sales Rep ID_rep_23a1a55,Sales Rep ID_rep_23d0543,Sales Rep ID_rep_23ddec5,Sales Rep ID_rep_247be15,Sales Rep ID_rep_24bd6fd,Sales Rep ID_rep_263e0f2,Sales Rep ID_rep_27750b6,Sales Rep ID_rep_27b8985,Sales Rep ID_rep_289a13f,Sales Rep ID_rep_299b3af,Sales Rep ID_rep_29b5303,Sales Rep ID_rep_29e08a6,Sales Rep ID_rep_2a946da,Sales Rep ID_rep_2bdb202,Sales Rep ID_rep_2d76014,Sales Rep ID_rep_2e3de83,Sales Rep ID_rep_2e817b7,Sales Rep ID_rep_2e9de07,Sales Rep ID_rep_2fef61b,Sales Rep ID_rep_30e692c,Sales Rep ID_rep_3381ce0,Sales Rep ID_rep_339dd94,Sales Rep ID_rep_33d8e5f,Sales Rep ID_rep_3427e42,Sales Rep ID_rep_34a27e3,Sales Rep ID_rep_3582291,Sales Rep ID_rep_36c0074,Sales Rep 
ID_rep_373f7d0,Sales Rep ID_rep_388ecf9,Sales Rep ID_rep_38dd901,Sales Rep ID_rep_39f4987,Sales Rep ID_rep_3c02d74,Sales Rep ID_rep_3c17971,Sales Rep ID_rep_3eec4d1,Sales Rep ID_rep_4324a3d,Sales Rep ID_rep_442171a,Sales Rep ID_rep_4477cba,Sales Rep ID_rep_452a29c,Sales Rep ID_rep_45f5ba6,Sales Rep ID_rep_469891d,Sales Rep ID_rep_47b7e08,Sales Rep ID_rep_488c4bf,Sales Rep ID_rep_493273f,Sales Rep ID_rep_49d39a4,Sales Rep ID_rep_4a7474a,Sales Rep ID_rep_4c1407b,Sales Rep ID_rep_4cac043,Sales Rep ID_rep_4d5616b,Sales Rep ID_rep_4fbad23,Sales Rep ID_rep_4fcd9d8,Sales Rep ID_rep_523f514,Sales Rep ID_rep_52a8010,Sales Rep ID_rep_52c3eb3,Sales Rep ID_rep_536a60e,Sales Rep ID_rep_53c4626,Sales Rep ID_rep_567e972,Sales Rep ID_rep_584d289,Sales Rep ID_rep_58988cb,Sales Rep ID_rep_58f4498,Sales Rep ID_rep_5ad3489,Sales Rep ID_rep_5af5623,Sales Rep ID_rep_5b975b2,Sales Rep ID_rep_5bdd867,Sales Rep ID_rep_5bed9c7,Sales Rep ID_rep_5c8b2b8,Sales Rep ID_rep_5d21642,Sales Rep ID_rep_5d420f5,Sales Rep ID_rep_5f9b0b5,Sales Rep ID_rep_620efdc,Sales Rep ID_rep_627b9bb,Sales Rep ID_rep_6372974,Sales Rep ID_rep_639f828,Sales Rep ID_rep_652d327,Sales Rep ID_rep_6705ebb,Sales Rep ID_rep_684c2e1,Sales Rep ID_rep_6873b3c,Sales Rep ID_rep_6951398,Sales Rep ID_rep_69c4542,Sales Rep ID_rep_6a69c35,Sales Rep ID_rep_6a817aa,Sales Rep ID_rep_6a87d28,Sales Rep ID_rep_6ad6615,Sales Rep ID_rep_6cdce0d,Sales Rep ID_rep_6e553b7,Sales Rep ID_rep_6f2b653,Sales Rep ID_rep_6fc14f7,Sales Rep ID_rep_70630bf,Sales Rep ID_rep_7084626,Sales Rep ID_rep_7089803,Sales Rep ID_rep_71cd7dc,Sales Rep ID_rep_720c136,Sales Rep ID_rep_72c1303,Sales Rep ID_rep_74a4d9c,Sales Rep ID_rep_74edb95,Sales Rep ID_rep_74f24b9,Sales Rep ID_rep_76f5957,Sales Rep ID_rep_7740707,Sales Rep ID_rep_785b538,Sales Rep ID_rep_7b00993,Sales Rep ID_rep_7bc1730,Sales Rep ID_rep_7c5f2d7,Sales Rep ID_rep_7e853a4,Sales Rep ID_rep_82bc742,Sales Rep ID_rep_83cfd47,Sales Rep ID_rep_83ec8cf,Sales Rep ID_rep_852d43c,Sales Rep ID_rep_85503ea,Sales Rep 
ID_rep_85910c4,Sales Rep ID_rep_8799f1c,Sales Rep ID_rep_884ee4f,Sales Rep ID_rep_88a6abc,Sales Rep ID_rep_8913289,Sales Rep ID_rep_89e6dbc,Sales Rep ID_rep_8c50924,Sales Rep ID_rep_8cd0121,Sales Rep ID_rep_8e47ce9,Sales Rep ID_rep_8fa82a2,Sales Rep ID_rep_909e681,Sales Rep ID_rep_912d4cd,Sales Rep ID_rep_924be18,Sales Rep ID_rep_92631b9,Sales Rep ID_rep_92ac302,Sales Rep ID_rep_92fe9e4,Sales Rep ID_rep_93617a8,Sales Rep ID_rep_941f171,Sales Rep ID_rep_95d427d,Sales Rep ID_rep_960305f,Sales Rep ID_rep_97f2271,Sales Rep ID_rep_9952925,Sales Rep ID_rep_99ab94f,Sales Rep ID_rep_9a87508,Sales Rep ID_rep_9b3adba,Sales Rep ID_rep_9b838ce,Sales Rep ID_rep_9b96edf,Sales Rep ID_rep_9ba90a1,Sales Rep ID_rep_9c0b0ed,Sales Rep ID_rep_9c233f0,Sales Rep ID_rep_9c52b45,Sales Rep ID_rep_9e05d52,Sales Rep ID_rep_9fa6ec0,Sales Rep ID_rep_a04127f,Sales Rep ID_rep_a051497,Sales Rep ID_rep_a247a2e,Sales Rep ID_rep_a29b264,Sales Rep ID_rep_a33a510,Sales Rep ID_rep_a643883,Sales Rep ID_rep_a6b98f5,Sales Rep ID_rep_a730543,Sales Rep ID_rep_a8bc765,Sales Rep ID_rep_a9c2fb9,Sales Rep ID_rep_a9e4b79,Sales Rep ID_rep_a9f9437,Sales Rep ID_rep_aa798dc,Sales Rep ID_rep_aabce93,Sales Rep ID_rep_aafcc91,Sales Rep ID_rep_abecc7e,Sales Rep ID_rep_accee1c,Sales Rep ID_rep_ad96ddb,Sales Rep ID_rep_adc60e5,Sales Rep ID_rep_b32eb7d,Sales Rep ID_rep_b45961b,Sales Rep ID_rep_b51fa7a,Sales Rep ID_rep_b52f3af,Sales Rep ID_rep_b62908a,Sales Rep ID_rep_b6742b4,Sales Rep ID_rep_b6ee995,Sales Rep ID_rep_b72a293,Sales Rep ID_rep_b7387ae,Sales Rep ID_rep_b818aaa,Sales Rep ID_rep_b823209,Sales Rep ID_rep_b99b2da,Sales Rep ID_rep_ba7ad49,Sales Rep ID_rep_bb29c18,Sales Rep ID_rep_be9ece5,Sales Rep ID_rep_bf1275e,Sales Rep ID_rep_c181368,Sales Rep ID_rep_c18ae8c,Sales Rep ID_rep_c1a8de6,Sales Rep ID_rep_c293e6b,Sales Rep ID_rep_c34c5b3,Sales Rep ID_rep_c73a709,Sales Rep ID_rep_c75bed5,Sales Rep ID_rep_c88ca4e,Sales Rep ID_rep_c8e3703,Sales Rep ID_rep_c99d0ad,Sales Rep ID_rep_c9d3a5c,Sales Rep ID_rep_cc5749c,Sales Rep 
ID_rep_ccea417,Sales Rep ID_rep_cd5688f,Sales Rep ID_rep_cedea7b,Sales Rep ID_rep_d14d3c7,Sales Rep ID_rep_d4dca82,Sales Rep ID_rep_d77414c,Sales Rep ID_rep_d949a0b,Sales Rep ID_rep_d9af365,Sales Rep ID_rep_da258ff,Sales Rep ID_rep_da3bf0b,Sales Rep ID_rep_db58312,Sales Rep ID_rep_db88e76,Sales Rep ID_rep_ddf354d,Sales Rep ID_rep_de368f1,Sales Rep ID_rep_de911d5,Sales Rep ID_rep_df215c0,Sales Rep ID_rep_dfd35f0,Sales Rep ID_rep_e2e95ec,Sales Rep ID_rep_e353cac,Sales Rep ID_rep_e37b4cf,Sales Rep ID_rep_e390d7d,Sales Rep ID_rep_e4f98d4,Sales Rep ID_rep_e5c0eb2,Sales Rep ID_rep_e7c4527,Sales Rep ID_rep_ea3637e,Sales Rep ID_rep_ea7c100,Sales Rep ID_rep_f0a531e,Sales Rep ID_rep_f1c67dc,Sales Rep ID_rep_f2980e0,Sales Rep ID_rep_f33ef28,Sales Rep ID_rep_f487a89,Sales Rep ID_rep_f4b08a6,Sales Rep ID_rep_f4db9f9,Sales Rep ID_rep_f590d61,Sales Rep ID_rep_f8a0bb6,Sales Rep ID_rep_f8b3968,Sales Rep ID_rep_f9870c2,Sales Rep ID_rep_fa6672f,Sales Rep ID_rep_fa6a12b,Sales Rep ID_rep_fbc8835,Sales Rep ID_rep_fc569a3,Sales Rep ID_rep_fcdeb8d,Sales Rep ID_rep_fd24731,Sales Rep ID_rep_fe1277c,Sales Rep ID_rep_fe52a7e,Sales Rep ID_rep_ffb8c60,Sales team name_Central - Midwest,Sales team name_Central - North,Sales team name_Central - South 1,Sales team name_Central - Texas,Sales team name_East - Mid-Atlantic,Sales team name_East - NYC,Sales team name_East - North,Sales team name_East - South,Sales team name_East - Tri-State,Sales team name_West - Bay Area,Sales team name_West - North 1,Sales team name_West - Rockies,Sales team name_West - 
South,positioning_category_limited_service_engagement,positioning_category_managed_service,positioning_category_product,positioning_category_unclear,product_family_A,product_family_B,product_family_C,product_family_D,product_family_E,product_family_F,product_family_Q,product_family_W,hosting_location_,hosting_location_legacy_acquisition,hosting_location_on_prem,hosting_location_saas_platform,sales_territory_Central,sales_territory_East,sales_territory_West
count,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0
,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0,9678.0
mean,0.279641,0.208549,0.305177,0.32858,0.477854,0.510918,0.396983,0.071192,0.00589,0.183819,0.006613,0.097024,0.11397,0.010126,0.012813,0.103534,0.217814,0.129676,0.019012,0.020872,0.000103,0.003823,0.000103,0.00062,0.017876,0.000103,0.00124,0.005373,0.011469,0.00124,0.00093,0.00744,0.006923,0.007026,0.000413,0.000517,0.000207,0.011366,0.00062,0.007646,0.000207,0.001137,0.000103,0.004856,0.00434,0.001757,0.00031,0.00403,0.00031,0.000103,0.00124,0.017566,0.00217,0.000207,0.00124,0.000103,0.00031,0.000207,0.006303,0.014466,0.000207,0.001963,0.00589,0.019322,0.000413,0.004753,0.000207,0.000103,0.000103,0.012193,0.000103,0.000103,0.000103,0.001343,0.005063,0.000207,0.009919,0.000103,0.001033,0.011883,0.014569,0.002893,0.008576,0.000103,0.001137,0.000103,0.000103,0.00744,0.003926,0.011986,0.011469,0.000103,0.000207,0.013226,0.013743,0.002687,0.003203,0.011469,0.000413,0.002687,0.00155,0.020769,0.014672,0.000207,0.001033,0.000207,0.005063,0.016739,0.002377,0.000103,0.013949,0.001343,0.000103,0.000207,0.006613,0.000723,0.000103,0.003926,0.000103,0.011056,0.000103,0.000103,0.000207,0.012296,0.000103,0.000207,0.001447,0.000827,0.000827,0.012916,0.000103,0.006716,0.000103,0.004133,0.012193,0.000103,0.000103,0.00062,0.000413,0.010436,0.000103,0.002377,0.000103,0.000103,0.0,0.001137,0.008163,0.009713,0.002067,0.008989,0.000207,0.013433,0.000207,0.011469,0.011883,0.00062,0.00062,0.002583,0.00124,0.001653,0.001963,0.003203,0.00186,0.000517,0.000723,0.000827,0.008886,0.00031,0.008783,0.00062,0.00341,0.001137,0.016429,0.000413,0.0,0.000103,0.000103,0.010953,0.012089,0.006923,0.000103,0.004546,0.00124,0.00093,0.00248,0.001343,0.000207,0.00062,0.000207,0.000103,0.000207,0.002273,0.001757,0.000207,0.00031,0.008679,0.008886,0.001963,0.009609,0.000103,0.000103,0.001757,0.000207,0.004443,0.00031,0.007853,0.002583,0.000517,0.010436,0.000103,0.011779,0.001137,0.002996,0.000207,0.005683,0.005683,0.001033,0.007026,0.002067,0.001653,0.000207,0.000103,0.000207,0.009713,0.009609,0.000207,0.001
86,0.006923,0.004856,0.00031,0.000207,0.001033,0.00372,0.00186,0.00124,0.004856,0.008886,0.00341,0.000103,0.000207,0.007853,0.000207,0.000103,0.00093,0.00031,0.00217,0.000827,0.016739,0.000207,0.00031,0.020355,0.003926,0.000207,0.000517,0.000103,0.003306,0.006613,0.015499,0.005476,0.000207,0.000103,0.000413,0.00186,0.003823,0.006303,0.00217,0.00031,0.00124,0.004856,0.000723,0.00031,0.000103,0.001447,0.001447,0.013123,0.000103,0.000103,0.000517,0.00031,0.005063,0.00031,0.000103,0.009093,0.016946,0.000207,0.015499,0.000207,0.0,0.000207,0.000207,0.002377,0.088448,0.066439,0.059,0.074602,0.071916,0.077599,0.054763,0.068299,0.071502,0.095578,0.073466,0.102397,0.095991,0.00558,0.193738,0.462906,0.05528,0.05373,0.251912,0.311221,0.029448,0.024179,0.029242,0.012193,0.00558,0.254598,0.031411,0.012193,0.419302,0.288489,0.344079,0.367431
std,0.289669,0.238063,0.279386,0.469721,0.366535,0.378773,0.489298,0.257159,0.076522,0.387357,0.081055,0.296006,0.317791,0.100123,0.112471,0.304671,0.412781,0.335963,0.136575,0.142963,0.010165,0.061716,0.010165,0.024893,0.132506,0.010165,0.035193,0.073107,0.106484,0.035193,0.030482,0.085936,0.08292,0.083532,0.020327,0.022725,0.014375,0.106009,0.024893,0.087112,0.014375,0.033696,0.010165,0.069522,0.065737,0.041877,0.017604,0.063356,0.017604,0.010165,0.035193,0.131373,0.046534,0.014375,0.035193,0.010165,0.017604,0.014375,0.079145,0.119407,0.014375,0.044267,0.076522,0.137662,0.020327,0.068782,0.014375,0.010165,0.010165,0.109751,0.010165,0.010165,0.010165,0.036628,0.070978,0.014375,0.099106,0.010165,0.03213,0.108363,0.119826,0.053713,0.092214,0.010165,0.033696,0.010165,0.010165,0.085936,0.062541,0.108828,0.106484,0.010165,0.014375,0.114247,0.116426,0.051765,0.056509,0.106484,0.020327,0.051765,0.03934,0.142617,0.120244,0.014375,0.03213,0.014375,0.070978,0.128298,0.048694,0.010165,0.117286,0.036628,0.010165,0.014375,0.081055,0.026886,0.010165,0.062541,0.010165,0.10457,0.010165,0.010165,0.014375,0.110209,0.010165,0.014375,0.038008,0.028741,0.028741,0.112918,0.010165,0.081681,0.010165,0.064159,0.109751,0.010165,0.010165,0.024893,0.020327,0.101628,0.010165,0.048694,0.010165,0.010165,0.0,0.033696,0.089984,0.098079,0.045415,0.094391,0.014375,0.115124,0.014375,0.106484,0.108363,0.024893,0.024893,0.050762,0.035193,0.040628,0.044267,0.056509,0.043089,0.022725,0.026886,0.028741,0.093851,0.017604,0.093309,0.024893,0.058297,0.033696,0.127125,0.020327,0.0,0.010165,0.010165,0.104086,0.10929,0.08292,0.010165,0.067277,0.035193,0.030482,0.049739,0.036628,0.014375,0.024893,0.014375,0.010165,0.014375,0.047626,0.041877,0.014375,0.017604,0.092763,0.093851,0.044267,0.097561,0.010165,0.010165,0.041877,0.014375,0.066512,0.017604,0.088272,0.050762,0.022725,0.101628,0.010165,0.107897,0.033696,0.054661,0.014375,0.075175,0.075175,0.03213,0.083532,0.045415,0.040628,0.014375,0.010165,0.014375,0.09807
9,0.097561,0.014375,0.043089,0.08292,0.069522,0.017604,0.014375,0.03213,0.06088,0.043089,0.035193,0.069522,0.093851,0.058297,0.010165,0.014375,0.088272,0.014375,0.010165,0.030482,0.017604,0.046534,0.028741,0.128298,0.014375,0.017604,0.14122,0.062541,0.014375,0.022725,0.010165,0.05741,0.081055,0.123533,0.073803,0.014375,0.010165,0.020327,0.043089,0.061716,0.079145,0.046534,0.017604,0.035193,0.069522,0.026886,0.017604,0.010165,0.038008,0.038008,0.113805,0.010165,0.010165,0.022725,0.017604,0.070978,0.017604,0.010165,0.094926,0.129074,0.014375,0.123533,0.014375,0.0,0.014375,0.014375,0.048694,0.28396,0.249061,0.235636,0.262762,0.258362,0.267553,0.22753,0.252272,0.257675,0.294026,0.260913,0.303186,0.294594,0.074492,0.395247,0.498648,0.228538,0.225496,0.434133,0.463017,0.169068,0.153611,0.168492,0.109751,0.074492,0.435658,0.174436,0.109751,0.49347,0.453083,0.475092,0.48213
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.047761,0.012428,0.166667,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.179104,0.117647,0.166667,0.0,0.333333,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.423881,0.329412,0.5,1.0,0.666667,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [75]:
# Report how the row count changed relative to orig_num_rows (set in an earlier cell).
final_num_rows = len(df_phase5.index)
percent_row_change = (
    100 * (final_num_rows - orig_num_rows) / orig_num_rows
)
print(f'Num rows went from {orig_num_rows} to {final_num_rows}  {percent_row_change:0.2f}% change')

Num rows went from 9463 to 9678  2.27% change


In [76]:
# Preview the first three rows of df_phase5 (the frame that is exported to CSV below).
df_phase5.head(3)

Unnamed: 0,age,revenue,pushes,Won,quarter_created,quarter_closed,partner,Industry_Communications & Media,Industry_Education,Industry_Finance,Industry_Government,Industry_Healthcare,Industry_Manufacturing,Industry_Other Industry,Industry_Real Estate,Industry_Retail,Industry_Services,Industry_Technology,Industry_Transportation,Industry_Utilities & Energy,Sales Rep ID_rep_013a112,Sales Rep ID_rep_037508f,Sales Rep ID_rep_04069ca,Sales Rep ID_rep_05fbad0,Sales Rep ID_rep_06c9522,Sales Rep ID_rep_071e409,Sales Rep ID_rep_074d4cc,Sales Rep ID_rep_0821b39,Sales Rep ID_rep_0a05cf9,Sales Rep ID_rep_0bd3379,Sales Rep ID_rep_0bfa997,Sales Rep ID_rep_0d79d17,Sales Rep ID_rep_0f46ca3,Sales Rep ID_rep_103e9e7,Sales Rep ID_rep_114b3c7,Sales Rep ID_rep_1199816,Sales Rep ID_rep_11aa01a,Sales Rep ID_rep_1221463,Sales Rep ID_rep_12c8fa0,Sales Rep ID_rep_12d263e,Sales Rep ID_rep_1366a31,Sales Rep ID_rep_136ab1d,Sales Rep ID_rep_139be21,Sales Rep ID_rep_13eec1f,Sales Rep ID_rep_1911baf,Sales Rep ID_rep_198b0be,Sales Rep ID_rep_1a362bd,Sales Rep ID_rep_1c0c915,Sales Rep ID_rep_1d0c578,Sales Rep ID_rep_1dbb64c,Sales Rep ID_rep_1e1b6b1,Sales Rep ID_rep_1ef511c,Sales Rep ID_rep_1f7feea,Sales Rep ID_rep_201e738,Sales Rep ID_rep_20c37f1,Sales Rep ID_rep_228133a,Sales Rep ID_rep_22d1012,Sales Rep ID_rep_2315b4b,Sales Rep ID_rep_23a1a55,Sales Rep ID_rep_23d0543,Sales Rep ID_rep_23ddec5,Sales Rep ID_rep_247be15,Sales Rep ID_rep_24bd6fd,Sales Rep ID_rep_263e0f2,Sales Rep ID_rep_27750b6,Sales Rep ID_rep_27b8985,Sales Rep ID_rep_289a13f,Sales Rep ID_rep_299b3af,Sales Rep ID_rep_29b5303,Sales Rep ID_rep_29e08a6,Sales Rep ID_rep_2a946da,Sales Rep ID_rep_2bdb202,Sales Rep ID_rep_2d76014,Sales Rep ID_rep_2e3de83,Sales Rep ID_rep_2e817b7,Sales Rep ID_rep_2e9de07,Sales Rep ID_rep_2fef61b,Sales Rep ID_rep_30e692c,Sales Rep ID_rep_3381ce0,Sales Rep ID_rep_339dd94,Sales Rep ID_rep_33d8e5f,Sales Rep ID_rep_3427e42,Sales Rep ID_rep_34a27e3,Sales Rep ID_rep_3582291,Sales Rep ID_rep_36c0074,Sales Rep 
ID_rep_373f7d0,Sales Rep ID_rep_388ecf9,Sales Rep ID_rep_38dd901,Sales Rep ID_rep_39f4987,Sales Rep ID_rep_3c02d74,Sales Rep ID_rep_3c17971,Sales Rep ID_rep_3eec4d1,Sales Rep ID_rep_4324a3d,Sales Rep ID_rep_442171a,Sales Rep ID_rep_4477cba,Sales Rep ID_rep_452a29c,Sales Rep ID_rep_45f5ba6,Sales Rep ID_rep_469891d,Sales Rep ID_rep_47b7e08,Sales Rep ID_rep_488c4bf,Sales Rep ID_rep_493273f,Sales Rep ID_rep_49d39a4,Sales Rep ID_rep_4a7474a,Sales Rep ID_rep_4c1407b,Sales Rep ID_rep_4cac043,Sales Rep ID_rep_4d5616b,Sales Rep ID_rep_4fbad23,Sales Rep ID_rep_4fcd9d8,Sales Rep ID_rep_523f514,Sales Rep ID_rep_52a8010,Sales Rep ID_rep_52c3eb3,Sales Rep ID_rep_536a60e,Sales Rep ID_rep_53c4626,Sales Rep ID_rep_567e972,Sales Rep ID_rep_584d289,Sales Rep ID_rep_58988cb,Sales Rep ID_rep_58f4498,Sales Rep ID_rep_5ad3489,Sales Rep ID_rep_5af5623,Sales Rep ID_rep_5b975b2,Sales Rep ID_rep_5bdd867,Sales Rep ID_rep_5bed9c7,Sales Rep ID_rep_5c8b2b8,Sales Rep ID_rep_5d21642,Sales Rep ID_rep_5d420f5,Sales Rep ID_rep_5f9b0b5,Sales Rep ID_rep_620efdc,Sales Rep ID_rep_627b9bb,Sales Rep ID_rep_6372974,Sales Rep ID_rep_639f828,Sales Rep ID_rep_652d327,Sales Rep ID_rep_6705ebb,Sales Rep ID_rep_684c2e1,Sales Rep ID_rep_6873b3c,Sales Rep ID_rep_6951398,Sales Rep ID_rep_69c4542,Sales Rep ID_rep_6a69c35,Sales Rep ID_rep_6a817aa,Sales Rep ID_rep_6a87d28,Sales Rep ID_rep_6ad6615,Sales Rep ID_rep_6cdce0d,Sales Rep ID_rep_6e553b7,Sales Rep ID_rep_6f2b653,Sales Rep ID_rep_6fc14f7,Sales Rep ID_rep_70630bf,Sales Rep ID_rep_7084626,Sales Rep ID_rep_7089803,Sales Rep ID_rep_71cd7dc,Sales Rep ID_rep_720c136,Sales Rep ID_rep_72c1303,Sales Rep ID_rep_74a4d9c,Sales Rep ID_rep_74edb95,Sales Rep ID_rep_74f24b9,Sales Rep ID_rep_76f5957,Sales Rep ID_rep_7740707,Sales Rep ID_rep_785b538,Sales Rep ID_rep_7b00993,Sales Rep ID_rep_7bc1730,Sales Rep ID_rep_7c5f2d7,Sales Rep ID_rep_7e853a4,Sales Rep ID_rep_82bc742,Sales Rep ID_rep_83cfd47,Sales Rep ID_rep_83ec8cf,Sales Rep ID_rep_852d43c,Sales Rep ID_rep_85503ea,Sales Rep 
ID_rep_85910c4,Sales Rep ID_rep_8799f1c,Sales Rep ID_rep_884ee4f,Sales Rep ID_rep_88a6abc,Sales Rep ID_rep_8913289,Sales Rep ID_rep_89e6dbc,Sales Rep ID_rep_8c50924,Sales Rep ID_rep_8cd0121,Sales Rep ID_rep_8e47ce9,Sales Rep ID_rep_8fa82a2,Sales Rep ID_rep_909e681,Sales Rep ID_rep_912d4cd,Sales Rep ID_rep_924be18,Sales Rep ID_rep_92631b9,Sales Rep ID_rep_92ac302,Sales Rep ID_rep_92fe9e4,Sales Rep ID_rep_93617a8,Sales Rep ID_rep_941f171,Sales Rep ID_rep_95d427d,Sales Rep ID_rep_960305f,Sales Rep ID_rep_97f2271,Sales Rep ID_rep_9952925,Sales Rep ID_rep_99ab94f,Sales Rep ID_rep_9a87508,Sales Rep ID_rep_9b3adba,Sales Rep ID_rep_9b838ce,Sales Rep ID_rep_9b96edf,Sales Rep ID_rep_9ba90a1,Sales Rep ID_rep_9c0b0ed,Sales Rep ID_rep_9c233f0,Sales Rep ID_rep_9c52b45,Sales Rep ID_rep_9e05d52,Sales Rep ID_rep_9fa6ec0,Sales Rep ID_rep_a04127f,Sales Rep ID_rep_a051497,Sales Rep ID_rep_a247a2e,Sales Rep ID_rep_a29b264,Sales Rep ID_rep_a33a510,Sales Rep ID_rep_a643883,Sales Rep ID_rep_a6b98f5,Sales Rep ID_rep_a730543,Sales Rep ID_rep_a8bc765,Sales Rep ID_rep_a9c2fb9,Sales Rep ID_rep_a9e4b79,Sales Rep ID_rep_a9f9437,Sales Rep ID_rep_aa798dc,Sales Rep ID_rep_aabce93,Sales Rep ID_rep_aafcc91,Sales Rep ID_rep_abecc7e,Sales Rep ID_rep_accee1c,Sales Rep ID_rep_ad96ddb,Sales Rep ID_rep_adc60e5,Sales Rep ID_rep_b32eb7d,Sales Rep ID_rep_b45961b,Sales Rep ID_rep_b51fa7a,Sales Rep ID_rep_b52f3af,Sales Rep ID_rep_b62908a,Sales Rep ID_rep_b6742b4,Sales Rep ID_rep_b6ee995,Sales Rep ID_rep_b72a293,Sales Rep ID_rep_b7387ae,Sales Rep ID_rep_b818aaa,Sales Rep ID_rep_b823209,Sales Rep ID_rep_b99b2da,Sales Rep ID_rep_ba7ad49,Sales Rep ID_rep_bb29c18,Sales Rep ID_rep_be9ece5,Sales Rep ID_rep_bf1275e,Sales Rep ID_rep_c181368,Sales Rep ID_rep_c18ae8c,Sales Rep ID_rep_c1a8de6,Sales Rep ID_rep_c293e6b,Sales Rep ID_rep_c34c5b3,Sales Rep ID_rep_c73a709,Sales Rep ID_rep_c75bed5,Sales Rep ID_rep_c88ca4e,Sales Rep ID_rep_c8e3703,Sales Rep ID_rep_c99d0ad,Sales Rep ID_rep_c9d3a5c,Sales Rep ID_rep_cc5749c,Sales Rep 
ID_rep_ccea417,Sales Rep ID_rep_cd5688f,Sales Rep ID_rep_cedea7b,Sales Rep ID_rep_d14d3c7,Sales Rep ID_rep_d4dca82,Sales Rep ID_rep_d77414c,Sales Rep ID_rep_d949a0b,Sales Rep ID_rep_d9af365,Sales Rep ID_rep_da258ff,Sales Rep ID_rep_da3bf0b,Sales Rep ID_rep_db58312,Sales Rep ID_rep_db88e76,Sales Rep ID_rep_ddf354d,Sales Rep ID_rep_de368f1,Sales Rep ID_rep_de911d5,Sales Rep ID_rep_df215c0,Sales Rep ID_rep_dfd35f0,Sales Rep ID_rep_e2e95ec,Sales Rep ID_rep_e353cac,Sales Rep ID_rep_e37b4cf,Sales Rep ID_rep_e390d7d,Sales Rep ID_rep_e4f98d4,Sales Rep ID_rep_e5c0eb2,Sales Rep ID_rep_e7c4527,Sales Rep ID_rep_ea3637e,Sales Rep ID_rep_ea7c100,Sales Rep ID_rep_f0a531e,Sales Rep ID_rep_f1c67dc,Sales Rep ID_rep_f2980e0,Sales Rep ID_rep_f33ef28,Sales Rep ID_rep_f487a89,Sales Rep ID_rep_f4b08a6,Sales Rep ID_rep_f4db9f9,Sales Rep ID_rep_f590d61,Sales Rep ID_rep_f8a0bb6,Sales Rep ID_rep_f8b3968,Sales Rep ID_rep_f9870c2,Sales Rep ID_rep_fa6672f,Sales Rep ID_rep_fa6a12b,Sales Rep ID_rep_fbc8835,Sales Rep ID_rep_fc569a3,Sales Rep ID_rep_fcdeb8d,Sales Rep ID_rep_fd24731,Sales Rep ID_rep_fe1277c,Sales Rep ID_rep_fe52a7e,Sales Rep ID_rep_ffb8c60,Sales team name_Central - Midwest,Sales team name_Central - North,Sales team name_Central - South 1,Sales team name_Central - Texas,Sales team name_East - Mid-Atlantic,Sales team name_East - NYC,Sales team name_East - North,Sales team name_East - South,Sales team name_East - Tri-State,Sales team name_West - Bay Area,Sales team name_West - North 1,Sales team name_West - Rockies,Sales team name_West - 
South,positioning_category_limited_service_engagement,positioning_category_managed_service,positioning_category_product,positioning_category_unclear,product_family_A,product_family_B,product_family_C,product_family_D,product_family_E,product_family_F,product_family_Q,product_family_W,hosting_location_,hosting_location_legacy_acquisition,hosting_location_on_prem,hosting_location_saas_platform,sales_territory_Central,sales_territory_East,sales_territory_West
4073,0.053731,0.0,0.166667,1,0.666667,1.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1
3594,0.0,0.4213,0.166667,1,0.333333,0.666667,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
6404,0.143284,0.117624,0.166667,1,1.0,0.0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0


In [77]:
# List the dtype of every column in df_phase5.
# NOTE(review): the recorded output shows `revenue` as the only plain float64
# column while the other numeric columns are pyarrow-backed — confirm this mix
# is intended.
df_phase5.dtypes

age                               double[pyarrow]
revenue                                   float64
pushes                            double[pyarrow]
Won                                uint8[pyarrow]
quarter_created                   double[pyarrow]
                                       ...       
hosting_location_on_prem           uint8[pyarrow]
hosting_location_saas_platform     uint8[pyarrow]
sales_territory_Central            uint8[pyarrow]
sales_territory_East               uint8[pyarrow]
sales_territory_West               uint8[pyarrow]
Length: 323, dtype: object

In [78]:
# Tally how many columns share each dtype.
df_phase5.dtypes.value_counts()

uint8[pyarrow]     318
double[pyarrow]      4
float64              1
Name: count, dtype: int64

In [79]:
# importlib.reload(saleslib)
# saleslib.get_stratified_sample(df_phase5, 0.01)

# Output to new file

In [80]:
# Build the output path from the input path: same folder and base name,
# with the extension replaced by a "_prepped.csv" suffix.
output_filepath = f"{os.path.splitext(input_filepath)[0]}_prepped.csv"

# Write the prepared frame without its index column.
df_phase5.to_csv(
    output_filepath,
    index=False,
)
print(f"Data saved to new CSV file:\n{os.path.abspath(output_filepath)}")

Data saved to new CSV file:
/Users/the-molecular-man/source_code/portfolio-private/sales_prediction_modeling/data/raw_CRM_opps_export-dummydata_prepped.csv
