In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import make_column_selector, make_column_transformer

In [2]:
data = pd.read_csv('../../../data/WA_Fn-UseC_-Telco-Customer-Churn.csv')
data.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [3]:
data['churn'] = data.Churn.map({'No':0, 'Yes':1})
data.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,0
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,0
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,0
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1


In [10]:
data.Churn.value_counts()

No     5174
Yes    1869
Name: Churn, dtype: int64

ValueError: could not convert string to float: ''

In [21]:
X = data.drop(['Churn', 'churn', 'customerID'], axis=1)
y = data['churn']

X_t, X_test, y_t, y_test = train_test_split(X, y, random_state=10)
X_train, X_val, y_train, y_val = train_test_split(X_t, y_t, random_state=10)

In [22]:
cat_columns = list(X.select_dtypes('object').columns)
cat_columns

['gender',
 'Partner',
 'Dependents',
 'PhoneService',
 'MultipleLines',
 'InternetService',
 'OnlineSecurity',
 'OnlineBackup',
 'DeviceProtection',
 'TechSupport',
 'StreamingTV',
 'StreamingMovies',
 'Contract',
 'PaperlessBilling',
 'PaymentMethod',
 'TotalCharges']

In [23]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [24]:
ohe = OneHotEncoder()

In [25]:
ohe.fit(X_train[cat_columns])

OneHotEncoder()

In [26]:
X_train_encoded = ohe.transform(X_train[cat_columns])
X_val_encoded = ohe.transform(X_val[cat_columns])

ValueError: Found unknown categories ['7365.7', '1905.4', '6555.2', '3930.55', '264.85', '1476.25', '7737.55', '755.6', '5536.5', '313.6', '4968', '7397', '1798.65', '333.6', '525.55', '74.4', '740.55', '2043.45', '7222.75', '211.95', '2773.9', '7299.65', '1375.6', '3019.7', '1375.15', '5848.6', '1923.5', '4549.45', '6589.6', '5817', '250.05', '1376.5', '116.95', '7244.7', '580.8', '4250.1', '889', '256.25', '3973.2', '220.65', '226.55', '6876.05', '836.35', '49.3', '6856.95', '7852.4', '4932.5', '1685.9', '4146.05', '5971.25', '272.95', '2879.2', '2416.1', '6014.85', '577.15', '197.4', '3810.55', '5232.9', '4534.45', '711.9', '2107.15', '854.8', '4615.9', '2021.35', '355.2', '2861.45', '810.3', '490.65', '1412.4', '4917.75', '4036.85', '4657.95', '961.4', '1144.5', '3886.85', '4807.35', '188.1', '325.45', '384.5', '4523.25', '161.65', '143.35', '2647.2', '4375.8', '5956.85', '1423.85', '178.85', '5375.15', '1389.85', '3958.2', '5552.05', '2000.2', '1094.35', '855.1', '105.5', '780.5', '2427.1', '1020.2', '3021.3', '1346.3', '799.65', '4590.35', '5290.45', '566.1', '4264.25', '1442', '2655.25', '1119.35', '6430.9', '6058.95', '528.45', '1001.5', '994.8', '4735.2', '7107', '1356.7', '8071.05', '71.25', '1398.6', '2122.05', '5265.55', '84.3', '1521.2', '172.35', '1336.8', '255.55', '2034.25', '3673.6', '2496.7', '1152.7', '414.95', '3001.2', '5931.75', '2647.1', '297.3', '894.3', '761.85', '791.75', '1506.4', '3691.2', '242', '6339.45', '107.25', '565.35', '5598.3', '134.6', '810.45', '189.95', '4925.35', '3665.55', '235.2', '4097.05', '775.6', '2278.75', '174.7', '908.15', '350.35', '2566.5', '3361.05', '44.05', '2419.55', '550.35', '3088.75', '252.75', '1144.6', '86', '3887.85', '4055.5', '682.1', '654.55', '96.1', '1555.65', '2460.35', '1312.45', '521.8', '5341.8', '2911.8', '3382.3', '5483.9', '3029.1', '1515.1', '68.5', '363.15', '4513.65', '2289.9', '8337.45', '70.5', '4189.7', '1181.75', '31.35', '454.65', '3221.25', '7362.9', '1334', '505.9', '5409.75', '139.35', '3450.15', '5443.65', '5742.9', '2656.5', '2754', '370.4', '7035.6', '5083.55', '2343.85', '745.3', '1164.3', '1387.45', '3309.25', '2640.55', '3371', '523.15', '6503.2', '223.75', '1212.1', '837.95', '7758.9', '3829.75', '765.5', '4063', '5918.8', '7412.25', '780.25', '340.85', '332.45', '4577.9', '689.35', '7690.9', '329.8', '370.5', '169.45', '322.5', '1758.9', '741', '8310.55', '6033.3', '1958.45', '335.75', '371.6', '7133.45', '3996.8', '420.45', '345.9', '267.6', '2341.5', '280', '743.5', '754', '1559.45', '7069.25', '1258.6', '3608', '207.4', '4400.75', '5435.6', '7904.25', '4003', '669.45', '345.5', '334.8', '6383.35', '869.9', '749.35', '1752.65', '1205.5', '8016.6', '5826.65', '2979.3', '61.45', '1430.95', '181.5', '5224.35', '5598', '162.15', '514', '176.3', '3024.15', '2793.55', '675.6', '1857.25', '1336.65', '2249.95', '4888.7', '981.45', '645.8', '5809.75', '100.2', '973.25', '296.15', '2248.05', '342.3', '638.95', '3532.25', '3171.6', '2020.9', '45.35', '1247.75', '5730.7', '957.1', '3419.5', '3233.6', '918.75', '2212.55', '2681.15', '3078.1', '4016.2', '3440.25', '3756.4', '1441.8', '20.95', '185.4', '754.5', '85', '1852.85', '451.55', '7159.05', '3876.2', '6161.9', '3409.6', '149.05', '5553.25', '5727.15', '5717.85', '2006.1', '3088.25', '43.8', '7181.95', '1440.75', '697.7', '8306.05', '1316.9', '518.75', '5703', '34.7', '1929', '78.05', '1927.3', '6912.7', '7576.7', '1513.6', '2806.9', '2193.65', '519.15', '7346.2', '749.25', '4953.25', '7234.8', '1477.65', '547.65', '741.7', '6339.3', '5980.75', '1862.9', '1183.8', '1035.5', '6859.5', '472.65', '4786.15', '717.95', '2010.55', '574.5', '1434.6', '1527.5', '2335.3', '74.25', '503.25', '6109.65', '697.25', '797.1', '217.1', '4473.45', '30.5', '8404.9', '8477.6', '21.1', '888.65', '7581.5', '3480', '6979.8', '1564.4', '5194.05', '4422.95', '2965.75', '25.3', '44.3', '5432.2', '314.95', '5459.2', '742.95', '793.55', '2302.35', '4265', '1009.25', '226.45', '1961.6', '295.55', '6754.35', '973.35', '3462.1', '1943.9', '1682.05', '703.55', '1794.8', '1673.8', '1253.5', '6869.7', '474.8', '1611', '3419.3', '1058.6', '554.45', '532.1', '1889.5', '6697.35', '2019.8', '41.2', '264.55', '263.05', '2510.7', '1028.9', '3678.3', '2203.7', '4166.35', '101.65', '216.2', '2845.15', '6179.35', '4520.15', '948.9', '3152.5', '3533.6', '250.8', '6302.85', '1429.65', '1070.7', '817.95', '521', '2042.05', '586.05', '69.85', '5264.5', '5812', '4525.8', '485.2', '329.75', '1776.45', '1938.9', '5336.35', '1536.75', '497.6', '1131.3', '3096.9', '1296.15', '7188.5', '6562.9', '289.1', '608.15', '3551.65', '2187.15', '6468.6', '6215.35', '6871.9', '518.3', '4664.15', '106.55', '89.15', '75.8', '6597.25', '4533.9', '1057.85', '6029.9', '4200.25', '535.05', '3436.1', '1709.1', '2184.35', '7553.6', '1331.05', '1275.6', '2157.3', '229.5', '4447.75', '181.6', '2509.95', '4965.1', '1415.55', '6819.45', '2583.75', '2032.3', '1267.05', '7475.1', '1046.5', '184.15', '6885.75', '3091.75', '870.25', '2109.35', '4264.6', '346.2', '8164.1', '3182.95', '2762.75', '3082.1', '1931.75', '821.6', '5431.9', '6069.25', '612.1', '1032.05', '3527.3', '1423.65', '5963.95', '966.55', '2656.3', '3370.2', '2688.85', '1208.15', '917.45', '1114.55', '5275.8', '38.7', '7537.5', '20.55', '1734.65', '1325.85', '4484.05', '3952.45', '1784.9', '7880.25', '1614.7', '3704.15', '759.35', '154.85', '1124.2', '4690.65', '5127.95', '5873.75', '1170.5', '4494.65', '1098.85', '3017.65', '255.25', '355.9', '5585.4', '424.45', '70.2', '6081.4', '5212.65', '5124.55', '2696.55', '2249.1', '129.6', '461.3', '4744.35', '1398.25', '1718.35', '125', '41.85', '903.7', '5231.2', '3688.6', '1747.2', '119.75', '422.4', '1131.5', '329.95', '467.55', '38', '2264.5', '1295.4', '701.05', '2962', '381.3', '3753.2', '38.45', '5936.55', '5969.85', '657.5', '4297.95', '68.45', '4036', '5948.7', '435.4', '590.35', '4479.2', '2104.55', '1037.75', '4052.4', '8331.95', '328.95', '501.35', '2184.85', '6281.45', '1258.3', '293.85', '6700.05', '3603.45', '512.25', '686.95', '3119.9', '6838.6', '832.05', '51.15', '83.3', '369.25', '1787.35', '5903.15', '1103.25', '1766.75', '8109.8', '5068.05', '2669.45', '706.6', '490.55', '6311.2', '4983.05', '2362.1', '196.9', '5762.95', '7113.75', '4884.85', '905.55', '2708.2', '1826.7', '4916.4', '407.05', '4299.2', '1993.2', '840.1', '44.55', '541.15', '6140.85', '3827.9', '4613.95', '253.9', '1742.75', '670.35', '6751.35', '1912.15', '4847.05', '5084.65', '996.95', '1742.5', '331.85', '3251.3', '1334.45', '5825.5', '84.4', '122.7', '6152.4', '751.65', '1790.8', '811.8', '6849.4', '6033.1', '7085.5', '1164.05', '4085.75', '4297.6', '1377.7', '2111.3', '4308.25', '109.2', '4949.1', '3914.05', '6448.85', '7567.2', '3959.35', '2281.6', '1388.75', '6548.65', '2633.4', '7578.05', '1782.05', '449.3', '93.7', '3465.7', '949.8', '3902.6', '1253.65', '3415.25', '1036', '3877.95', '3640.45', '1414.8', '1396', '3116.15', '4816.7', '784.45', '1992.85', '4368.85', '1940.8', '141.65', '2165.05', '452.8', '6219.6', '172.85', '5624.85', '67.1', '320.45', '1287.85', '908.55', '4976.15', '693.45', '39.65', '5669.5', '156.1', '3808', '764.55', '4029.95', '4991.5', '4719.75', '1727.5', '3027.25', '3321.35', '2799', '4863.85', '6424.25', '780.1', '1497.9', '2595.25', '825.7', '244.75', '113.35', '6392.85', '5839.3', '1266.1', '161.15', '7447.7', '450.4', '5706.2', '2898.95', '1274.05', '1327.15', '68.35', '1052.35', '2309.55', '5215.25', '633.85', '958.45', '1714.55', '1014.25', '498.1', '7320.9', '485.25', '877.35', '3275.15', '152.3', '367.55', '4589.85', '826', '70.4', '1218.25', '315.3', '7132.15', '4753.85', '1160.45', '670.65', '6688.1', '5315.8', '4188.4', '4213.9', '1169.35', '571.05', '1146.65', '1117.55', '2921.75', '122.9', '7251.9', '489.45', '4346.4', '4109', '226.8', '92.65', '1345.75', '3019.1', '5032.25', '436.6', '512.45', '3112.05', '317.25', '1493.2', '82.3', '707.5', '617.35', '856.5', '909.25', '2812.2', '19.15', '187.75', '1057.55', '2023.55', '5013', '5610.15', '166.3', '541.5', '1567', '4664.2', '2292.75', '684.05', '6717.9', '30.2', '1612.75', '4039.5', '169.65', '678.75', '95.05', '6669.05', '3715.65', '225.85', '4224.7', '71.35', '308.05', '5168.1', '717.3', '7661.8', '2200.7', '1143.8', '59.2', '2568.15', '1043.8', '471.85', '5681.1', '54.5', '6914.95', '2277.65', '2188.45', '4732.35', '3491.55', '1177.05', '6382.55', '5135.15', '523.5', '834.1', '174.65', '907.05', '458.1', '3899.05', '2884.9', '1534.75', '978.6', '1793.25', '3623.95', '48.55', '4138.7', '3460.95', '1448.8', '1776.55', '228.4', '470.95', '1815.65', '5589.3', '4981.15', '1614.9', '875.35', '516.3', '641.25', '91.3', '5703.25', '740', '758.6', '2375.2', '1328.15', '1083', '7009.5', '2570', '5776.45', '4900.65', '1764.75', '3097', '564.35', '389.95', '5976.9', '966.25', '886.4', '221.7', '3902.45', '1133.9', '4959.6', '7990.05', '225.55', '3207.55', '8333.95', '208.25', '189.1', '493.65', '4908.25', '185.6', '95.45', '1725', '1776.95', '44.1', '3465.05', '413.25', '4124.65', '4630.2', '1416.2', '973.55', '63.15', '1101.85', '4242.35', '746.5', '1115.6', '5639.05', '7985.9', '1049.05', '2391.8', '390.85', '428.45', '4331.4', '3055.5', '357.6', '3605.6', '2540.1', '6018.65', '224.05', '4237.5', '1113.95', '5405.8', '1738.9', '1818.3', '266.6', '417.65', '1798.9', '62.25', '401.85', '369.1', '1153.25', '2033.85', '5682.25', '654.85', '1849.95', '53.5', '4361.55', '8684.8', '973.65', '217.5', '2193', '5196.1', '717.5', '864.2', '2364', '242.05', '225.6', '672.55', '5244.45', '1326.35', '1778.5', '3264.45', '1465.75', '1390.85', '1643.55', '8127.6', '356.1', '935.9', '198.6', '247.25', '1072.6', '635.9', '1042.65', '370.25', '3013.05', '2263.45', '1556.85', '1193.05', '830.85', '20.6', '442.45', '2065.15', '1282.85', '6237.05', '3364.55', '1422.1', '7467.5', '3801.7', '2841.55', '2298.55', '692.35', '299.2', '2208.05', '383.55', '73.55', '2272.8', '5925.75', '818.05', '2658.4', '2979.2', '79.5', '3165.6', '354.05', '726.1', '3870', '3833.95', '3893.6', '7774.05', '1337.5', '443.9', '476.8', '58.85', '832.35', '815.55', '7887.25', '3211.2', '24', '7554.05', '4079.55', '967.85', '713', '1188.25', '4779.45', '283.95', '644.5', '3399.85', '4447.55', '7111.3', '7881.2', '813.85', '7508.55', '245.2', '3147.5', '655.9', '3958.25', '3244.4', '1759.4', '1566.9', '116.65', '24.25', '1396.25', '880.2', '8100.25', '1110.05', '6669.45', '5283.95', '239.05', '7795.95', '7895.15', '2236.2', '331.3', '7782.85', '775.3', '3846.35', '2351.8', '6241.35', '109.5', '1222.65', '5893.95', '892.65', '160.8', '294.95', '7348.8', '1620.8', '4784.45', '183.75', '8477.7', '2774.55', '1510.5', '5514.95', '7209', '1654.45', '2483.5', '77.6', '2287.25', '5099.15', '6697.2', '220.4', '6851.65', '1400.3', '6038.55', '809.75', '1680.25', '2483.05', '6333.4', '44.65', '49.25', '6157.6', '1850.65', '5500.6', '8297.5', '1623.4', '7493.05', '331.6', '332.5', '1406.65', '2535.55', '5655.45', '3653', '3527.6', '318.5', '449.75', '2929.75', '1384.75', '170.5', '952.3', '206.6', '6441.85', '1190.5', '3870.3', '1745.2', '307.6', '129.2', '368.1', '4882.8', '1652.4', '7875', '1882.8', '5617.95', '57.4', '6989.45', '893.2', '1815', '1297.35', '1752.55', '206.15', '533.6', '2007.25', '2715.3', '4517.25', '96.05', '42.7', '1664.3', '371.4', '1178.25', '373.5', '4664.5', '2959.8', '251.25', '8248.5', '3314.15', '1813.1', '6993.65', '5238.9', '3369.05', '3050.15', '605.9', '4903.2', '1893.5', '2015.8', '4805.3', '1426.45', '6096.9', '6632.75', '1565.7', '3988.5', '1462.6', '4016.75', '44.95', '521.3', '803.3', '377.55', '1253.8', '1013.2', '368.85', '5600.15', '2088.45', '1221.55', '1540.2', '8443.7', '387.2', '1611.65', '1663.5', '6858.9', '2933.95', '4039.3', '3334.9', '184.65', '1646.45', '85.7', '1841.9', '63.6', '1304.85', '1375.4', '3686.05', '130.15', '2282.55', '1315.35', '5757.2', '3208.65', '800.3', '3487.95', '970.55', '6585.2', '45.95', '7422.1', '1372.9', '3097.2', '1206.05', '1451.6', '7099', '4174.35', '3554.6', '4758.8', '1596.6', '2217.15', '2976.95', '2094.9', '498.25', '21', '1561.5', '5430.35', '1517.5', '116.6', '5878.9', '85.05', '475', '1327.4', '1445.3'] in column 15 during transform

In [30]:
data

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,0
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,0
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No,0
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No,0
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No,0
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No,0
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes,1


In [40]:
data.loc[data['churn']==1]

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,churn
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes,1
5,9305-CDSKC,Female,0,No,No,8,Yes,Yes,Fiber optic,No,...,No,Yes,Yes,Month-to-month,Yes,Electronic check,99.65,820.5,Yes,1
8,7892-POOKP,Female,0,Yes,No,28,Yes,Yes,Fiber optic,No,...,Yes,Yes,Yes,Month-to-month,Yes,Electronic check,104.80,3046.05,Yes,1
13,0280-XJGEX,Male,0,No,No,49,Yes,Yes,Fiber optic,No,...,No,Yes,Yes,Month-to-month,Yes,Bank transfer (automatic),103.70,5036.3,Yes,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7021,1699-HPSBG,Male,0,No,No,12,Yes,No,DSL,No,...,Yes,Yes,No,One year,Yes,Electronic check,59.80,727.8,Yes,1
7026,8775-CEBBJ,Female,0,No,No,9,Yes,No,DSL,No,...,No,No,No,Month-to-month,Yes,Bank transfer (automatic),44.20,403.35,Yes,1
7032,6894-LFHLY,Male,1,No,No,1,Yes,Yes,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Electronic check,75.75,75.75,Yes,1
7034,0639-TSIQW,Female,0,No,No,67,Yes,Yes,Fiber optic,Yes,...,No,Yes,No,Month-to-month,Yes,Credit card (automatic),102.95,6886.25,Yes,1


In [31]:
1142/7043

0.1621468124378816

In [42]:
data

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,0
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,0
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No,0
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No,0
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No,0
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No,0
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes,1


In [44]:
data.corr()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,churn
SeniorCitizen,1.0,0.016567,0.220173,0.150889
tenure,0.016567,1.0,0.2479,-0.352229
MonthlyCharges,0.220173,0.2479,1.0,0.193356
churn,0.150889,-0.352229,0.193356,1.0


In [45]:
X_train_sc_df = pd.read_csv('../Warren/X_train_sc.csv')
X_train_sc_df.head()

Unnamed: 0,gender,partner,dependents,phoneservice,multiplelines,internetservice,onlinesecurity,onlinebackup,deviceprotection,techsupport,streamingtv,streamingmovies,contract,paperlessbilling,paymentmethod,seniorcitizen,tenure,monthlycharges,totalcharges
0,0,1,1,1,0,0,2,2,0,2,0,0,0,0,0,-0.427069,-0.017944,-0.107867,-0.183246
1,0,1,0,1,2,1,2,0,0,2,2,0,0,1,1,-0.427069,-0.875263,1.056297,-0.468087
2,0,1,1,1,0,2,1,1,1,1,1,1,2,0,1,-0.427069,1.165973,-1.501185,-0.428393
3,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,-0.427069,-1.038562,0.174811,-0.781296
4,0,0,1,1,0,2,1,1,1,1,1,1,1,0,1,-0.427069,0.676076,-1.51122,-0.601398


In [48]:
y_train_enc_df = pd.read_csv('../Warren/y_train_enc.csv')
y_train_enc_df.head()

Unnamed: 0,churn
0,0
1,1
2,0
3,0
4,0


In [50]:
X_and_y_train_df = pd.concat([X_train_sc_df, y_train_enc_df], axis=1)

In [53]:
X_and_y_train_df.corr().sort_values('churn')

Unnamed: 0,gender,partner,dependents,phoneservice,multiplelines,internetservice,onlinesecurity,onlinebackup,deviceprotection,techsupport,streamingtv,streamingmovies,contract,paperlessbilling,paymentmethod,seniorcitizen,tenure,monthlycharges,totalcharges,churn
contract,-0.016255,0.290546,0.241531,-0.020188,0.107874,0.08158,0.365803,0.287436,0.348463,0.428114,0.198854,0.21637,1.0,-0.176169,-0.220984,-0.14089,0.66854,-0.073143,0.446352,-0.405451
tenure,-0.00752,0.381973,0.15301,-0.001749,0.33239,-0.041575,0.315016,0.373978,0.355334,0.315305,0.276553,0.281734,0.66854,0.006452,-0.367005,0.01876,1.0,0.247904,0.827485,-0.358398
onlinesecurity,-0.020516,0.145325,0.157988,-0.014071,-0.004261,-0.023282,1.0,0.182703,0.168825,0.27662,0.023402,0.035753,0.365803,-0.164518,-0.086876,-0.129141,0.315016,-0.05903,0.244852,-0.291231
techsupport,-0.016404,0.103605,0.132562,-0.021445,0.007628,-0.031707,0.27662,0.204859,0.236417,1.0,0.134233,0.145292,0.428114,-0.112696,-0.106037,-0.159099,0.315305,-0.025153,0.265912,-0.288184
onlinebackup,-0.019386,0.145162,0.101487,0.027518,0.117249,0.028697,0.182703,1.0,0.196503,0.204859,0.136926,0.121973,0.287436,-0.01403,-0.117107,-0.022804,0.373978,0.109867,0.376346,-0.219318
totalcharges,-0.012138,0.317613,0.061747,0.106943,0.441912,-0.179339,0.244852,0.376346,0.372641,0.265912,0.377512,0.379568,0.446352,0.158375,-0.328446,0.10022,0.827485,0.649257,1.0,-0.202782
deviceprotection,-0.010192,0.156635,0.081873,-0.00825,0.124389,0.043852,0.168825,0.196503,1.0,0.236417,0.270676,0.293376,0.348463,-0.049897,-0.136896,-0.02819,0.355334,0.156403,0.372641,-0.185515
dependents,0.008427,0.444457,1.0,-0.01433,-0.022439,0.028112,0.157988,0.101487,0.081873,0.132562,0.034963,0.020672,0.241531,-0.104829,-0.052242,-0.208476,0.15301,-0.10325,0.061747,-0.172287
partner,-0.007736,1.0,0.444457,0.007396,0.144504,-0.019289,0.145325,0.145162,0.156635,0.103605,0.130794,0.121959,0.290546,-0.013878,-0.137626,0.005567,0.381973,0.106316,0.317613,-0.146286
internetservice,-0.017792,-0.019289,0.028112,0.385418,-0.09853,1.0,-0.023282,0.028697,0.043852,-0.031707,0.104269,0.100662,0.08158,-0.13028,0.090197,-0.031819,-0.041575,-0.326385,-0.179339,-0.039844


In [54]:
X_and_y_train_df

Unnamed: 0,gender,partner,dependents,phoneservice,multiplelines,internetservice,onlinesecurity,onlinebackup,deviceprotection,techsupport,streamingtv,streamingmovies,contract,paperlessbilling,paymentmethod,seniorcitizen,tenure,monthlycharges,totalcharges,churn
0,0,1,1,1,0,0,2,2,0,2,0,0,0,0,0,-0.427069,-0.017944,-0.107867,-0.183246,0
1,0,1,0,1,2,1,2,0,0,2,2,0,0,1,1,-0.427069,-0.875263,1.056297,-0.468087,1
2,0,1,1,1,0,2,1,1,1,1,1,1,2,0,1,-0.427069,1.165973,-1.501185,-0.428393,0
3,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,-0.427069,-1.038562,0.174811,-0.781296,0
4,0,0,1,1,0,2,1,1,1,1,1,1,1,0,1,-0.427069,0.676076,-1.511220,-0.601398,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3956,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,-0.427069,0.716901,-0.663187,-0.020924,0
3957,1,1,0,1,2,1,2,2,2,0,2,2,1,1,2,-0.427069,1.574220,1.504567,2.471672,0
3958,0,0,0,1,2,0,2,0,2,2,2,2,1,1,1,-0.427069,0.104530,0.713404,0.368814,1
3959,0,1,1,1,0,0,2,2,0,0,2,2,0,0,3,-0.427069,-0.630315,0.332040,-0.470857,0
