# Importing Necessary Libraries

In [35]:
# libraries for data manipulation
import numpy as np
import pandas as pd
import re

# libraries for EDA and data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# libraries for model building and evaluation
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score, auc, confusion_matrix, roc_auc_score, recall_score, precision_score, precision_recall_curve, mean_squared_error

import xgboost
from xgboost import XGBClassifier

# other libraries
import warnings
warnings.filterwarnings('ignore')

# Reading and Understanding data

In [36]:
# Load the training set, the test set and the data dictionary (in that order)
# from CSV files located in the current working directory.
data_train, data_test, data_dict = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'data_dictionary.csv')
)

In [37]:
# Show the data dictionary: it maps the acronyms used in the column names to their descriptions.
data_dict

Unnamed: 0,Acronyms,Description
0,CIRCLE_ID,Telecom circle area to which the customer belo...
1,LOC,Local calls within same telecom circle
2,STD,STD calls outside the calling circle
3,IC,Incoming calls
4,OG,Outgoing calls
5,T2T,Operator T to T ie within same operator mobile...
6,T2M,Operator T to other operator mobile
7,T2O,Operator T to other operator fixed line
8,T2F,Operator T to fixed lines of T
9,T2C,Operator T to its own call center


In [38]:
# Preview the first rows of the training data (the last column, churn_probability, is the target).
data_train.head()

Unnamed: 0,id,circle_id,loc_og_t2o_mou,std_og_t2o_mou,loc_ic_t2o_mou,last_date_of_month_6,last_date_of_month_7,last_date_of_month_8,arpu_6,arpu_7,arpu_8,onnet_mou_6,onnet_mou_7,onnet_mou_8,offnet_mou_6,offnet_mou_7,offnet_mou_8,roam_ic_mou_6,roam_ic_mou_7,roam_ic_mou_8,roam_og_mou_6,roam_og_mou_7,roam_og_mou_8,loc_og_t2t_mou_6,loc_og_t2t_mou_7,loc_og_t2t_mou_8,loc_og_t2m_mou_6,loc_og_t2m_mou_7,loc_og_t2m_mou_8,loc_og_t2f_mou_6,loc_og_t2f_mou_7,loc_og_t2f_mou_8,loc_og_t2c_mou_6,loc_og_t2c_mou_7,loc_og_t2c_mou_8,loc_og_mou_6,loc_og_mou_7,loc_og_mou_8,std_og_t2t_mou_6,std_og_t2t_mou_7,...,count_rech_3g_7,count_rech_3g_8,av_rech_amt_data_6,av_rech_amt_data_7,av_rech_amt_data_8,vol_2g_mb_6,vol_2g_mb_7,vol_2g_mb_8,vol_3g_mb_6,vol_3g_mb_7,vol_3g_mb_8,arpu_3g_6,arpu_3g_7,arpu_3g_8,arpu_2g_6,arpu_2g_7,arpu_2g_8,night_pck_user_6,night_pck_user_7,night_pck_user_8,monthly_2g_6,monthly_2g_7,monthly_2g_8,sachet_2g_6,sachet_2g_7,sachet_2g_8,monthly_3g_6,monthly_3g_7,monthly_3g_8,sachet_3g_6,sachet_3g_7,sachet_3g_8,fb_user_6,fb_user_7,fb_user_8,aon,aug_vbc_3g,jul_vbc_3g,jun_vbc_3g,churn_probability
0,0,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,31.277,87.009,7.527,48.58,124.38,1.29,32.24,96.68,2.33,0.0,0.0,0.0,0.0,0.0,0.0,2.23,0.0,0.28,5.29,16.04,2.33,0.0,0.0,0.0,0.0,0.0,0.0,7.53,16.04,2.61,46.34,124.38,...,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,,,,1958,0.0,0.0,0.0,0
1,1,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,0.0,122.787,42.953,0.0,0.0,0.0,0.0,25.99,30.89,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.01,29.79,0.0,0.0,0.0,0.0,0.0,...,1.0,,,145.0,,0.0,352.91,0.0,0.0,3.96,0.0,,122.07,,,122.08,,,0.0,,0,0,0,0,0,0,0,1,0,0,0,0,,1.0,,710,0.0,0.0,0.0,0
2,2,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,60.806,103.176,0.0,0.53,15.93,0.0,53.99,82.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.53,12.98,0.0,24.11,0.0,0.0,0.0,0.0,0.0,2.14,0.0,0.0,24.64,12.98,0.0,0.0,2.94,...,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,,,,882,0.0,0.0,0.0,0
3,3,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,156.362,205.26,111.095,7.26,16.01,0.0,68.76,78.48,50.23,0.0,0.0,0.0,0.0,0.0,1.63,6.99,3.94,0.0,37.91,44.89,23.63,0.0,0.0,0.0,0.0,0.0,8.03,44.91,48.84,23.63,0.26,12.06,...,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,,,,982,0.0,0.0,0.0,0
4,4,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,240.708,128.191,101.565,21.28,4.83,6.13,56.99,38.11,9.63,53.64,0.0,0.0,15.73,0.0,0.0,10.16,4.83,6.13,36.74,19.88,4.61,11.99,1.23,5.01,0.0,9.85,0.0,58.91,25.94,15.76,0.0,0.0,...,1.0,0.0,175.0,191.0,142.0,390.8,308.89,213.47,0.0,0.0,0.0,0.0,35.0,0.0,0.0,35.12,0.0,0.0,0.0,0.0,0,0,0,7,6,6,0,0,0,0,1,0,1.0,1.0,1.0,647,0.0,0.0,0.0,0


In [39]:
# Preview the first rows of the test data (same layout as train, without churn_probability).
data_test.head()

Unnamed: 0,id,circle_id,loc_og_t2o_mou,std_og_t2o_mou,loc_ic_t2o_mou,last_date_of_month_6,last_date_of_month_7,last_date_of_month_8,arpu_6,arpu_7,arpu_8,onnet_mou_6,onnet_mou_7,onnet_mou_8,offnet_mou_6,offnet_mou_7,offnet_mou_8,roam_ic_mou_6,roam_ic_mou_7,roam_ic_mou_8,roam_og_mou_6,roam_og_mou_7,roam_og_mou_8,loc_og_t2t_mou_6,loc_og_t2t_mou_7,loc_og_t2t_mou_8,loc_og_t2m_mou_6,loc_og_t2m_mou_7,loc_og_t2m_mou_8,loc_og_t2f_mou_6,loc_og_t2f_mou_7,loc_og_t2f_mou_8,loc_og_t2c_mou_6,loc_og_t2c_mou_7,loc_og_t2c_mou_8,loc_og_mou_6,loc_og_mou_7,loc_og_mou_8,std_og_t2t_mou_6,std_og_t2t_mou_7,...,count_rech_3g_6,count_rech_3g_7,count_rech_3g_8,av_rech_amt_data_6,av_rech_amt_data_7,av_rech_amt_data_8,vol_2g_mb_6,vol_2g_mb_7,vol_2g_mb_8,vol_3g_mb_6,vol_3g_mb_7,vol_3g_mb_8,arpu_3g_6,arpu_3g_7,arpu_3g_8,arpu_2g_6,arpu_2g_7,arpu_2g_8,night_pck_user_6,night_pck_user_7,night_pck_user_8,monthly_2g_6,monthly_2g_7,monthly_2g_8,sachet_2g_6,sachet_2g_7,sachet_2g_8,monthly_3g_6,monthly_3g_7,monthly_3g_8,sachet_3g_6,sachet_3g_7,sachet_3g_8,fb_user_6,fb_user_7,fb_user_8,aon,aug_vbc_3g,jul_vbc_3g,jun_vbc_3g
0,69999,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,91.882,65.33,64.445,31.78,20.23,23.11,60.16,32.16,34.83,0.0,0.0,0.0,0.0,0.0,0.0,24.88,20.23,21.06,18.13,10.89,8.36,0.0,13.58,0.0,0.0,0.0,0.03,43.01,44.71,29.43,6.9,0.0,...,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,,,,1692,0.0,0.0,0.0
1,70000,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,414.168,515.568,360.868,75.51,41.21,19.84,474.34,621.84,394.94,0.0,0.0,0.0,0.0,0.0,0.0,75.51,41.21,19.84,473.61,598.08,377.26,0.73,0.0,0.0,0.0,0.0,0.0,549.86,639.29,397.11,0.0,0.0,...,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,,,,2533,0.0,0.0,0.0
2,70001,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,329.844,434.884,746.239,7.54,7.86,8.4,16.98,45.81,45.04,22.81,103.38,26.08,24.53,53.68,54.44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,,,,277,525.61,758.41,241.84
3,70002,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,43.55,171.39,24.4,5.31,2.16,0.0,40.04,205.01,24.01,0.0,0.0,0.0,0.0,0.0,0.0,5.31,0.0,0.0,2.94,98.61,20.51,0.0,0.0,2.35,0.0,6.18,0.0,8.26,98.61,22.86,0.0,2.16,...,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,,,,1244,0.0,0.0,0.0
4,70003,109,0.0,0.0,0.0,6/30/2014,7/31/2014,8/31/2014,306.854,406.289,413.329,450.93,609.03,700.68,60.94,23.84,74.16,0.0,0.0,0.0,0.0,0.0,0.0,0.45,0.78,14.56,2.39,2.66,10.94,0.0,0.0,0.0,0.0,0.0,0.0,2.84,3.44,25.51,450.48,608.24,...,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,,,,462,0.0,0.0,0.0


In [40]:
# Summarise the training frame: row count, column count and dtype breakdown.
data_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69999 entries, 0 to 69998
Columns: 172 entries, id to churn_probability
dtypes: float64(135), int64(28), object(9)
memory usage: 91.9+ MB


In [41]:
# Same summary for the test frame, to compare its shape and dtypes with the training frame.
data_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Columns: 171 entries, id to jun_vbc_3g
dtypes: float64(135), int64(27), object(9)
memory usage: 39.1+ MB


In [42]:
# Number of distinct circle_id values; dropna=False counts a missing value as
# its own category, exactly as len(Series.unique()) does.
data_train['circle_id'].nunique(dropna=False)

1

- Dropping the **circle_id** column, as it holds a single unique value across the entire training dataset and therefore carries no predictive information.
- Removing the **id** column, as it is only a row identifier and is not useful for predictive analysis.

In [43]:
# Remove the constant circle_id column from data_train in place. The popped
# Series is kept as the cell's last expression so it is still echoed below.
dropped_circle_id = data_train.pop('circle_id')
dropped_circle_id

0        109
1        109
2        109
3        109
4        109
        ... 
69994    109
69995    109
69996    109
69997    109
69998    109
Name: circle_id, Length: 69999, dtype: int64

In [44]:
# Remove the row-identifier column from data_train in place. The popped Series
# is kept as the cell's last expression so it is still echoed below.
dropped_id = data_train.pop('id')
dropped_id

0            0
1            1
2            2
3            3
4            4
         ...  
69994    69994
69995    69995
69996    69996
69997    69997
69998    69998
Name: id, Length: 69999, dtype: int64

# Data Cleaning and Preparation

Performing a first round of feature elimination by keeping only the relevant features: columns containing totals, maxima, averages and counts are kept, along with a few other columns that are important from a business perspective.

In [45]:
def _columns_matching(pattern):
    """Return the names of the `data_train` columns in which the regex `pattern` is found.

    The pattern is searched anywhere in the column name (not anchored), and the
    result keeps the column order of `data_train`.
    """
    matcher = re.compile(pattern)
    return [name for name in data_train.columns if matcher.search(name)]


# Aggregate columns: totals, averages, maxima and counts.
total_amounts = _columns_matching('total')
avg_amounts = _columns_matching('av')
max_amounts = _columns_matching('max')
count_amounts = _columns_matching('count')

# Off-network minutes of usage.
offnetwork_minutes = _columns_matching('offnet')

# Average revenue per user and data volume, split by 3G / 2G.
average_revenue_3g = _columns_matching('arpu.+3g')
average_revenue_2g = _columns_matching('arpu.+2g')
volume_3g = _columns_matching('vol.+3g')
volume_2g = _columns_matching('vol.+2g')

# Age on network.
age_on_network = _columns_matching('aon')

In [46]:
# Flatten the selected feature groups, in this fixed order, into one list of
# column names and finish with the target column.
feature_groups = (
    total_amounts,
    avg_amounts,
    max_amounts,
    count_amounts,
    offnetwork_minutes,
    average_revenue_3g,
    average_revenue_2g,
    volume_3g,
    volume_2g,
    age_on_network,
)
variables = [name for group in feature_groups for name in group]
variables.append('churn_probability')

In [47]:
# Keep only the shortlisted features plus the target.
# The explicit .copy() makes data_train an independent frame: without it pandas
# flags the selection as derived from the raw frame, and the later cleaning
# steps raise SettingWithCopyWarning (hidden here by warnings.filterwarnings).
data_train = data_train[variables].copy()
data_train.head()

Unnamed: 0,total_og_mou_6,total_og_mou_7,total_og_mou_8,total_ic_mou_6,total_ic_mou_7,total_ic_mou_8,total_rech_num_6,total_rech_num_7,total_rech_num_8,total_rech_amt_6,total_rech_amt_7,total_rech_amt_8,total_rech_data_6,total_rech_data_7,total_rech_data_8,av_rech_amt_data_6,av_rech_amt_data_7,av_rech_amt_data_8,max_rech_amt_6,max_rech_amt_7,max_rech_amt_8,max_rech_data_6,max_rech_data_7,max_rech_data_8,count_rech_2g_6,count_rech_2g_7,count_rech_2g_8,count_rech_3g_6,count_rech_3g_7,count_rech_3g_8,offnet_mou_6,offnet_mou_7,offnet_mou_8,arpu_3g_6,arpu_3g_7,arpu_3g_8,arpu_2g_6,arpu_2g_7,arpu_2g_8,vol_3g_mb_6,vol_3g_mb_7,vol_3g_mb_8,vol_2g_mb_6,vol_2g_mb_7,vol_2g_mb_8,aon,churn_probability
0,81.21,221.68,3.63,11.84,53.04,40.56,3,2,2,77,65,10,,,,,,,65,65,10,,,,,,,,,,32.24,96.68,2.33,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1958,0
1,0.0,30.73,31.66,3.44,39.44,25.04,3,4,5,0,145,50,,1.0,,,145.0,,0,145,50,,145.0,,,0.0,,,1.0,,0.0,25.99,30.89,,122.07,,,122.08,,0.0,3.96,0.0,0.0,352.91,0.0,710,0
2,56.49,99.36,0.0,124.29,33.83,36.64,2,4,2,70,120,0,,,,,,,70,70,0,,,,,,,,,,53.99,82.05,0.0,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,882,0
3,76.03,95.98,53.84,95.11,50.18,83.84,2,4,3,160,240,130,,,,,,,110,110,50,,,,,,,,,,68.76,78.48,50.23,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,982,0
4,63.26,42.94,15.76,21.54,9.36,28.31,13,10,8,290,136,122,7.0,7.0,6.0,175.0,191.0,142.0,50,41,30,25.0,41.0,25.0,7.0,6.0,6.0,0.0,1.0,0.0,56.99,38.11,9.63,0.0,35.0,0.0,0.0,35.12,0.0,0.0,0.0,0.0,390.8,308.89,213.47,647,0


In [48]:
# Inspect dtypes and non-null counts of the shortlisted features to spot columns with missing data.
data_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69999 entries, 0 to 69998
Data columns (total 47 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   total_og_mou_6      69999 non-null  float64
 1   total_og_mou_7      69999 non-null  float64
 2   total_og_mou_8      69999 non-null  float64
 3   total_ic_mou_6      69999 non-null  float64
 4   total_ic_mou_7      69999 non-null  float64
 5   total_ic_mou_8      69999 non-null  float64
 6   total_rech_num_6    69999 non-null  int64  
 7   total_rech_num_7    69999 non-null  int64  
 8   total_rech_num_8    69999 non-null  int64  
 9   total_rech_amt_6    69999 non-null  int64  
 10  total_rech_amt_7    69999 non-null  int64  
 11  total_rech_amt_8    69999 non-null  int64  
 12  total_rech_data_6   17568 non-null  float64
 13  total_rech_data_7   17865 non-null  float64
 14  total_rech_data_8   18417 non-null  float64
 15  av_rech_amt_data_6  17568 non-null  float64
 16  av_r

## Missing Data Treatment

In [49]:
# Percentage of missing values per column: the mean of the boolean NaN mask is
# the fraction of missing rows, scaled here to percent.
data_train.isna().mean() * 100

total_og_mou_6         0.000000
total_og_mou_7         0.000000
total_og_mou_8         0.000000
total_ic_mou_6         0.000000
total_ic_mou_7         0.000000
total_ic_mou_8         0.000000
total_rech_num_6       0.000000
total_rech_num_7       0.000000
total_rech_num_8       0.000000
total_rech_amt_6       0.000000
total_rech_amt_7       0.000000
total_rech_amt_8       0.000000
total_rech_data_6     74.902499
total_rech_data_7     74.478207
total_rech_data_8     73.689624
av_rech_amt_data_6    74.902499
av_rech_amt_data_7    74.478207
av_rech_amt_data_8    73.689624
max_rech_amt_6         0.000000
max_rech_amt_7         0.000000
max_rech_amt_8         0.000000
max_rech_data_6       74.902499
max_rech_data_7       74.478207
max_rech_data_8       73.689624
count_rech_2g_6       74.902499
count_rech_2g_7       74.478207
count_rech_2g_8       73.689624
count_rech_3g_6       74.902499
count_rech_3g_7       74.478207
count_rech_3g_8       73.689624
offnet_mou_6           3.954342
offnet_m

We see that the following columns have more than **70%** data missing. Hence removing the columns from the dataset:
- **total_rech_data_6**
- **total_rech_data_7**
- **total_rech_data_8**
- **av_rech_amt_data_6**
- **av_rech_amt_data_7**
- **av_rech_amt_data_8**
- **max_rech_data_6**
- **max_rech_data_7**
- **max_rech_data_8**
- **count_rech_2g_6**
- **count_rech_2g_7**
- **count_rech_2g_8**
- **count_rech_3g_6**
- **count_rech_3g_7**
- **count_rech_3g_8**
- **arpu_3g_6**
- **arpu_3g_7**
- **arpu_3g_8**
- **arpu_2g_6**
- **arpu_2g_7**
- **arpu_2g_8** 

In [51]:
# Features with more than 70% missing values (see the percentages above),
# given as name prefixes; each one exists for months 6, 7 and 8.
high_missing_prefixes = [
    'total_rech_data',
    'av_rech_amt_data',
    'max_rech_data',
    'count_rech_2g',
    'count_rech_3g',
    'arpu_3g',
    'arpu_2g',
]
high_missing_columns = [
    f'{prefix}_{month}'
    for prefix in high_missing_prefixes
    for month in (6, 7, 8)
]

# Reassign instead of using inplace=True so the result is a fresh frame, and
# use errors='ignore' so that re-running this cell (columns already removed)
# does not raise a KeyError.
data_train = data_train.drop(columns=high_missing_columns, errors='ignore')

In [60]:
# Re-check the percentage of missing values per column after dropping the
# sparse features (mean of the boolean NaN mask, scaled to percent).
data_train.isna().mean() * 100

total_og_mou_6       0.000000
total_og_mou_7       0.000000
total_og_mou_8       0.000000
total_ic_mou_6       0.000000
total_ic_mou_7       0.000000
total_ic_mou_8       0.000000
total_rech_num_6     0.000000
total_rech_num_7     0.000000
total_rech_num_8     0.000000
total_rech_amt_6     0.000000
total_rech_amt_7     0.000000
total_rech_amt_8     0.000000
max_rech_amt_6       0.000000
max_rech_amt_7       0.000000
max_rech_amt_8       0.000000
offnet_mou_6         3.954342
offnet_mou_7         3.838626
offnet_mou_8         5.290076
vol_3g_mb_6          0.000000
vol_3g_mb_7          0.000000
vol_3g_mb_8          0.000000
vol_2g_mb_6          0.000000
vol_2g_mb_7          0.000000
vol_2g_mb_8          0.000000
aon                  0.000000
churn_probability    0.000000
dtype: float64

In [61]:
# Summary statistics of the remaining features, to inspect their spread and skew before imputing.
data_train.describe()

Unnamed: 0,total_og_mou_6,total_og_mou_7,total_og_mou_8,total_ic_mou_6,total_ic_mou_7,total_ic_mou_8,total_rech_num_6,total_rech_num_7,total_rech_num_8,total_rech_amt_6,total_rech_amt_7,total_rech_amt_8,max_rech_amt_6,max_rech_amt_7,max_rech_amt_8,offnet_mou_6,offnet_mou_7,offnet_mou_8,vol_3g_mb_6,vol_3g_mb_7,vol_3g_mb_8,vol_2g_mb_6,vol_2g_mb_7,vol_2g_mb_8,aon,churn_probability
count,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,67231.0,67312.0,66296.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0,69999.0
mean,306.451436,310.572674,304.513065,199.71064,201.878029,198.486034,7.566522,7.706667,7.224932,328.139788,322.376363,323.846355,104.569265,104.137573,107.540351,198.874771,197.153383,196.543577,122.171882,128.934444,135.486541,51.773924,51.240204,50.127506,1220.639709,0.101887
std,465.502866,479.13177,477.936832,290.114823,296.771338,288.336731,7.041452,7.050614,7.195597,404.211068,411.07012,426.181405,121.407701,120.782543,124.39675,316.818355,322.482226,324.089234,554.869965,554.096072,568.310234,212.513909,211.114667,213.101403,952.426321,0.302502
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,180.0,0.0
25%,44.78,42.91,38.71,38.64,41.34,38.29,3.0,3.0,3.0,110.0,100.0,90.0,30.0,30.0,30.0,34.86,32.24,31.575,0.0,0.0,0.0,0.0,0.0,0.0,468.0,0.0
50%,145.28,141.23,138.36,114.78,116.33,114.61,6.0,6.0,5.0,229.0,220.0,225.0,110.0,110.0,98.0,96.48,91.885,91.8,0.0,0.0,0.0,0.0,0.0,0.0,868.0,0.0
75%,374.305,380.045,370.895,251.07,249.47,249.71,9.0,10.0,9.0,438.0,430.0,436.0,120.0,128.0,144.0,232.99,227.63,229.345,0.0,0.0,0.0,0.0,0.0,0.0,1813.0,0.0
max,10674.03,8285.64,14043.06,7716.14,9699.01,10830.38,170.0,138.0,138.0,35190.0,40335.0,45320.0,4010.0,3299.0,4449.0,8362.36,7043.98,14007.34,45735.4,28144.12,30036.06,10285.9,7873.55,11117.61,4337.0,1.0


The following columns have only a small share of missing data (roughly 4–5%):
- **offnet_mou_6         3.954342**
- **offnet_mou_7         3.838626**
- **offnet_mou_8         5.290076**

We also see that the above columns are highly right skewed. Hence imputing the columns with their median value.

In [67]:
# Columns with a small share of missing values, imputed with their own median
# (the distributions are right-skewed, so the median is preferred to the mean).
columns = ['offnet_mou_7', 'offnet_mou_6', 'offnet_mou_8']

for column in columns:
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # data_train[column]: that chained in-place form acts on an intermediate
    # Series, emits a FutureWarning on pandas 2.x and leaves data_train
    # unchanged once copy-on-write is enabled.
    data_train[column] = data_train[column].fillna(data_train[column].median())

In [69]:
# Confirm via the non-null counts that no missing values remain after imputation.
data_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69999 entries, 0 to 69998
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   total_og_mou_6     69999 non-null  float64
 1   total_og_mou_7     69999 non-null  float64
 2   total_og_mou_8     69999 non-null  float64
 3   total_ic_mou_6     69999 non-null  float64
 4   total_ic_mou_7     69999 non-null  float64
 5   total_ic_mou_8     69999 non-null  float64
 6   total_rech_num_6   69999 non-null  int64  
 7   total_rech_num_7   69999 non-null  int64  
 8   total_rech_num_8   69999 non-null  int64  
 9   total_rech_amt_6   69999 non-null  int64  
 10  total_rech_amt_7   69999 non-null  int64  
 11  total_rech_amt_8   69999 non-null  int64  
 12  max_rech_amt_6     69999 non-null  int64  
 13  max_rech_amt_7     69999 non-null  int64  
 14  max_rech_amt_8     69999 non-null  int64  
 15  offnet_mou_6       69999 non-null  float64
 16  offnet_mou_7       699