In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, RobustScaler, StandardScaler


In [2]:

# Load the telecom churn dataset; adjust the 'telecom.csv' path if your copy lives elsewhere
telecom_data = pd.read_csv('telecom.csv')


In [5]:
# Print a frequency table for every column of the raw frame, one after another.
for column in telecom_data.columns:
    print(f"Value counts for column '{column}':")
    # end="\n\n" leaves one blank line after each table for readability
    print(telecom_data[column].value_counts(), end="\n\n")


Value counts for column 'Account length':
93     35
105    33
87     33
101    32
99     32
       ..
186     1
6       1
195     1
243     1
221     1
Name: Account length, Length: 205, dtype: int64

Value counts for column 'Area code':
415    1318
510     679
408     669
Name: Area code, dtype: int64

Value counts for column 'Number vmail messages':
0     1933
31      50
28      42
29      39
24      37
33      37
30      35
27      34
25      33
32      33
26      32
23      30
37      25
22      24
35      24
34      23
36      23
38      22
39      22
21      22
20      14
19      14
42      13
40      13
17      12
16      11
43       9
15       8
44       7
41       7
18       6
12       6
14       5
45       4
46       3
47       3
13       3
8        2
50       2
9        2
4        1
10       1
Name: Number vmail messages, dtype: int64

Value counts for column 'Total day minutes':
162.3    7
183.4    7
216.0    6
175.4    6
159.5    6
        ..
209.6    1
247.0    1
321.3   

In [None]:
# Columns examined below: Total intl calls, Area code, Customer service calls, Churn

In [6]:
# Frequency of each distinct 'Total intl calls' value
telecom_data['Total intl calls'].value_counts()

3     544
4     503
2     388
5     376
6     267
7     172
1     125
8      90
9      83
10     37
11     25
0      15
13     13
12     12
14      5
15      4
18      2
16      2
19      1
20      1
17      1
Name: Total intl calls, dtype: int64

In [7]:
# Frequency of each distinct 'Area code' value
telecom_data['Area code'].value_counts()

415    1318
510     679
408     669
Name: Area code, dtype: int64

In [8]:
# Frequency of each distinct 'Customer service calls' value
telecom_data['Customer service calls'].value_counts()

1    945
2    608
0    555
3    348
4    133
5     49
6     17
7      8
9      2
8      1
Name: Customer service calls, dtype: int64

In [9]:
# Class balance of the 'Churn' target before it is encoded
telecom_data['Churn'].value_counts()

False    2278
True      388
Name: Churn, dtype: int64

In [16]:
# Encode the boolean 'Churn' target as integers for the classifier.
# LabelEncoder numbers the classes in sorted order, i.e. False -> 0 and True -> 1.
# (LabelEncoder is imported with the other sklearn names in the first cell.)
label_encoder = LabelEncoder()
telecom_data['Churn'] = label_encoder.fit_transform(telecom_data['Churn'])


In [19]:
# Confirm the encoding: class counts of 'Churn' after conversion to 0/1
telecom_data['Churn'].value_counts()

0    2278
1     388
Name: Churn, dtype: int64

In [21]:
# Predictor columns and target used for the scaling comparison.
# NOTE(review): 'Area code' only takes a handful of distinct values and is used
# here as a plain number; consider treating it as categorical — TODO confirm.
feature = [
    'Total intl calls',
    'Area code',
    'Customer service calls',
]
X = telecom_data.loc[:, feature]
y = telecom_data.loc[:, 'Churn']

In [22]:
# Preview the selected feature frame
X

Unnamed: 0,Total intl calls,Area code,Customer service calls
0,3,415,1
1,3,415,1
2,5,415,0
3,7,408,2
4,3,415,3
...,...,...,...
2661,5,415,2
2662,6,415,2
2663,4,415,3
2664,6,510,2


In [23]:
# Preview the encoded target series
y

0       0
1       0
2       0
3       0
4       0
       ..
2661    0
2662    0
2663    0
2664    0
2665    0
Name: Churn, Length: 2666, dtype: int64

In [24]:

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified on y — consider stratify=y given the
# class imbalance seen in the 'Churn' counts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [25]:
# Inspect the training features (row index is shuffled by the split)
X_train

Unnamed: 0,Total intl calls,Area code,Customer service calls
1855,6,415,2
2244,6,415,1
2525,2,408,5
961,5,415,2
1618,2,415,1
...,...,...,...
1638,9,415,0
1095,6,510,1
1130,6,510,2
1294,5,510,2


In [26]:
# Fraction of rows that landed in the training features; computed from the
# actual objects so the check stays valid if the dataset or test_size changes
len(X_train) / len(X)

0.7996999249812453

In [27]:
# Inspect the held-out test features
X_test

Unnamed: 0,Total intl calls,Area code,Customer service calls
2087,8,510,1
1915,5,415,0
479,5,415,1
1898,3,415,2
741,4,510,1
...,...,...,...
1196,6,415,1
958,3,415,3
861,3,510,0
1510,4,415,1


In [28]:
# Fraction of rows that landed in the test features; computed from the
# actual objects so the check stays valid if the dataset or test_size changes
len(X_test) / len(X)

0.2003000750187547

In [29]:
# Inspect the training labels
y_train

1855    0
2244    0
2525    1
961     0
1618    0
       ..
1638    0
1095    0
1130    0
1294    0
860     0
Name: Churn, Length: 2132, dtype: int64

In [30]:
# Fraction of labels that landed in the training split; computed from the
# actual objects so the check stays valid if the dataset or test_size changes
len(y_train) / len(y)

0.7996999249812453

In [31]:
# Inspect the held-out test labels
y_test

2087    1
1915    0
479     0
1898    1
741     1
       ..
1196    0
958     0
861     0
1510    0
1992    1
Name: Churn, Length: 534, dtype: int64

In [32]:
# Fraction of labels that landed in the test split; computed from the
# actual objects so the check stays valid if the dataset or test_size changes
len(y_test) / len(y)

0.2003000750187547

In [33]:

# One instance of each scaler under comparison, all with default settings
min_max_scaler, standard_scaler, robust_scaler = (
    MinMaxScaler(),
    StandardScaler(),
    RobustScaler(),
)


In [34]:

# Learn the per-column min/max from the training rows only, then rescale both
# splits with those same parameters so no test information leaks into the fit
min_max_scaler.fit(X_train)
X_train_minmax = min_max_scaler.transform(X_train)
X_test_minmax = min_max_scaler.transform(X_test)


In [35]:
# Min-max-scaled training features (NumPy array, one column per feature)
X_train_minmax 

array([[0.3       , 0.06862745, 0.22222222],
       [0.3       , 0.06862745, 0.11111111],
       [0.1       , 0.        , 0.55555556],
       ...,
       [0.3       , 1.        , 0.22222222],
       [0.25      , 1.        , 0.22222222],
       [0.2       , 1.        , 0.22222222]])

In [36]:
# Min-max-scaled test features, transformed with the training-set parameters
X_test_minmax

array([[0.4       , 1.        , 0.11111111],
       [0.25      , 0.06862745, 0.        ],
       [0.25      , 0.06862745, 0.11111111],
       ...,
       [0.15      , 1.        , 0.        ],
       [0.2       , 0.06862745, 0.11111111],
       [0.3       , 0.06862745, 0.11111111]])

In [37]:

# Standard scaling: parameters are learned from the training rows only
standard_scaler.fit(X_train)
X_train_standard = standard_scaler.transform(X_train)
X_test_standard = standard_scaler.transform(X_test)

# Robust scaling: likewise fitted on the training rows, then applied to both splits
robust_scaler.fit(X_train)
X_train_robust = robust_scaler.transform(X_train)
X_test_robust = robust_scaler.transform(X_test)


In [38]:
# Standard-scaled training features
X_train_standard

array([[ 0.61677678, -0.53379076,  0.3481153 ],
       [ 0.61677678, -0.53379076, -0.42338348],
       [-0.99915944, -0.69765905,  2.66261165],
       ...,
       [ 0.61677678,  1.69013609,  0.3481153 ],
       [ 0.21279273,  1.69013609,  0.3481153 ],
       [-0.19119133,  1.69013609,  0.3481153 ]])

In [39]:
# Standard-scaled test features, transformed with the training-set parameters
X_test_standard

array([[ 1.42474489,  1.69013609, -0.42338348],
       [ 0.21279273, -0.53379076, -1.19488226],
       [ 0.21279273, -0.53379076, -0.42338348],
       ...,
       [-0.59517538,  1.69013609, -1.19488226],
       [-0.19119133, -0.53379076, -0.42338348],
       [ 0.61677678, -0.53379076, -0.42338348]])

In [40]:
# Robust-scaled training features
X_train_robust

array([[ 0.66666667,  0.        ,  1.        ],
       [ 0.66666667,  0.        ,  0.        ],
       [-0.66666667, -0.07235142,  4.        ],
       ...,
       [ 0.66666667,  0.98191214,  1.        ],
       [ 0.33333333,  0.98191214,  1.        ],
       [ 0.        ,  0.98191214,  1.        ]])

In [41]:
# Robust-scaled test features, transformed with the training-set parameters
X_test_robust

array([[ 1.33333333,  0.98191214,  0.        ],
       [ 0.33333333,  0.        , -1.        ],
       [ 0.33333333,  0.        ,  0.        ],
       ...,
       [-0.33333333,  0.98191214, -1.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.66666667,  0.        ,  0.        ]])

In [42]:

# Logistic regression with default hyperparameters; this single instance is
# refit in the cells below, once per scaling method
log_reg = LogisticRegression()


In [43]:

# Train on the min-max-scaled features, predict the matching test split,
# and record the resulting accuracy
y_pred_minmax = log_reg.fit(X_train_minmax, y_train).predict(X_test_minmax)
accuracy_minmax = accuracy_score(y_true=y_test, y_pred=y_pred_minmax)


In [44]:
# Predicted test labels from the model trained on min-max-scaled features
y_pred_minmax

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [45]:
# Test-set accuracy with min-max scaling
accuracy_minmax

0.8539325842696629

In [None]:

# Repeat for standard and robust scaling methods...


In [46]:
# Refit the same estimator on the standard-scaled features and score it
# on the matching test split
y_pred_standard = log_reg.fit(X_train_standard, y_train).predict(X_test_standard)
accuracy_standard = accuracy_score(y_true=y_test, y_pred=y_pred_standard)


In [48]:
# Test-set accuracy with standard scaling
accuracy_standard

0.8520599250936329

In [None]:
# min_max_scaler = MinMaxScaler()
# standard_scaler = StandardScaler()
# robust_scaler = RobustScaler()

In [47]:
# Refit the same estimator on the robust-scaled features and score it
# on the matching test split
y_pred_robust = log_reg.fit(X_train_robust, y_train).predict(X_test_robust)
accuracy_robust = accuracy_score(y_true=y_test, y_pred=y_pred_robust)


In [49]:
# Test-set accuracy with robust scaling
accuracy_robust

0.8520599250936329

In [53]:

# Side-by-side summary of the test accuracy obtained with each scaler.
# Labels are padded so the numbers line up in the printed output.
for label, score in (
    ("Accuracy with Min-Max Scaling: ", accuracy_minmax),
    ("Accuracy with Standard Scaling:", accuracy_standard),
    ("Accuracy with Robust Scaling:  ", accuracy_robust),
):
    print(label, score)


Accuracy with Min-Max Scaling:  0.8539325842696629
Accuracy with Standard Scaling: 0.8520599250936329
Accuracy with Robust Scaling:   0.8520599250936329
