# Customer Churn Analysis 
## Deliverables: Data Preparation and Modeling 

### Data Preparation 

In [1]:
# Understanding Dataset in dept and processing it as required
import pandas as pd
df = pd.read_csv (r'C:\Users\Lahiru Yasanga\Documents\WILDA\Customer_Churn_data.csv.csv')
df
print(df.head())
print(df.tail())
print(df.info())
print(df.dtypes)
print(df.duplicated())
print(df.describe())
# Save the descriptive statistics to a CSV file
df.describe().to_csv('preprocess_dataset.csv')
df.duplicated().to_csv('PreprocessDataduplicated.csv', index=False)

   gender  SeniorCitizen Dependents  tenure PhoneService MultipleLines  \
0  Female              0         No       1           No            No   
1    Male              0         No      34          Yes            No   
2    Male              0         No       2          Yes            No   
3    Male              0         No      45           No            No   
4  Female              0         No       2          Yes            No   

  InternetService        Contract  MonthlyCharges Churn  
0             DSL  Month-to-month           29.85    No  
1             DSL        One year           56.95    No  
2             DSL  Month-to-month           53.85   Yes  
3             DSL        One year           42.30    No  
4     Fiber optic  Month-to-month           70.70   Yes  
      gender  SeniorCitizen Dependents  tenure PhoneService MultipleLines  \
7038    Male              0        Yes      24          Yes           Yes   
7039  Female              0        Yes      72       

#### Processing

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
df = pd.read_csv(r'C:\Users\Lahiru Yasanga\Documents\WILDA\Customer_Churn_data.csv.csv')

# Splitting the data into 80% training and 20% testing
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

# Perform one-hot encoding (Encoding Categorical Variable)
train_set_encoded = pd.get_dummies(train_set, drop_first=True)
test_set_encoded = pd.get_dummies(test_set, drop_first=True)

# Scale numeric features
numeric_features = ['tenure', 'MonthlyCharges']
scaler = StandardScaler()
train_set_encoded[numeric_features] = scaler.fit_transform(train_set_encoded[numeric_features])
test_set_encoded[numeric_features] = scaler.transform(test_set_encoded[numeric_features])

In [3]:
# Displaying Training Set
print("Final Training Set:\n", train_set_encoded)
# Save the dataset to a CSV file
train_set_encoded.to_csv('training_set.csv', index=False)

Final Training Set:
       SeniorCitizen    tenure  MonthlyCharges  gender_Male  Dependents_Yes  \
2142              0 -0.465683       -0.000474            0               1   
1623              0  0.885537        1.074754            0               0   
6074              0 -1.284605       -1.376499            1               0   
1362              0 -1.161766        0.177346            1               0   
6754              0 -1.325551       -0.098524            1               1   
...             ...       ...             ...          ...             ...   
3772              0 -1.284605        1.001632            1               0   
5191              0 -0.383791        0.872006            0               1   
5226              0 -0.834198       -1.452945            1               1   
5390              1 -0.834198        1.149538            1               0   
860               0 -0.260953       -1.497815            1               0   

      PhoneService_Yes  MultipleLines_Yes 

In [4]:
# Displaying Testing Set
print("\nFinal Testing Set:\n", test_set_encoded)
# Save the dataset to a CSV file
test_set_encoded.to_csv('testing_set.csv', index=False)


Final Testing Set:
       SeniorCitizen    tenure  MonthlyCharges  gender_Male  Dependents_Yes  \
185               0 -1.284605       -1.331629            0               0   
2715              0  0.353238       -1.316672            1               0   
3825              0  0.803645       -1.512772            0               1   
1807              0 -1.284605        0.381756            0               0   
132               0  1.417836       -0.475768            1               0   
...             ...       ...             ...          ...             ...   
6366              0  1.294997        0.114195            0               0   
315               0  0.762699        1.501854            1               1   
2439              0 -0.629468       -1.494492            1               1   
5002              0  1.499728       -0.695134            0               1   
1161              0 -1.284605       -1.113924            1               1   

      PhoneService_Yes  MultipleLines_Yes 

In [5]:
# Displaying Scaled Training Set
print("Scaled Training Set:\n", train_set)

Scaled Training Set:
       gender  SeniorCitizen Dependents  tenure PhoneService MultipleLines  \
2142  Female              0        Yes      21          Yes            No   
1623  Female              0         No      54          Yes           Yes   
6074    Male              0         No       1           No            No   
1362    Male              0         No       4          Yes            No   
6754    Male              0        Yes       0          Yes           Yes   
...      ...            ...        ...     ...          ...           ...   
3772    Male              0         No       1          Yes            No   
5191  Female              0        Yes      23          Yes           Yes   
5226    Male              0        Yes      12          Yes            No   
5390    Male              1         No      12          Yes           Yes   
860     Male              0         No      26          Yes            No   

     InternetService        Contract  MonthlyCharges 

In [6]:
# Displaying Scaled Testing Set
print("\nScaled Testing Set:\n", test_set)


Scaled Testing Set:
       gender  SeniorCitizen Dependents  tenure PhoneService MultipleLines  \
185   Female              0         No       1           No            No   
2715    Male              0         No      41          Yes           Yes   
3825  Female              0        Yes      52          Yes            No   
1807  Female              0         No       1          Yes            No   
132     Male              0         No      67          Yes            No   
...      ...            ...        ...     ...          ...           ...   
6366  Female              0         No      64          Yes            No   
315     Male              0        Yes      51          Yes           Yes   
2439    Male              0        Yes      17          Yes            No   
5002  Female              0        Yes      69           No            No   
1161    Male              0        Yes       1           No            No   

     InternetService        Contract  MonthlyCharges 