<a href="https://colab.research.google.com/github/Karrrmma/Facial-recognition/blob/main/Customer_Churn_pynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load dataset
data = pd.read_csv('telco_customer_churn.csv')

# Handle missing values
data.fillna(method='ffill', inplace=True)

# Encode categorical variables
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Split dataset into features and target variable
X = data.drop('Churn', axis=1)
y = data['Churn']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [17]:
data.head()


Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,5375,0,0,1,0,1,0,1,0,0,...,0,0,0,0,0,1,2,29.85,2505,0
1,3962,1,0,0,0,34,1,0,0,2,...,2,0,0,0,1,0,3,56.95,1466,0
2,2564,1,0,0,0,2,1,0,0,2,...,0,0,0,0,0,1,3,53.85,157,1
3,5535,1,0,0,0,45,0,1,0,2,...,2,2,0,0,1,0,0,42.3,1400,0
4,6511,0,0,0,0,2,1,0,1,0,...,0,0,0,0,0,1,2,70.7,925,1


In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

# Initialize and train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Evaluate model
print(classification_report(y_test, y_pred))
print('AUC-ROC:', roc_auc_score(y_test, y_proba))


              precision    recall  f1-score   support

           0       0.83      0.91      0.87      1036
           1       0.66      0.48      0.56       373

    accuracy                           0.80      1409
   macro avg       0.74      0.70      0.71      1409
weighted avg       0.78      0.80      0.79      1409

AUC-ROC: 0.8388781351247839


In [9]:
import pickle

# Save the trained model
with open('churn_model.pkl', 'wb') as file:
    pickle.dump(model, file)


In [15]:
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score

# Load the trained model
with open('churn_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

# Predict on the entire test set
y_pred = loaded_model.predict(X_test)

# Predict probabilities for AUC-ROC
y_proba = loaded_model.predict_proba(X_test)[:, 1]
#reshape = y_proba.reshape(-1,1)
#print('yprob',reshape)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Generate classification report
report = classification_report(y_test, y_pred)
print(f'Classification Report:\n{report}')

# Calculate AUC-ROC
auc_roc = roc_auc_score(y_test, y_proba)
print(f'AUC-ROC: {auc_roc}')


yprob [[0.76]
 [0.21]
 [0.04]
 ...
 [0.  ]
 [0.07]
 [0.5 ]]
Accuracy: 0.7970191625266146
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.91      0.87      1036
           1       0.66      0.48      0.56       373

    accuracy                           0.80      1409
   macro avg       0.74      0.70      0.71      1409
weighted avg       0.78      0.80      0.79      1409

AUC-ROC: 0.8388781351247839


Code implementation of keeping churn customers using random forest clasifier


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load dataset
data = pd.read_csv('telco_customer_churn.csv')

# Handle missing values
data.fillna(method='ffill', inplace=True)

# Encode categorical variables
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Split dataset into features and target variable
X = data.drop('Churn', axis=1)
y = data['Churn']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [16]:
from sklearn.ensemble import RandomForestClassifier

# Train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [17]:
# Predict on the test set
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Identify high-risk customers (threshold can be adjusted)
threshold = 0.5
high_risk_customers = X_test[y_proba > threshold]


In [18]:
# Convert scaled features back to original values (for easy identification)
high_risk_customers_original = scaler.inverse_transform(high_risk_customers)
high_risk_customers_df = pd.DataFrame(high_risk_customers_original, columns=data.columns[:-1])

# Optionally, include predicted probabilities
high_risk_customers_df['Churn_Probability'] = y_proba[y_proba > threshold]

# Save high-risk customers to a CSV file for further actions
high_risk_customers_df.to_csv('high_risk_customers.csv', index=False)


In [19]:
def send_personalized_email(customer_id, offer_details):
    # Function to send a personalized email (mock implementation)
    print(f"Sending email to {customer_id}: {offer_details}")

# Example intervention strategy
offer_details = "Special 20% discount on your next month's subscription!"

# Assuming customer data includes an 'Email' column for sending emails
for index, customer in high_risk_customers_df.iterrows():
    customer_id = customer['customerID']  # Assuming there's a CustomerID column

    send_personalized_email(customer_id,  offer_details)


Sending email to 692.0: Special 20% discount on your next month's subscription!
Sending email to 4911.0: Special 20% discount on your next month's subscription!
Sending email to 4084.0: Special 20% discount on your next month's subscription!
Sending email to 4483.0: Special 20% discount on your next month's subscription!
Sending email to 1255.0: Special 20% discount on your next month's subscription!
Sending email to 1122.0: Special 20% discount on your next month's subscription!
Sending email to 2575.0: Special 20% discount on your next month's subscription!
Sending email to 6617.0: Special 20% discount on your next month's subscription!
Sending email to 3911.0: Special 20% discount on your next month's subscription!
Sending email to 1393.0: Special 20% discount on your next month's subscription!
Sending email to 5072.0: Special 20% discount on your next month's subscription!
Sending email to 3813.0: Special 20% discount on your next month's subscription!
Sending email to 1356.0: Spec

In [22]:
print(f'X_train shape: {X_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'y_test shape: {y_test.shape}')


X_train shape: (5634, 20)
X_test shape: (1409, 20)
y_train shape: (5634,)
y_test shape: (1409,)


In [21]:
from sklearn.metrics import accuracy_score, classification_report

report = classification_report(y_test, y_pred)
print(f'Classification Report:\n{report}')

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.91      0.87      1036
           1       0.66      0.48      0.56       373

    accuracy                           0.80      1409
   macro avg       0.74      0.70      0.71      1409
weighted avg       0.78      0.80      0.79      1409

