# 📌 Churn Prediction and Retention System
This notebook implements a complete end-to-end pipeline for customer churn prediction and retention strategies using MySQL, Machine Learning, NLP, and Generative AI Chatbots.

## 📌 Step 1: Connect to MySQL & Load Data

In [None]:

# Install necessary libraries.
# `%pip` (rather than `!pip`) installs into the environment of the running
# kernel, so the packages are importable from this notebook.
%pip install mysql-connector-python sqlalchemy pandas

import mysql.connector
import pandas as pd
from sqlalchemy import create_engine

import os
from urllib.parse import quote_plus

# Connect to MySQL Database.
# Connection settings come from environment variables so that credentials are
# never saved inside the notebook (or its outputs):
#   MYSQL_USER, MYSQL_PASSWORD, MYSQL_DATABASE  - required
#   MYSQL_HOST (default "localhost"), MYSQL_PORT (default "3306") - optional
missing_settings = [
    name
    for name in ("MYSQL_USER", "MYSQL_PASSWORD", "MYSQL_DATABASE")
    if not os.environ.get(name)
]
if missing_settings:
    raise RuntimeError(
        "Set these environment variables before running this cell: "
        + ", ".join(missing_settings)
    )

# quote_plus escapes characters such as '@', ':' or '/' in the user name and
# password, which would otherwise corrupt the URL.
db_connection_str = (
    "mysql+mysqlconnector://"
    f"{quote_plus(os.environ['MYSQL_USER'])}:{quote_plus(os.environ['MYSQL_PASSWORD'])}"
    f"@{os.environ.get('MYSQL_HOST', 'localhost')}:{os.environ.get('MYSQL_PORT', '3306')}"
    f"/{os.environ['MYSQL_DATABASE']}"
)
engine = create_engine(db_connection_str)

# Load dataset from MySQL into Pandas DataFrame
query = "SELECT * FROM customer_churn_data;"
df = pd.read_sql(query, con=engine)

# Display data structure
df.head()


## 📌 Step 2: Exploratory Data Analysis (EDA)

In [None]:

import matplotlib.pyplot as plt
import seaborn as sns

# Check for missing values
print("Missing Values:")
print(df.isnull().sum())

# Visualize churn distribution
plt.figure(figsize=(6, 4))
sns.countplot(x='churn', data=df)
plt.title('Churn Distribution')
plt.show()

# Correlation heatmap.
# Correlation is only defined for numeric columns. Recent pandas versions raise
# on non-numeric columns instead of silently skipping them, so select the
# numeric columns explicitly (works on old and new pandas alike).
numeric_df = df.select_dtypes(include='number')
plt.figure(figsize=(12, 6))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Feature Correlations')
plt.show()


## 📌 Step 3: Data Preprocessing & Feature Engineering

In [None]:

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Encode categorical variables.
# `df` is updated in place because the prediction endpoint later reads the
# encoded columns straight from `df`.
# Each column gets its own encoder so both label mappings stay available
# (`classes_`, `inverse_transform`); `encoder` keeps holding the churn mapping.
gender_encoder = LabelEncoder()
df['gender'] = gender_encoder.fit_transform(df['gender'])
encoder = LabelEncoder()
df['churn'] = encoder.fit_transform(df['churn'])

# Feature Selection.
# Keep only numeric/boolean columns: free-text columns (e.g. the `feedback`
# column analysed in Step 6) cannot be standardized and would make
# StandardScaler raise.
X = df.drop(columns=['customer_id', 'churn']).select_dtypes(include=['number', 'bool'])
y = df['churn']

# Train-test split.
# Split BEFORE scaling so that test rows do not influence the scaler's
# mean/variance (avoids leaking test-set statistics into training).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize numerical features: fit on the training rows only, then apply
# the same transformation everywhere else.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled version of the full feature matrix (kept for backward compatibility).
X_scaled = scaler.transform(X)


## 📌 Step 4: Build Churn Prediction Models

In [None]:

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Baseline classifier: logistic regression with scikit-learn's default settings,
# fitted on the training split.
log_reg = LogisticRegression().fit(X_train, y_train)

# Ensemble classifier: 100-tree random forest, seeded so repeated runs agree.
# The bare `fit` call stays last so the fitted estimator is shown as cell output.
rf = RandomForestClassifier(random_state=42, n_estimators=100)
rf.fit(X_train, y_train)


## 📌 Step 5: Evaluate & Compare Model Performance

In [None]:

from sklearn.metrics import accuracy_score, classification_report

# Evaluate models: score every fitted classifier on the same held-out test set.
models = {'Logistic Regression': log_reg, 'Random Forest': rf}

for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"Model: {name}")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    # The newline must be written as the escape "\n"; a literal line break
    # inside a single-quoted string is a syntax error.
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("-" * 50)


## 📌 Step 6: NLP for Customer Feedback Analysis

In [None]:

# `%pip` installs into the running kernel's environment (unlike `!pip`).
%pip install nltk textblob

from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

import nltk

# Sample feedback column
df['feedback'] = df['feedback'].fillna('')  # Handle missing feedback

# SentimentIntensityAnalyzer (VADER) loads its lexicon from NLTK's data
# directory and raises LookupError if it has not been downloaded yet.
# The download is skipped automatically when the resource is already present.
nltk.download('vader_lexicon', quiet=True)

# Sentiment Analysis: keep VADER's "compound" score for each feedback text.
sia = SentimentIntensityAnalyzer()
df['sentiment_score'] = df['feedback'].apply(lambda x: sia.polarity_scores(x)['compound'])

# Display sentiment distribution
plt.figure(figsize=(6, 4))
sns.histplot(df['sentiment_score'], bins=20, kde=True)
plt.title('Sentiment Score Distribution')
plt.show()

# Sample insights
df[['feedback', 'sentiment_score']].head(10)


## 📌 Step 7: Generative AI Chatbot for Retention Strategies

In [None]:

# `%pip` installs into the running kernel's environment (unlike `!pip`).
%pip install transformers

from transformers import pipeline

# Build the Hugging Face text-generation pipeline (GPT-2 checkpoint) that
# produces the chatbot's replies.
chatbot = pipeline(task="text-generation", model="gpt2")

def chatbot_response(user_query):
    """Generate a reply to ``user_query`` with the module-level ``chatbot`` pipeline.

    Args:
        user_query: Prompt text passed to the text-generation pipeline.

    Returns:
        The generated continuation as a string, without the prompt echoed back.
    """
    response = chatbot(
        user_query,
        # Budget for the reply itself. `max_length=50` also counted the prompt
        # tokens, so a long prompt left little or no room for an answer.
        max_new_tokens=50,
        num_return_sequences=1,
        # Return only the newly generated text; by default the pipeline
        # prepends the prompt, which made the bot repeat the user's message.
        return_full_text=False,
    )
    return response[0]['generated_text'].strip()

# Example chat.
# A dedicated name is used for the message so the SQL statement stored in
# `query` (Step 1) is not overwritten by this demo.
user_message = "My debit card is not working."
print("User:", user_message)
print("Chatbot:", chatbot_response(user_message))


## 📌 Step 8: Deployment & API Integration

In [None]:

# `%pip` installs into the running kernel's environment (unlike `!pip`).
%pip install fastapi uvicorn

from fastapi import FastAPI
import uvicorn

from fastapi import HTTPException

app = FastAPI()

@app.get("/predict_churn/")
def predict_churn(customer_id: int):
    """Predict churn for one customer using the fitted scaler and random forest.

    Args:
        customer_id: Value looked up in the ``customer_id`` column of ``df``.

    Returns:
        A dict with the requested ``"Customer ID"`` and a boolean
        ``"Churn Prediction"``.

    Raises:
        HTTPException: 404 when no row in ``df`` has this ``customer_id``.
    """
    matches = df[df['customer_id'] == customer_id]
    if matches.empty:
        # Without this guard the scaler receives zero rows and the request
        # fails with an unhandled error (HTTP 500).
        raise HTTPException(status_code=404, detail=f"Customer {customer_id} not found")

    # Feed the scaler exactly the columns it was fitted on. `df` gains extra
    # columns after preprocessing (e.g. `sentiment_score`), so dropping only
    # the id/target columns no longer matches the fitted feature set.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        customer_data = matches[list(feature_names)]
    else:
        # Scaler carries no recorded feature names: fall back to dropping the
        # id and target columns.
        customer_data = matches.drop(columns=['customer_id', 'churn'])

    customer_data_scaled = scaler.transform(customer_data)
    # If several rows share the id, the first row's prediction is reported.
    prediction = rf.predict(customer_data_scaled)[0]
    return {"Customer ID": customer_id, "Churn Prediction": bool(prediction)}

import asyncio
import threading

if __name__ == "__main__":
    # NOTE(review): 0.0.0.0 listens on every network interface and the API has
    # no authentication; confirm this exposure is intended.
    server_kwargs = {"host": "0.0.0.0", "port": 8000}
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running (plain `python script.py`): block here and
        # serve until interrupted.
        uvicorn.run(app, **server_kwargs)
    else:
        # An event loop is already running in this thread (as in a notebook
        # kernel), where calling `uvicorn.run` directly fails. Serve from a
        # daemon thread instead, which also keeps the notebook usable while the
        # API is up.
        threading.Thread(
            target=uvicorn.run,
            args=(app,),
            kwargs=server_kwargs,
            daemon=True,
        ).start()
