In [12]:
from fpdf import FPDF

class PDFReport(FPDF):
    def header(self):
        if self.page_no() == 1:
            self.set_font("Arial", "B", 14)
            self.cell(0, 10, "Credit Card Customer Churn Prediction", ln=True, align="C")
            self.ln(5)

    def chapter_title(self, title):
        self.set_font("Arial", "B", 12)
        self.set_fill_color(200, 220, 255)
        self.cell(0, 10, title, ln=True, fill=True)
        self.ln(1)

    def chapter_body(self, body):
        self.set_font("Arial", "", 11)
        self.multi_cell(0, 10, body)
        self.ln()

pdf = PDFReport()
pdf.add_page()

# Sections
sections = {
    "1. Business Situation": (
        "A manager at a bank is disturbed with more and more customers leaving their credit card services. "
        "He is looking for a data-driven solution to proactively identify "
        "customers likely to churn, so they can take preventive actions and retain them."
    ),
    "2. Key Problems and Objective": (
        "The key problem is the increasing number of customers leaving the credit card services. "
        "The objective of this project is to build a predictive model that can accurately classify "
        "whether a customer is likely to churn or not, enabling targeted retention strategies."
    ),
    "3. Dataset Overview": (
        "- Source: Kaggle - Credit Card customers (www.kaggle.com/datasets/sakshigoyal7/credit-card-customers)\n"
        "- Records: 10,127 customers\n"
        "- Features: 21 features including demographics, credit usage, and customer activity\n"
        "- Target Variable: Attrition_Flag (Existing vs. Attrited Customer)\n"
        "- Churn Rate: 16.07%"
    ),
    "4. Tools and Techniques Used": (
        "- Programming: Python\n"
        "- Libraries: pandas, numpy, matplotlib, seaborn, scikit-learn, xgboost, shap, flask, joblib\n"
        "- Visualization: seaborn, matplotlib, SHAP\n"
        "- Model Deployment: Flask Web API with HTML forms\n"
        "- Model Persistence: joblib\n"
        "- Environment: Jupyter Lab, Anaconda"
    ),
    "5. Data Preprocessing": (
        "- Dropped irrelevant column: CLIENTNUM\n"
        "- Handled ordinal features with category ordering (Education_Level, Income_Category)\n"
        "- Applied one-hot encoding to categorical features\n"
        "- Performed feature selection based on correlation analysis\n"
        "- Standardized numerical features using StandardScaler"
    ),
    "6. Exploratory Data Analysis": (
        "- Distribution analysis of churn vs. non-churn\n"
        "- Count plots of categorical variables against churn\n"
        "- Box plots of numerical variables against churn\n"
        "- Correlation heatmaps for feature relationships\n"
        "- Target-wise percentage breakdown per category for insights"
    ),
    "7. Model Building": (
        "Trained and evaluated four classification models:\n"
        "- Logistic Regression\n"
        "- Naive Bayes Classifier\n"
        "- Random Forest Classifier\n"
        "- XGBoost Classifier (final model)\n"
        "All models trained on a stratified 80/20 train-test split."
    ),
    "8. Evaluation Metrics": (
        "- Accuracy\n"
        "- Precision, Recall, F1-Score\n"
        "- ROC-AUC Score\n"
        "- XGBoost outperformed all models with the best F1 and ROC-AUC scores\n"
        "- Feature importance and SHAP values used for interpretability"
    ),
    "9. Key Takeaways": (
        "- Churn is low rate (16%) (classification report will be needed for evaluation)\n"
        "- Transaction-related features (Total_Trans_Ct, Amt_Change, Utilization) are key churn indicators\n"
        "- XGBoost provides the best balance of accuracy and interpretability\n"
        "- Flask app allows easy real-time predictions via user input"
    ),
    "10. Resources": (  "" ),
}

# Add all sections to PDF
for title, content in sections.items():
    pdf.chapter_title(title)
    pdf.chapter_body(content)

# Add clickable resource links
pdf.set_text_color(0, 0, 255)
pdf.set_font("Arial", 'U', 12)
pdf.cell(0, 10, "GitHub Repo", ln=1, link="https://github.com/Sahnoun-A/Credit_Card_Customer_Churn_Prediction")
pdf.cell(0, 10, "Kaggle Notebook", ln=1, link="https://www.kaggle.com/code/abdelkabirsahnoun/credit-card-customer-churn-prediction")
pdf.cell(0, 10, "Flask API Demo", ln=1, link="http://www.customer-churn.sahnoun.us:8080")

# Save
output_path = "Customer_Churn_Project_Summary.pdf"
pdf.output(output_path)

output_path

readme_content = """
# Credit Card Customer Churn Prediction

This project aims to predict whether a bank customer will churn (i.e., stop using the bank's credit card service) based on their profile and transaction behavior.
A predictive model was built and deployed using Flask for real-time inference.

---

## 1. Business Situation

A manager at a bank is disturbed with more and more customers leaving their credit card services.
He is looking for a data-driven solution to proactively identify
customers likely to churn, so they can take preventive actions and retain them. 

---

## 2. Key Problems and Objective

The key problem is the increasing number of customers leaving the credit card services.
The objective of this project is to build a predictive model that can accurately classify
whether a customer is likely to churn or not, enabling targeted retention strategies.

---

## 3. Dataset Overview

- **Source:** Kaggle - Credit Card customers (www.kaggle.com/datasets/sakshigoyal7/credit-card-customers)
- **Records:** 10,127 customers
- **Features:** 21 features including demographics, credit usage, and customer activity
- **Target Variable:** Attrition_Flag (Existing vs. Attrited Customer)
- **Churn Rate:** 16.07%

---

## 4. Tools & Techniques Used

- **Programming:** Python
- **Libraries:** pandas, numpy, matplotlib, seaborn, scikit-learn, xgboost, shap, flask, joblib
- **Visualization:** seaborn, matplotlib, SHAP
- **Model Deployment:** Flask Web API with HTML forms
- **Model Persistence:** joblib
- **Environment:** Jupyter Lab, Anaconda

---

## 5. Data Preprocessing

- Dropped irrelevant column: CLIENTNUM
- Handled ordinal features with category ordering (Education_Level, Income_Category)
- Applied one-hot encoding to categorical features
- Performed feature selection based on correlation analysis
- Standardized numerical features using StandardScaler

---

## 6. Exploratory Data Analysis

- Distribution analysis of churn vs. non-churn
- Count plots of categorical variables against churn
- Box plots of numerical variables against churn
- Correlation heatmaps for feature relationships
- Target-wise percentage breakdown per category for insights

---

## 7. Model Building

Trained four models:
Trained and evaluated four classification models:
- Logistic Regression
- Naive Bayes Classifier
- Random Forest Classifier
- XGBoost Classifier (final model)
All models trained on a stratified 80/20 train-test split.

---

## 8. Evaluation Metrics

- Accuracy
- Precision, Recall, F1-Score
- ROC-AUC Score
- XGBoost outperformed all models with the best F1 and ROC-AUC scores
- Feature importance and SHAP values used for interpretability

---

## 9. Key Takeaways

- Churn is low rate (16%) (classification report will be needed for evaluation)
- Transaction-related features (Total_Trans_Ct, Amt_Change, Utilization) are key churn indicators
- XGBoost provides the best balance of accuracy and interpretability
- Flask app allows easy real-time predictions via user input

---

## 10. Resources

- [**GitHub Repo**](https://github.com/Sahnoun-A/Credit_Card_Customer_Churn_Prediction)
- [**Kaggle Notebook**](https://www.kaggle.com/code/abdelkabirsahnoun/credit-card-customer-churn-prediction)
- [**Flask API Demo**](http://www.customer-churn.sahnoun.us:8080)
"""

# Save the README content to a markdown file
readme_path = "README.md"
with open(readme_path, "w") as f:
    f.write(readme_content)

readme_path


  self.set_font("Arial", "B", 14)
  self.cell(0, 10, "Credit Card Customer Churn Prediction", ln=True, align="C")
  self.set_font("Arial", "B", 12)
  self.cell(0, 10, title, ln=True, fill=True)
  self.set_font("Arial", "", 11)
  pdf.set_font("Arial", 'U', 12)
  pdf.cell(0, 10, "GitHub Repo", ln=1, link="https://github.com/Sahnoun-A/Credit_Card_Customer_Churn_Prediction")
  pdf.cell(0, 10, "Kaggle Notebook", ln=1, link="https://www.kaggle.com/code/abdelkabirsahnoun/credit-card-customer-churn-prediction")
  pdf.cell(0, 10, "Flask API Demo", ln=1, link="http://www.customer-churn.sahnoun.us:8080")


'README.md'