In [10]:
from pathlib import Path

from pptx import Presentation
from pptx.util import Inches

# Start a new, empty deck; every slide below is appended to this object.
prs = Presentation()

# Slide 1: Title Slide
# Built from layout 0; the title shape takes the project name and
# placeholder 1 takes the author/course details, one item per line.
slide = prs.slides.add_slide(prs.slide_layouts[0])
title, subtitle = slide.shapes.title, slide.placeholders[1]
title.text = "Predicting Coronary Heart Disease Using Machine Learning"
subtitle.text = (
    "Brian Reppeto\n"
    "DSC 630 T303 2251\n"
    "Instructor: Frank Neigebauer\n"
    "November 7, 2024"
)

# Slides 2-14: content slides.
# Every content slide is built the same way (layout 1, a title, and body text
# in placeholder 1), so the deck is described as data and built in one loop
# rather than repeating the same five statements per slide.
# Each entry is (slide title, body text), listed in presentation order.
content_slides = [
    # Slide 2: Introduction
    (
        "Introduction",
        "Overview of coronary heart disease (CHD) and the objective of this project:\n"
        "To develop a predictive model for CHD using the Framingham Heart Disease dataset.",
    ),
    # Slide 3: Problem Statement
    (
        "Problem Statement",
        "CHD is a leading cause of death globally, imposing a significant burden on healthcare systems. "
        "This project aims to create a predictive model for early intervention to mitigate CHD risks.",
    ),
    # Slide 4: Importance of the Problem
    (
        "Importance of the Problem",
        "Early prediction allows preventive measures and prioritizes healthcare resources. Accurate models "
        "help in healthcare planning and public health policies to reduce CHD incidence.",
    ),
    # Slide 5: Target Audience
    (
        "Target Audience",
        "Healthcare providers, public health officials, and insurance companies are key stakeholders. "
        "The model can help providers identify at-risk individuals and inform targeted interventions.",
    ),
    # Slide 6: Data Source
    (
        "Data Source",
        "The Framingham Heart Study dataset includes over 4,000 records with 15 attributes such as age, "
        "cholesterol levels, blood pressure, and smoking status, all relevant for predicting CHD risk.",
    ),
    # Slide 7: Data Relevance
    (
        "Data Relevance",
        "The Framingham dataset's 10-year span and comprehensive participant diversity enable robust and "
        "generalizable predictions, making it a valuable resource for CHD modeling.",
    ),
    # Slide 8: Data Analysis and Preprocessing
    (
        "Data Analysis and Preprocessing",
        "Data cleaning included imputation of missing values and feature selection using SelectKBest. "
        "SMOTE was applied to address class imbalance in the dataset.",
    ),
    # Slide 9: Model Selection
    (
        "Model Selection",
        "Logistic Regression: Chosen for simplicity and interpretability.\n"
        "Random Forest: A robust model known for accuracy, especially with imbalanced data.",
    ),
    # Slide 10: Performance Metrics
    (
        "Performance Metrics",
        "ROC-AUC, Precision, Recall, F1-Score, and Accuracy were used to assess model performance. "
        "These metrics are essential for evaluating CHD prediction accuracy on imbalanced data.",
    ),
    # Slide 11: Results
    (
        "Results",
        "Random Forest achieved 87% accuracy and a ROC-AUC score of 0.8706, demonstrating strong performance. "
        "Logistic Regression, while less accurate, remains useful for interpretable cases.",
    ),
    # Slide 12: Conclusion
    (
        "Conclusion",
        "Random Forest with SMOTE is recommended for high accuracy and balanced handling of classes. "
        "Future improvements include feature engineering and testing on diverse datasets.",
    ),
    # Slide 13: Ethical Considerations
    (
        "Ethical Considerations",
        "Considerations include data privacy, fairness across demographics, transparency, and real-world impacts. "
        "Explainable AI techniques and regular audits ensure ethical, equitable use of the model.",
    ),
    # Slide 14: References
    (
        "References",
        "Ashish Bhardwaj. (2022). Framingham heart study dataset [Data set]. Kaggle.\n"
        "Basu, T., Menzer, O., & Engel-Wolf, S. (2020). The ethics of machine learning in medical sciences.\n"
        "Khan, H., Bilal, A., Aslam, M., & Mustafa, H. (2024). Heart disease detection analysis.",
    ),
]

# Append one slide per entry. The names `slide`, `title` and `content` are
# rebound on each pass, so after the loop they refer to the last slide
# (References), just as they did when the slides were written out one by one.
for heading, body in content_slides:
    slide = prs.slides.add_slide(prs.slide_layouts[1])
    title = slide.shapes.title
    title.text = heading
    content = slide.placeholders[1]
    content.text = body

# Save the presentation
# NOTE(review): the destination is an absolute, user-specific path; adjust it
# when running this cell on another machine.
pptx_file_path = r'C:\Users\brepp\BER Data Science\DSC630\CHD_Predictive_Model_Presentation_final_version.pptx'

# Make sure the destination folder exists before writing; saving into a
# missing folder would otherwise fail with FileNotFoundError.
Path(pptx_file_path).parent.mkdir(parents=True, exist_ok=True)
prs.save(pptx_file_path)

# Bare expression: echoes the saved file's path as the cell output.
pptx_file_path


'C:\\Users\\brepp\\BER Data Science\\DSC630\\CHD_Predictive_Model_Presentation_final_version.pptx'