In [None]:
!pip install python-pptx



Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl (472 kB)
Collecting typing-extensions>=4.9.0
  Downloading typing_extensions-4.13.2-py3-none-any.whl (45 kB)
Collecting XlsxWriter>=0.5.7
  Downloading xlsxwriter-3.2.5-py3-none-any.whl (172 kB)
Installing collected packages: typing-extensions, XlsxWriter, python-pptx
Successfully installed XlsxWriter-3.2.5 python-pptx-1.0.2 typing-extensions-4.13.2


In [4]:
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN

# Presentation object that add_slide() and the statements below append slides to.
prs = Presentation()

# Helper to add a slide with title and content
def add_slide(title, content_lines, bullet=True):
    """Append a "Title and Content" slide to ``prs``, one paragraph per line.

    Args:
        title: Text placed in the slide's title placeholder.
        content_lines: Iterable of strings. Each one becomes a paragraph in
            the body placeholder, formatted as 18 pt, 8 pt space after,
            indent level 0, left-aligned.
        bullet: Accepted so existing call sites keep working; the function
            does not read it, so paragraphs keep whatever bullet style the
            layout applies.
    """
    slide_layout = prs.slide_layouts[1]  # Title and Content
    slide = prs.slides.add_slide(slide_layout)
    title_placeholder = slide.shapes.title
    content_placeholder = slide.placeholders[1]

    title_placeholder.text = title
    tf = content_placeholder.text_frame
    tf.clear()

    # clear() keeps a single empty paragraph in the frame. Fill that one with
    # the first line rather than appending after it; otherwise every slide
    # body starts with an empty bullet.
    for index, line in enumerate(content_lines):
        p = tf.paragraphs[0] if index == 0 else tf.add_paragraph()
        p.text = line
        p.font.size = Pt(18)
        p.space_after = Pt(8)
        p.level = 0
        p.alignment = PP_ALIGN.LEFT

# Slide 1 - title slide with a three-line subtitle
slide_layout = prs.slide_layouts[0]  # Title Slide
slide = prs.slides.add_slide(slide_layout)
slide.shapes.title.text = "Waterpoint Functionality Classification"
slide.placeholders[1].text = "Predicting Functional Status of Waterpoints in Tanzania\nYour Name\nDate"

# Slides 2-12 - (title, body lines) pairs, rendered in this order by add_slide
bullet_slides = [
    ("Introduction", [
        "Reliable water access is crucial for community health and development.",
        "Many waterpoints fail and need timely maintenance.",
        "Goal: Build machine learning models to classify waterpoints as functional or needs_attention.",
    ]),
    ("Dataset Overview", [
        "Waterpoint data with multiple features (e.g., location, age, water amount).",
        "Target classes: functional, needs_attention.",
        "Challenges: Imbalanced classes, Features on different scales.",
    ]),
    ("Data Preprocessing", [
        "Log-transformed skewed features like amount_tsh and population.",
        "Standardized numerical features using StandardScaler to normalize scales.",
        "Split into training and validation sets.",
    ]),
    ("Modeling Approach", [
        "Developed and compared three base models:",
        "1. Logistic Regression",
        "2. Decision Tree Classifier",
        "3. Random Forest Classifier",
        "Evaluated based on accuracy, precision, recall, and F1-score.",
        "Tuned Random Forest with class weights for balance.",
    ]),
    ("Logistic Regression", [
        "Accuracy: ~77.5%.",
        "Precision for 'needs_attention': 0.80.",
        "Recall for 'needs_attention': 0.68 (misses 32% failing wells).",
        "Strength: Good at identifying functional wells.",
    ]),
    ("Decision Tree", [
        "Accuracy: ~77.0%.",
        "Recall for 'functional': 0.87.",
        "Recall for 'needs_attention': 0.64 (misses 36%).",
        "Slight overfitting to majority class observed.",
    ]),
    ("Random Forest", [
        "Accuracy: ~81.9%.",
        "Balanced precision and recall for both classes.",
        "Recall for 'needs_attention': 0.77 (improved detection).",
        "Most robust and reliable base model.",
    ]),
    ("Hyperparameter Tuning", [
        "Applied class_weight='balanced' to Random Forest.",
        "Final accuracy: ~82%.",
        "Balanced precision (0.82) and recall (0.77) for 'needs_attention'.",
        "Improved detection of failing wells.",
    ]),
    ("ROC Curve & AUC", [
        "AUC score: 0.88.",
        "Indicates strong model discrimination ability.",
        "Visualization: ROC curve shows true positive rate vs false positive rate.",
    ]),
    ("Recommendations", [
        "Deploy Random Forest model for practical waterpoint monitoring.",
        "Regularly retrain the model with new data to improve accuracy.",
        "Investigate false positives carefully to avoid unnecessary maintenance.",
        "Use model predictions to prioritize inspections and repairs.",
    ]),
    ("Conclusion", [
        "Machine learning can effectively classify waterpoint status.",
        "Random Forest provided best balance of accuracy and recall.",
        "Supports proactive maintenance and better resource allocation.",
        "Future work: Incorporate more features, explore ensemble methods, deploy in field.",
    ]),
]
for slide_title, slide_lines in bullet_slides:
    add_slide(slide_title, slide_lines)

# Slide 13 - blank slide holding two side-by-side text boxes that mark where
# images are to be added by hand
slide_layout = prs.slide_layouts[6]  # Blank slide
slide = prs.slides.add_slide(slide_layout)

left, top, width, height = Inches(0.5), Inches(0.5), Inches(4.5), Inches(3.5)
txBox = slide.shapes.add_textbox(left, top, width, height)
tf = txBox.text_frame
tf.text = "Image: Water Source (Add image manually)"

left2, top2, width2, height2 = Inches(5), Inches(0.5), Inches(4.5), Inches(3.5)
txBox2 = slide.shapes.add_textbox(left2, top2, width2, height2)
tf2 = txBox2.text_frame
tf2.text = "Image: Community fetching water (Add image manually)"

# Slide 14 - closing slide
add_slide(
    "Thank You!",
    ["Questions?", "Contact: Charity mwangangi"],
    bullet=False,
)

# Write the finished deck to the working directory
prs.save("Waterpoint_Classification_Project_Presentation.pptx")


In [7]:
import requests

# Each remote image is paired with the local filename it is saved under.
url1, filename1 = (
    "https://images.unsplash.com/photo-1506744038136-46273834b3fb",
    "people_fetching_water.jpg",
)
url2, filename2 = (
    "https://images.unsplash.com/photo-1500534623283-312aade485b7",
    "water_source.jpg",
)

# Request headers passed to requests.get() by download_image().
headers = {"User-Agent": "Mozilla/5.0"}

def download_image(url, filename, timeout=30):
    """Fetch ``url`` and write the response body to ``filename``.

    The request is sent with the module-level ``headers``. A one-line status
    message is printed whether the download succeeds or fails; nothing is
    returned and no file is written on failure.

    Args:
        url: Address of the image to download.
        filename: Local path the response bytes are written to.
        timeout: Seconds to wait on the server. Without a timeout
            ``requests.get`` can block indefinitely on a stalled connection.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, etc. are reported the
        # same way as a bad status code instead of aborting the cell.
        print(f"Failed to download {filename}: {exc}")
        return

    if response.status_code == 200:
        with open(filename, "wb") as f:
            f.write(response.content)
        print(f"Downloaded {filename}")
    else:
        print(f"Failed to download {filename}, status code: {response.status_code}")

# Fetch both images, in the same order as before.
for image_url, image_file in ((url1, filename1), (url2, filename2)):
    download_image(image_url, image_file)




Downloaded people_fetching_water.jpg
Downloaded water_source.jpg


In [8]:
from pptx import Presentation
from pptx.util import Inches

# Start a new presentation; the two image slides below are its only content.
prs = Presentation()

# Layout 5 of python-pptx's default template is "Title Only" (a title
# placeholder with free space underneath), which leaves room for the picture.
slide_layout = prs.slide_layouts[5]

# Picture placement shared by both slides. Only the height is given to
# add_picture, so no explicit width is imposed on the image.
left, top, height = Inches(1), Inches(1.5), Inches(4)

def _add_picture_slide(heading, picture_path):
    """Append a slide titled ``heading`` showing the image at ``picture_path``."""
    picture_slide = prs.slides.add_slide(slide_layout)
    picture_slide.shapes.title.text = heading
    picture_slide.shapes.add_picture(picture_path, left, top, height=height)
    return picture_slide

img_path1 = 'people_fetching_water.jpg'
img_path2 = 'water_source.jpg'

slide1 = _add_picture_slide("People Fetching Water", img_path1)
slide2 = _add_picture_slide("Water Source", img_path2)
title1, title2 = slide1.shapes.title, slide2.shapes.title

# Write the deck to the working directory and confirm.
prs.save('Water_Project_With_Images.pptx')
print("Presentation saved as 'Water_Project_With_Images.pptx'")


Presentation saved as 'Water_Project_With_Images.pptx'


In [10]:
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN

# New presentation for this cell; the helper functions below add slides to it.
prs = Presentation()

# Layouts shared by the helpers.
# NOTE(review): index 5 is "Title Only" in python-pptx's default template;
# "Title and Content" is index 1. Confirm which layout is intended for the
# name title_and_content_layout.
title_slide_layout, title_and_content_layout = (
    prs.slide_layouts[0],
    prs.slide_layouts[5],
)

def add_title_slide(title, subtitle):
    """Append a slide built from ``title_slide_layout`` to ``prs``.

    ``title`` goes into the slide's title shape and ``subtitle`` into
    placeholder 1 of the new slide.
    """
    new_slide = prs.slides.add_slide(title_slide_layout)

    heading = new_slide.shapes.title
    heading.text = title

    subtitle_box = new_slide.placeholders[1]
    subtitle_box.text = subtitle

def add_bullet_slide(title, bullets):
    """Append a titled slide to ``prs`` with one level-0 paragraph per bullet.

    Args:
        title: Text for the slide's title shape.
        bullets: Iterable of strings, one body paragraph each.
    """
    # The body text goes into placeholder idx 1, which exists on the default
    # template's "Title and Content" layout (index 1). The module-level
    # title_and_content_layout is bound to index 5 ("Title Only"), which has
    # no such placeholder and would raise KeyError on the lookup below.
    slide = prs.slides.add_slide(prs.slide_layouts[1])
    slide.shapes.title.text = title
    text_frame = slide.shapes.placeholders[1].text_frame
    text_frame.clear()

    # clear() leaves one empty paragraph; reuse it for the first bullet so the
    # body does not open with a blank bullet.
    for position, bullet in enumerate(bullets):
        p = text_frame.paragraphs[0] if position == 0 else text_frame.add_paragraph()
        p.text = bullet
        p.level = 0

def add_image_slide(title, image_path):
    slide = prs.slides.add_slide(title_and_content_layout)
    slide.shapes.title.te
