In [22]:
# ----------------------------------------------
# TITANIC EDA (Only PassengerId & Survived)
# + PDF Report Generator
# ----------------------------------------------

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import (
    Image,
    KeepTogether,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
)

# ----------------------------------------------
# LOAD YOUR OLD FILE
# ----------------------------------------------
# Read the Titanic workbook into a DataFrame. The plotting and report steps
# further down use its 'PassengerId' and 'Survived' columns.
# NOTE: the path is absolute and machine-specific; adjust it when running
# this script elsewhere.
df = pd.read_excel(
    io=r"E:\DA Internship\Day-5\Titanic_dataset.csv.xlsx",
)

# ----------------------------------------------
# CREATE & SAVE PLOTS
# ----------------------------------------------
# Use seaborn's "whitegrid" look for every figure drawn below.
sns.set(style="whitegrid")

# Paths of the saved images, in the order the plots are produced; the PDF
# section later iterates over this list.
plot_files = []

# 1. Survival Count Plot: one bar per distinct value of 'Survived'.
survival_plot = "survival_count.png"
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x="Survived", ax=ax)
ax.set_title("Survival Count")
fig.savefig(survival_plot)
plt.close(fig)  # release the figure once it has been written to disk
plot_files.append(survival_plot)

# 2. PassengerId vs Survival Scatter Plot: one point per row of df.
scatter_plot = "passengerid_vs_survival.png"
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x="PassengerId", y="Survived", ax=ax)
ax.set_title("PassengerId vs Survived")
fig.savefig(scatter_plot)
plt.close(fig)
plot_files.append(scatter_plot)

# ----------------------------------------------
# GENERATE PDF REPORT
# ----------------------------------------------
# Output file, ReportLab's sample stylesheet, and a letter-sized document.
pdf_path = "Titanic_EDA_Report.pdf"
styles = getSampleStyleSheet()
doc = SimpleDocTemplate(pdf_path, pagesize=letter)

# Flowables are collected in reading order and rendered by doc.build() below.
story = []

# Title
story.append(Paragraph("<b>Titanic Dataset - EDA Report (Limited Dataset)</b>", styles["Title"]))
story.append(Spacer(1, 12))

# Intro
intro = """
This Exploratory Data Analysis (EDA) is based on a limited Titanic dataset
containing only two columns: PassengerId and Survived. Although the dataset
is incomplete, meaningful insights about survival distribution are extracted.
"""
story.append(Paragraph(intro, styles["BodyText"]))
story.append(Spacer(1, 12))

# Add images to PDF
# Captions are paired with plot_files by position, so both lists must stay
# in the same order.
captions = [
    "Survival Count (0 = Not Survived, 1 = Survived)",
    "PassengerId vs Survival Scatter Plot"
]

for img, cap in zip(plot_files, captions):
    heading = Paragraph(f"<b>{cap}</b>", styles["Heading3"])
    # The plots are created at 6x4 and 8x5 inches, neither of which is 4:3.
    # kind="proportional" treats 400x300 as a bounding box and keeps each
    # image's own aspect ratio instead of stretching it to fill the box.
    figure = Image(img, width=400, height=300, kind="proportional")
    # Keep each caption on the same page as its image so that a page break
    # cannot strand the heading at the bottom of the previous page.
    story.append(KeepTogether([heading, figure]))
    story.append(Spacer(1, 12))

# Summary Insights
# The survival rate is the mean of the 'Survived' column expressed as a
# percentage with two decimals.
summary = f"""
<b>Key Insights:</b><br/>
• Dataset contains only 2 columns: PassengerId and Survived.<br/>
• The survival count plot shows how many survived vs died.<br/>
• PassengerId does not influence survival, but helps visualize distribution.<br/>
• Overall survival rate: {df['Survived'].mean() * 100:.2f}%<br/>
• For deeper analysis, more columns like Age, Sex, Fare, and Pclass are required.<br/>
"""
story.append(Paragraph(summary, styles["BodyText"]))

# Build PDF
doc.build(story)

print("PDF Generated Successfully:", pdf_path)


PDF Generated Successfully: Titanic_EDA_Report.pdf
