In [1]:
pip install pandas fpdf2 matplotlib


Collecting fpdf2
  Downloading fpdf2-2.8.3-py2.py3-none-any.whl.metadata (69 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Downloading fpdf2-2.8.3-py2.py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.7/245.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fpdf2
Successfully installed fpdf2-2.8.3


In [2]:
import pandas as pd

In [18]:
import pandas as pd
from fpdf import FPDF
import os

# --- 1. Read Data ---
def load_data(file_path):
    df = pd.read_csv(file_path)
    return df

# --- 2. Analyze Data ---
def analyze_data(df):
    summary = {
        "Total Employees": len(df),
        "Average Age": round(df["Age"].mean(), 2),
        "Average Salary": round(df["Salary"].mean(), 2),
        "Highest Salary": df["Salary"].max(),
        "Departments": df["Department"].nunique()
    }

    salary_by_dept = df.groupby("Department")["Salary"].mean().round(2).to_dict()
    summary["Average Salary by Department"] = salary_by_dept

    return summary

# --- 3. Create PDF Report ---
class PDF(FPDF):
    def header(self):
        self.set_font('Arial', 'B', 16)
        self.cell(0, 10, 'Employee Data Analysis Report', align='C', ln=True)
        self.ln(10)

    def chapter_title(self, title):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, ln=True)
        self.ln(5)

    def chapter_body(self, body):
        self.set_font('Arial', '', 12)
        self.multi_cell(0, 10, body)
        self.ln()

    def add_summary_table(self, summary):
        self.set_font("Arial", size=12)
        col_width = self.epw / 2  # 2 columns
        self.cell(col_width, 10, "Metric", border=1)
        self.cell(col_width, 10, "Value", border=1, ln=True)

        for key, value in summary.items():
            if isinstance(value, dict):
                continue  # skip nested dicts for now
            self.cell(col_width, 10, str(key), border=1)
            self.cell(col_width, 10, str(value), border=1, ln=True)

    def add_salary_by_department(self, dept_data):
        self.chapter_title("Average Salary by Department")
        col_width = self.epw / 2
        self.cell(col_width, 10, "Department", border=1)
        self.cell(col_width, 10, "Avg Salary", border=1, ln=True)

        for dept, salary in dept_data.items():
            self.cell(col_width, 10, dept)
            self.cell(col_width, 10, str(salary), ln=True, border=1)


def generate_pdf(summary, dept_data, filename="employee_report.pdf"):
    pdf = PDF()
    pdf.add_page()

    pdf.chapter_title("Summary Statistics")
    pdf.add_summary_table(summary)

    pdf.add_page()
    pdf.add_salary_by_department(dept_data)

    pdf.output(filename)
    print(f"✅ Report saved as '{filename}'")

# --- Main Execution ---
# Removed the check for __name__ == "__main__" as it's not needed in a notebook
# and replaced __file__ with the direct path to the data file
data_path = "/content/Emp.csv"  # Use the explicit path

# Load and analyze data
df = load_data(data_path)
summary = analyze_data(df)

# Generate PDF
generate_pdf(summary, summary["Average Salary by Department"])

✅ Report saved as 'employee_report.pdf'


  self.set_font('Arial', 'B', 16)
  self.cell(0, 10, 'Employee Data Analysis Report', align='C', ln=True)
  self.set_font('Arial', 'B', 12)
  self.cell(0, 10, title, ln=True)
  self.set_font("Arial", size=12)
  self.cell(col_width, 10, "Value", border=1, ln=True)
  self.cell(col_width, 10, str(value), border=1, ln=True)
  self.cell(col_width, 10, "Avg Salary", border=1, ln=True)
  self.cell(col_width, 10, str(salary), ln=True, border=1)
