In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from fpdf import FPDF

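# Nothing is loaded in this notebook: the cells below reuse objects left in the kernel
# by the previous (modelling) notebook, including df, target_column, X, y_test,
# forest_reg, y_pred_forest and the metric functions r2_score / mean_squared_error
# (presumably from sklearn.metrics). Run that notebook in the same kernel first.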

In [None]:
# Report figures. Each figure is drawn on its own explicit Figure/Axes, written to a
# PNG in the working directory, displayed, and then closed so it does not linger in
# pyplot's figure registry.
# Relies on names that are not defined in this notebook: df, target_column,
# forest_reg, X, y_test and y_pred_forest.

# 1. Distribution of the Target Variable
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df[target_column], kde=True, color='skyblue', ax=ax)
ax.set_title('Distribution of Target Variable')
ax.set_xlabel(target_column)
ax.set_ylabel('Frequency')
# bbox_inches='tight' keeps axis and tick labels from being cropped in the saved PNG.
fig.savefig('target_distribution.png', bbox_inches='tight')
plt.show()
plt.close(fig)

# 2. Feature Importance (using Random Forest as an example)
importances = forest_reg.feature_importances_
indices = np.argsort(importances)[::-1]  # feature positions, most important first

fig, ax = plt.subplots(figsize=(12, 8))
# NOTE(review): recent seaborn releases warn when `palette` is passed without `hue`;
# the call is kept as-is because the replacement arguments are version-dependent.
sns.barplot(x=X.columns[indices], y=importances[indices], palette='viridis', ax=ax)
ax.set_title('Feature Importances (Random Forest)')
ax.set_xlabel('Feature')
ax.set_ylabel('Importance')
# Right-anchor the rotated labels so each one ends under its own bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.savefig('feature_importance.png', bbox_inches='tight')
plt.show()
plt.close(fig)

# 3. Actual vs Predicted (from the best model)
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred_forest, color='blue', alpha=0.5)
# Dashed y = x reference line. Its extent is taken from the values actually plotted
# (actual and predicted) rather than from a separate full-target variable, so the
# line always spans every point in the scatter.
plotted_values = np.concatenate([np.ravel(y_test), np.ravel(y_pred_forest)])
diag_min, diag_max = plotted_values.min(), plotted_values.max()
ax.plot([diag_min, diag_max], [diag_min, diag_max], 'k--', lw=3)
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.set_title('Actual vs Predicted Values (Random Forest)')
fig.savefig('actual_vs_predicted.png', bbox_inches='tight')
plt.show()
plt.close(fig)

In [None]:
class PDFReport(FPDF):
    """FPDF subclass with a fixed page header/footer and helpers for report sections."""

    def header(self):
        """Page-header hook: print the report title centered in bold 12pt Arial."""
        report_title = 'AI Employee Data Analysis Report'
        self.set_font('Arial', style='B', size=12)
        # Width 0 stretches the cell to the right margin; ln=1 continues on the next line.
        self.cell(0, 10, report_title, border=0, ln=1, align='C')

    def footer(self):
        """Page-footer hook: print the centered page number in italic 8pt Arial."""
        # A negative y is measured up from the bottom edge of the page.
        self.set_y(-15)
        self.set_font('Arial', style='I', size=8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, border=0, ln=0, align='C')

    def add_title(self, title):
        """Write `title` as a left-aligned bold 14pt heading followed by a 5-unit gap."""
        self.set_font('Arial', style='B', size=14)
        self.cell(0, 10, title, border=0, ln=1, align='L')
        self.ln(5)

    def add_paragraph(self, text):
        """Write `text` as a wrapped 12pt paragraph, then insert a line break."""
        self.set_font('Arial', style='', size=12)
        # multi_cell wraps the text across as many 10-unit-high lines as it needs.
        self.multi_cell(0, 10, text)
        self.ln()

    def add_image(self, image_path, title):
        """Add a headed figure: `title` as a heading, then the image at `image_path`.

        The image is placed 150 units wide (no height is given) and is followed by
        a 10-unit gap.
        """
        self.add_title(title)
        self.image(image_path, w=150)
        self.ln(10)

# Assemble the report: start a document with one page, add the summary section,
# then one section per saved figure.
pdf = PDFReport()
pdf.add_page()

# Section 1: textual summary quoting the Random Forest metrics
pdf.add_title('1. Analysis Summary')
forest_r2 = r2_score(y_test, y_pred_forest)
forest_mse = mean_squared_error(y_test, y_pred_forest)
summary_text = (
    'We analyzed the dataset using three different models: '
    'Linear Regression, Decision Tree, and Random Forest. '
    f'The best performing model was Random Forest with an R2 Score of {forest_r2:.2f} '
    f'and a Mean Squared Error of {forest_mse:.2f}. '
    'The following sections detail the key insights derived from the analysis.'
)
pdf.add_paragraph(summary_text)

# Sections 2-4: (image file, section heading) pairs, added in reading order
report_figures = [
    ('target_distribution.png', '2. Distribution of Target Variable'),
    ('feature_importance.png', '3. Feature Importance (Random Forest)'),
    ('actual_vs_predicted.png', '4. Actual vs Predicted Values (Random Forest)'),
]
for figure_path, section_title in report_figures:
    pdf.add_image(figure_path, section_title)

# Write the finished document to disk and report where it went
pdf_file_path = 'AI_Employee_Data_Analysis_Report.pdf'
pdf.output(pdf_file_path)
print(f'Report saved as {pdf_file_path}')