In [3]:
import sys
!{sys.executable} -m pip install reportlab

Collecting reportlab
  Downloading reportlab-4.4.5-py3-none-any.whl (2.0 MB)
     ---------------------------------------- 2.0/2.0 MB 3.0 MB/s eta 0:00:00
Installing collected packages: reportlab
Successfully installed reportlab-4.4.5


You should consider upgrading via the 'C:\Users\u1029526\Downloads\spotfusion_plus\spotfusion_venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [5]:
import json
import os
from datetime import datetime
from pathlib import Path

from fpdf import FPDF

# --- CONFIGURATION ---
# Project root. Set the SPOTFUSION_BASE_DIR environment variable to run the
# notebook on another machine; when it is unset (or empty) the original
# location below is used, so existing behaviour is unchanged.
_DEFAULT_BASE_DIR = r"C:\Users\u1029526\Downloads\spotfusion_plus"
BASE_DIR = Path(os.environ.get("SPOTFUSION_BASE_DIR") or _DEFAULT_BASE_DIR)
# Directory the report reads its metrics/figures from and writes the PDF to.
REPORTS = BASE_DIR / "reports"

class PDF(FPDF):
    """FPDF subclass carrying the SpotFusion+ report layout.

    Overrides the page header and footer and adds small helpers for section
    titles, body paragraphs and captioned figures.
    """

    def header(self):
        """Draw the right-aligned running header line, then leave a gap below it."""
        # A bold text line is used as a placeholder where a logo could go.
        self.set_font('Arial', 'B', 10)
        self.set_text_color(150)
        self.cell(0, 10, 'SpotFusion+ Analytics Pipeline', 0, 0, 'R')
        self.ln(15)

    def footer(self):
        """Draw the centred ``Page N`` line near the bottom of the page."""
        # Negative y is measured from the bottom edge (1.5 cm with mm units).
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.set_text_color(128)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

    def chapter_title(self, label):
        """Print ``label`` as a bold, green, left-aligned section heading."""
        self.set_font('Arial', 'B', 14)
        self.set_text_color(30, 215, 96)  # Spotify Green
        self.cell(0, 10, label, 0, 1, 'L')
        self.ln(2)

    def chapter_body(self, text):
        """Print ``text`` as a wrapped black paragraph followed by a line break."""
        self.set_font('Arial', '', 11)
        self.set_text_color(0)
        self.multi_cell(0, 6, text)
        self.ln()

    def add_image(self, image_path, title):
        """Insert a centred caption followed by the image, or a red placeholder.

        Args:
            image_path: Location of the image, as a ``pathlib.Path`` or a
                plain string.
            title: Caption printed above the image. Surrounding whitespace
                (including newlines) is dropped because the caption is drawn
                with a single-line ``cell``.

        When ``image_path`` is not an existing file, a red
        ``[Image Missing: <name>]`` line is printed instead and the text
        colour is reset to black afterwards; the font is left as Courier
        italic.
        """
        # Accept plain strings as well as Path objects.
        image_path = Path(image_path)
        # Callers sometimes pass captions with a leading "\n "; a cell cannot
        # break lines, so keep only the visible text.
        caption = title.strip()

        # is_file() rather than exists(): a directory is not a usable image.
        if image_path.is_file():
            self.ln(5)
            self.set_font('Arial', 'B', 10)
            self.cell(0, 10, caption, 0, 1, 'C')
            # Width=170 ensures it fits nicely within margins (A4 is 210mm wide)
            self.image(str(image_path), x=20, w=170)
            self.ln(10)
        else:
            self.set_font('Courier', 'I', 10)
            self.set_text_color(200, 0, 0)
            self.cell(0, 10, f"[Image Missing: {image_path.name}]", 0, 1, 'C')
            self.set_text_color(0)

def _load_metrics(metrics_path):
    """Read the model metrics JSON, or fall back to zeroed placeholders.

    Args:
        metrics_path: Path to the metrics JSON file.

    Returns:
        The parsed JSON content, or a dict holding 0 for ``r2``, ``rmse``,
        ``mae`` and ``n_samples`` when the file does not exist.
    """
    if metrics_path.exists():
        # Explicit encoding so the result does not depend on the OS locale.
        with open(metrics_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    # Make the fallback visible: the report would otherwise quote zeros silently.
    print(f"Warning: {metrics_path} not found; the report will show placeholder zeros.")
    return {"r2": 0, "rmse": 0, "mae": 0, "n_samples": 0}


def generate_pdf():
    """Build the SpotFusion+ PDF report and write it into ``REPORTS``.

    Reads ``model_metrics.json`` from ``REPORTS`` (placeholder zeros are used
    when it is missing), lays out a title page plus three sections with their
    figures, and saves the result as ``SpotFusion_Final_Report.pdf`` in
    ``REPORTS``. Figures that are missing are replaced by a placeholder line
    by ``PDF.add_image``.
    """
    print("📄 Generating PDF Report...")

    # 1. Load Metrics
    m = _load_metrics(REPORTS / "model_metrics.json")

    # 2. Setup PDF
    pdf = PDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # --- TITLE PAGE ---
    pdf.set_font('Arial', 'B', 24)
    pdf.ln(40)
    pdf.cell(0, 10, "SpotFusion+", 0, 1, 'C')

    pdf.set_font('Arial', '', 16)
    pdf.cell(0, 10, "End-to-End Music Analytics Pipeline", 0, 1, 'C')

    pdf.ln(20)
    pdf.set_font('Arial', '', 12)
    pdf.cell(0, 10, f"Generated on: {datetime.now().strftime('%Y-%m-%d')}", 0, 1, 'C')
    pdf.cell(0, 10, "Author: Mohnish P Nair", 0, 1, 'C')

    pdf.ln(30)
    pdf.set_font('Arial', 'B', 12)
    pdf.cell(0, 10, "Executive Summary", 0, 1, 'C')
    pdf.set_font('Arial', '', 11)
    # Adjacent literals are concatenated, so every fragment that is followed
    # by another one must end with a space.
    summary_text = (
        "This report describes the results of the SpotFusion+ project. "
        f"The pipeline handled {m.get('n_samples', 'N/A')} tracks to predict song popularity "
        "as well as musical genres clustering. By using Target Encoding for Artist and Genre context, "
        f"the model attained an R-Squared value of {m.get('r2', 0):.4f}, which is substantially better than "
        "traditional audio-only models."
    )
    pdf.multi_cell(0, 6, summary_text, align='C')

    # --- SECTION 1 (new page): MODEL PERFORMANCE ---
    pdf.add_page()
    pdf.chapter_title("1. Model Performance Evaluation")

    perf_text = (
        "The predictive model (XGBoost) was made to predict popularity scores (0-100). "
        "Key performance indicators are as follows:\n\n"
        f"   - R-Squared (Variance Explained): {m.get('r2', 0):.4f}\n"
        f"   - RMSE (Root Mean Squared Error): {m.get('rmse', 0):.2f}\n"
        f"   - MAE (Mean Absolute Error): {m.get('mae', 0):.2f}\n\n"
        "The high R2 score shows that Artist Reputation and Genre Trends play the "
        "largest roles in streaming success."
    )
    pdf.chapter_body(perf_text)

    # Add Feature Importance Image
    pdf.add_image(REPORTS / "xgb_feature_importance.png", "Figure 1: Top Features Driving Popularity")

    # --- SECTION 2 (new page): ADVANCED ANALYTICS ---
    pdf.add_page()
    pdf.chapter_title("2. Advanced Analytics & Discrimination")

    pdf.chapter_body(
        "To check the utility of the model, we checked the performance of the model to differentiate between 'Hits' "
        "and 'Flops'. The distribution below demonstrates good separation, which means that "
        "the model can filter high potential tracks effectively."
    )
    pdf.add_image(REPORTS / "tiered_violin.png", "Figure 2: Predicted Scores vs Actual Success Tiers")

    pdf.ln(5)
    pdf.chapter_body(
        "Furthermore, SHAP (SHapley Additive exPlanations) analysis confirms that the recent releases "
        "and high number of artist collaboration have a positive effect on popularity prediction.\n"
    )
    # Captions are drawn with a single-line cell, so they carry no "\n" prefix.
    pdf.add_image(REPORTS / "shap_summary.png", "Figure 3: SHAP Value Impact Analysis")

    # --- SECTION 3 (new page): CLUSTERING & NETWORK ---
    pdf.add_page()
    pdf.chapter_title("3. Musical Clusters & Network Graph")

    pdf.chapter_body(
        "Using UMAP dimensionality reduction, tracks were grouped using just their Audio DNA "
        "and Lyrical Sentiment. This does a good job separating high energy tracks from acoustic ballads."
    )
    pdf.add_image(REPORTS / "cluster_plot_3d.png", "Figure 4: 3D Projection of Musical Clusters")

    pdf.ln(5)
    pdf.chapter_body(
        "Finally, the Artist Collaboration Network displays key influencers in the dataset. "
        "Nodes represent artists, and edges represent shared tracks.\n"
    )
    pdf.add_image(REPORTS / "artist_graph.png", "Figure 5: Artist Collaboration Graph")

    # --- SAVE ---
    # Writing fails if the target directory is absent, so create it first.
    REPORTS.mkdir(parents=True, exist_ok=True)
    output_filename = REPORTS / "SpotFusion_Final_Report.pdf"
    # Pass a plain string: not every FPDF release accepts Path objects here.
    pdf.output(str(output_filename))
    print(f"✔ PDF generated successfully at: {output_filename}")

# Entry point: build the report when this cell/script is executed directly
# (the recorded output below this cell shows the report being generated).
if __name__ == "__main__":
    generate_pdf()

📄 Generating PDF Report...
✔ PDF generated successfully at: C:\Users\u1029526\Downloads\spotfusion_plus\reports\SpotFusion_Final_Report.pdf


In [1]:
import sys
# Installs the `fpdf` package through the interpreter that runs this kernel.
# NOTE(review): this cell has execution count 1 but sits below the cell that
# does `from fpdf import FPDF`; on a fresh Restart & Run All the import would
# run before the install - consider moving this cell to the top.
# NOTE(review): the install is unpinned and the output does not show which
# version was installed; pin a version once confirmed.
!{sys.executable} -m pip install fpdf



You should consider upgrading via the 'C:\Users\u1029526\Downloads\spotfusion_plus\spotfusion_venv\Scripts\python.exe -m pip install --upgrade pip' command.
