# Portfolio Universe Analysis - HTML Report Generator
This notebook loads the most recent P123 screen export, builds charts with Matplotlib and Seaborn, and writes a self-contained HTML report (`results.html`) with the results.

In [None]:
# Import libraries
import base64
import csv
import html
from datetime import datetime
from io import BytesIO
from itertools import islice
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Apply one consistent look to every chart created below
sns.set_style("whitegrid")
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 6),  # default figure size
    'font.size': 10,            # default text size
})

In [None]:
# Locate the most recently modified screen export
base_dir = Path("Universe Data")
screen_files = list(base_dir.glob("P123_Screen_*.csv"))
if not screen_files:
    # max() over an empty sequence raises an opaque ValueError; fail with a
    # message that names the pattern and the directory that was searched.
    raise FileNotFoundError(
        f"No files matching 'P123_Screen_*.csv' found in {base_dir.resolve()}"
    )
file_path = max(screen_files, key=lambda p: p.stat().st_mtime)

# Read metadata from first 3 lines only; the data table itself is parsed by
# pandas, so there is no reason to pull the whole file into memory here.
# "utf-8-sig" transparently drops a leading byte-order mark if present.
with file_path.open("r", encoding="utf-8-sig") as f:
    lines = [line.rstrip("\n") for line in islice(f, 3)]

def clean_metadata_line(line):
    """Return the first non-empty field of a comma-separated metadata line.

    The line is parsed with the ``csv`` module so that a quoted field that
    itself contains commas (e.g. ``"Small Cap, Value"``) is returned whole and
    without its surrounding quotes instead of being cut at the first comma.

    Args:
        line: One raw line of comma-separated text (no trailing newline).

    Returns:
        The first field that is non-empty after stripping whitespace, or
        ``None`` when every field is blank.
    """
    # csv.reader yields nothing useful for an empty line, hence the [] default.
    fields = next(csv.reader([line], skipinitialspace=True), [])
    stripped_fields = (field.strip() for field in fields)
    return next((field for field in stripped_fields if field), None)

# Header line N holds field N; a field whose line is missing becomes None
metadata = {
    field_name: clean_metadata_line(lines[position]) if position < len(lines) else None
    for position, field_name in enumerate(("title", "date", "notes"))
}

# Echo what was picked up so the run can be sanity-checked
for caption, value in (
    ("Screen", metadata['title']),
    ("Date", metadata['date']),
    ("File", file_path.name),
):
    print(f"{caption}: {value}")

In [None]:
# Load the table that follows the 3 metadata lines and discard the
# placeholder columns pandas names "Unnamed..."
df = pd.read_csv(file_path, skiprows=3)
has_real_name = ~df.columns.str.startswith("Unnamed")
df = df.loc[:, has_real_name]

# Market cap and both yield columns get the same treatment: remove any comma
# separators, then coerce to numbers (values that fail to parse become NaN)
for numeric_column in ('MktCap', 'EarnYield', 'FCFYield'):
    as_text = df[numeric_column].astype(str).str.replace(',', '')
    df[numeric_column] = pd.to_numeric(as_text, errors='coerce')

print(f"Total stocks: {len(df):,}")
print(f"\nColumns: {list(df.columns)}")
df.head(10)

In [None]:
# Headline figures, printed here and reused by the report's summary cards
mktcap_column = df['MktCap']
total_stocks = len(df)
num_sectors = df['SectorCode'].nunique()
total_mktcap, avg_mktcap, median_mktcap = (
    mktcap_column.sum(),
    mktcap_column.mean(),
    mktcap_column.median(),
)

print(
    f"Total Stocks: {total_stocks:,}",
    f"Total Market Cap: ${total_mktcap:,.2f}M",
    f"Average Market Cap: ${avg_mktcap:,.2f}M",
    f"Median Market Cap: ${median_mktcap:,.2f}M",
    f"Number of Sectors: {num_sectors}",
    sep="\n",
)

In [None]:
# Helper function to convert matplotlib figure to base64 encoded image
def fig_to_base64(fig, *, dpi=150):
    """Render a figure to PNG and return it as a base64 ``data:`` URI.

    The figure is only saved, never closed, so the caller can keep using it
    and decides when to release it.

    Args:
        fig: Object exposing ``savefig(file, format=..., dpi=...,
            bbox_inches=...)``; in this notebook, a matplotlib figure.
        dpi: Resolution forwarded to ``savefig``. Defaults to 150.

    Returns:
        A string of the form ``data:image/png;base64,<payload>`` that can be
        used directly as an ``<img src>`` value.
    """
    # The context manager releases the buffer even if savefig raises.
    with BytesIO() as buffer:
        fig.savefig(buffer, format='png', dpi=dpi, bbox_inches='tight')
        encoded = base64.b64encode(buffer.getvalue()).decode('ascii')
    return f'data:image/png;base64,{encoded}'

## Create Visualizations

In [None]:
# Store all chart images: each chart cell adds one entry, keyed by a short
# chart name, whose value is the string returned by fig_to_base64 (a base64
# PNG data URI). The HTML template later reads these keys for its <img> tags.
chart_images = {}

In [None]:
# Chart 1: Sector Distribution (Count)
fig, ax = plt.subplots(figsize=(12, 8))
# Ascending sort puts the largest sector at the top of the horizontal bar chart
sector_counts = df['SectorCode'].value_counts().sort_values(ascending=True)
sector_counts.plot(kind='barh', ax=ax, color=sns.color_palette("husl", len(sector_counts)))
ax.set_title('Number of Stocks by Sector', fontsize=16, fontweight='bold')
ax.set_xlabel('Number of Stocks', fontsize=12)
ax.set_ylabel('Sector', fontsize=12)

# Add value labels on bars. The gap is 1% of the longest bar, the same rule the
# market-cap bar charts use, so labels sit just past the bar end at any scale
# (a fixed offset of 10 is far too wide when counts are small).
label_pad = sector_counts.max() * 0.01
for i, v in enumerate(sector_counts):
    ax.text(v + label_pad, i, str(v), va='center', fontsize=10)

plt.tight_layout()
plt.show()
# Capture the image through the figure handle, then release the figure
chart_images['sector_distribution'] = fig_to_base64(fig)
plt.close(fig)

In [None]:
# Chart 2: Sector Market Cap Distribution
fig, ax = plt.subplots(figsize=(12, 8))
cap_by_sector = df.groupby('SectorCode')['MktCap'].sum()
# Ascending order places the largest sector at the top of the chart
sector_mktcap = cap_by_sector.sort_values(ascending=True)
bar_colors = sns.color_palette("husl", len(sector_mktcap))
sector_mktcap.plot(kind='barh', ax=ax, color=bar_colors)
ax.set_xlabel('Market Cap ($M)', fontsize=12)
ax.set_ylabel('Sector', fontsize=12)
ax.set_title('Total Market Cap by Sector ($M)', fontsize=16, fontweight='bold')

# Write each total just past the end of its bar (gap = 1% of the largest total)
for bar_row, sector_total in enumerate(sector_mktcap.tolist()):
    label_x = sector_total + max(sector_mktcap) * 0.01
    ax.text(label_x, bar_row, f'${sector_total:,.0f}M', va='center', fontsize=9)

plt.tight_layout()
plt.show()
# Capture the image through the figure handle, then release the figure
chart_images['sector_marketcap'] = fig_to_base64(fig)
plt.close(fig)

In [None]:
# Chart 3: Top 20 Stocks by Market Cap
fig, ax = plt.subplots(figsize=(12, 10))
top_20 = df.nlargest(20, 'MktCap')[['Ticker', 'Name', 'MktCap']].copy()
# Label bars as "TICKER - Name" with the name cut to 25 characters. Adding a
# missing Name yields NaN for the whole label, so fall back to the ticker alone.
top_20['Label'] = (top_20['Ticker'] + ' - ' + top_20['Name'].str[:25]).fillna(top_20['Ticker'])
# Ascending order places the largest company at the top of the chart
top_20 = top_20.sort_values('MktCap', ascending=True)

bars = ax.barh(range(len(top_20)), top_20['MktCap'], color=sns.color_palette("viridis", len(top_20)))
ax.set_yticks(range(len(top_20)))
ax.set_yticklabels(top_20['Label'], fontsize=9)
ax.set_xlabel('Market Cap ($M)', fontsize=12)
ax.set_title('Top 20 Stocks by Market Capitalization', fontsize=16, fontweight='bold')

# Add value labels just past each bar end (gap = 1% of the largest market cap).
# The gap is computed once, and only the column that is needed is iterated.
label_pad = top_20['MktCap'].max() * 0.01
for i, mktcap in enumerate(top_20['MktCap']):
    ax.text(mktcap + label_pad, i, f"${mktcap:,.0f}M", va='center', fontsize=8)

plt.tight_layout()
plt.show()
# Capture the image through the figure handle, then release the figure
chart_images['top_20_stocks'] = fig_to_base64(fig)
plt.close(fig)

In [None]:
# Chart 4: Market Cap Distribution (histogram; the count axis is log-scaled)
fig, ax = plt.subplots(figsize=(12, 6))
known_caps = df['MktCap'].dropna()
histogram_options = dict(bins=50, edgecolor='black', alpha=0.7, color='steelblue')
ax.hist(known_caps, **histogram_options)
ax.set_title('Market Cap Distribution (All Stocks)', fontsize=16, fontweight='bold')
ax.set_xlabel('Market Cap ($M)', fontsize=12)
ax.set_ylabel('Number of Stocks', fontsize=12)
ax.set_yscale('log')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Capture the image through the figure handle, then release the figure
chart_images['marketcap_distribution'] = fig_to_base64(fig)
plt.close(fig)

In [None]:
# Chart 5: Market Cap by Sector (Box Plot)
fig, ax = plt.subplots(figsize=(14, 8))
# Arrange the boxes from the highest to the lowest median market cap
median_by_sector = df.groupby('SectorCode')['MktCap'].median()
sector_order = median_by_sector.sort_values(ascending=False).index
# NOTE(review): newer seaborn releases may warn when `palette` is given
# without `hue` - confirm against the installed seaborn version.
sns.boxplot(
    data=df,
    x='SectorCode',
    y='MktCap',
    order=sector_order,
    palette='Set2',
    ax=ax,
)
ax.set_yscale('log')
ax.set_title('Market Cap Distribution by Sector', fontsize=16, fontweight='bold')
ax.set_xlabel('Sector', fontsize=12)
ax.set_ylabel('Market Cap ($M, log scale)', fontsize=12)
# Slant the sector names so they do not overlap
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
# Capture the image through the figure handle, then release the figure
chart_images['marketcap_by_sector_boxplot'] = fig_to_base64(fig)
plt.close(fig)

In [None]:
# Chart 6: Earnings Yield vs FCF Yield Scatter Plot
fig, ax = plt.subplots(figsize=(12, 8))

# Keep only rows where both yields lie between -50 and 50, so that extreme
# values do not stretch the axes
earn_in_range = df['EarnYield'].between(-50, 50)
fcf_in_range = df['FCFYield'].between(-50, 50)
plot_df = df[earn_in_range & fcf_in_range].copy()

# Give every sector present in the filtered data its own colour
sectors = plot_df['SectorCode'].unique()
colors = sns.color_palette("husl", len(sectors))
sector_colors = dict(zip(sectors, colors))

# One scatter call per sector so each gets its own legend entry
for sector, sector_color in sector_colors.items():
    belongs_to_sector = plot_df['SectorCode'] == sector
    sector_data = plot_df[belongs_to_sector]
    ax.scatter(
        sector_data['EarnYield'],
        sector_data['FCFYield'],
        alpha=0.6,
        s=50,
        label=sector,
        color=sector_color,
    )

ax.set_title('Earnings Yield vs Free Cash Flow Yield by Sector', fontsize=16, fontweight='bold')
ax.set_xlabel('Earnings Yield (%)', fontsize=12)
ax.set_ylabel('FCF Yield (%)', fontsize=12)
# Dashed zero lines split the plot into positive/negative yield quadrants
ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
ax.axvline(x=0, color='gray', linestyle='--', alpha=0.5)
# Anchor the legend outside the plotting area, to the right
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Capture the image through the figure handle, then release the figure
chart_images['yield_comparison'] = fig_to_base64(fig)
plt.close(fig)

## Generate HTML Report

In [None]:
# Sector breakdown table: ticker count plus total / mean / median market cap
# per sector, rounded to 2 decimals
sector_stats = (
    df.groupby('SectorCode')
    .agg({'Ticker': 'count', 'MktCap': ['sum', 'mean', 'median']})
    .round(2)
)
sector_stats.columns = ['Count', 'Total MktCap ($M)', 'Avg MktCap ($M)', 'Median MktCap ($M)']
sector_stats = sector_stats.sort_values('Total MktCap ($M)', ascending=False)
# Each sector's share of all rows in the dataframe, in percent
sector_stats['Percentage'] = (sector_stats['Count'] / total_stocks * 100).round(1)

# Render floats with thousands separators and two decimals
sector_table_html = sector_stats.to_html(classes='data-table', float_format='{:,.2f}'.format)


def _format_yield(value):
    """Format a yield as a percentage string, or 'N/A' when it is missing."""
    if pd.notna(value):
        return f'{value:.2f}%'
    return 'N/A'


# Top 20 stocks table, with display-ready strings and friendlier headers
top_20_columns = ['Ticker', 'Name', 'MktCap', 'SectorCode', 'EarnYield', 'FCFYield']
top_20_table = df.nlargest(20, 'MktCap')[top_20_columns].copy()
top_20_table['MktCap'] = top_20_table['MktCap'].apply('${:,.2f}M'.format)
for yield_column in ('EarnYield', 'FCFYield'):
    top_20_table[yield_column] = top_20_table[yield_column].apply(_format_yield)
top_20_table = top_20_table.rename(columns={
    'MktCap': 'Market Cap',
    'SectorCode': 'Sector',
    'EarnYield': 'Earnings Yield',
    'FCFYield': 'FCF Yield',
})
top_20_table_html = top_20_table.to_html(index=False, classes='data-table')

In [None]:
# Generate the HTML report
# The title, date and file name originate from the input file, so they are
# HTML-escaped before being embedded. A missing title or date gets a readable
# fallback instead of the literal text "None".
report_title = html.escape(metadata['title'] or 'Portfolio Universe Analysis')
analysis_date = html.escape(metadata['date'] or 'N/A')
source_file_name = html.escape(file_path.name)
generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# Doubled braces in the CSS below are literal braces inside the f-string.
# The two table fragments come from DataFrame.to_html and the image sources
# are base64 data URIs, so they are inserted as-is.
html_content = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Portfolio Universe Analysis Report</title>
    <style>
        * {{
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }}
        
        body {{
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            color: #333;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            padding: 20px;
        }}
        
        .container {{
            max-width: 1400px;
            margin: 0 auto;
            background: white;
            border-radius: 10px;
            box-shadow: 0 10px 40px rgba(0,0,0,0.2);
            overflow: hidden;
        }}
        
        .header {{
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 40px;
            text-align: center;
        }}
        
        .header h1 {{
            font-size: 2.5em;
            margin-bottom: 10px;
        }}
        
        .header p {{
            font-size: 1.2em;
            opacity: 0.9;
        }}
        
        .content {{
            padding: 40px;
        }}
        
        .summary-cards {{
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
            margin-bottom: 40px;
        }}
        
        .card {{
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 25px;
            border-radius: 10px;
            box-shadow: 0 4px 15px rgba(0,0,0,0.1);
            transition: transform 0.3s ease;
        }}
        
        .card:hover {{
            transform: translateY(-5px);
        }}
        
        .card h3 {{
            font-size: 0.9em;
            opacity: 0.9;
            margin-bottom: 10px;
            text-transform: uppercase;
            letter-spacing: 1px;
        }}
        
        .card .value {{
            font-size: 2em;
            font-weight: bold;
        }}
        
        .section {{
            margin-bottom: 50px;
        }}
        
        .section h2 {{
            color: #667eea;
            font-size: 2em;
            margin-bottom: 20px;
            padding-bottom: 10px;
            border-bottom: 3px solid #667eea;
        }}
        
        .chart-container {{
            margin: 30px 0;
            text-align: center;
        }}
        
        .chart-container img {{
            max-width: 100%;
            height: auto;
            border-radius: 8px;
            box-shadow: 0 4px 15px rgba(0,0,0,0.1);
        }}
        
        .data-table {{
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
            font-size: 0.9em;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            border-radius: 8px;
            overflow: hidden;
        }}
        
        .data-table thead tr {{
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            text-align: left;
            font-weight: bold;
        }}
        
        .data-table th,
        .data-table td {{
            padding: 12px 15px;
        }}
        
        .data-table tbody tr {{
            border-bottom: 1px solid #ddd;
        }}
        
        .data-table tbody tr:nth-of-type(even) {{
            background-color: #f3f3f3;
        }}
        
        .data-table tbody tr:hover {{
            background-color: #e8e8e8;
        }}
        
        .footer {{
            background: #f8f9fa;
            padding: 20px;
            text-align: center;
            color: #666;
            font-size: 0.9em;
        }}
        
        @media print {{
            body {{
                background: white;
                padding: 0;
            }}
            .container {{
                box-shadow: none;
            }}
        }}
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>{report_title}</h1>
            <p>Analysis Date: {analysis_date}</p>
            <p>Report Generated: {generated_at}</p>
        </div>
        
        <div class="content">
            <div class="section">
                <h2>Executive Summary</h2>
                <div class="summary-cards">
                    <div class="card">
                        <h3>Total Stocks</h3>
                        <div class="value">{total_stocks:,}</div>
                    </div>
                    <div class="card">
                        <h3>Total Market Cap</h3>
                        <div class="value">${total_mktcap:,.0f}M</div>
                    </div>
                    <div class="card">
                        <h3>Average Market Cap</h3>
                        <div class="value">${avg_mktcap:,.2f}M</div>
                    </div>
                    <div class="card">
                        <h3>Median Market Cap</h3>
                        <div class="value">${median_mktcap:,.2f}M</div>
                    </div>
                    <div class="card">
                        <h3>Number of Sectors</h3>
                        <div class="value">{num_sectors}</div>
                    </div>
                </div>
            </div>
            
            <div class="section">
                <h2>Sector Analysis</h2>
                <div class="chart-container">
                    <img src="{chart_images['sector_distribution']}" alt="Sector Distribution">
                </div>
                <div class="chart-container">
                    <img src="{chart_images['sector_marketcap']}" alt="Sector Market Cap">
                </div>
                <h3>Detailed Sector Statistics</h3>
                {sector_table_html}
            </div>
            
            <div class="section">
                <h2>Top Holdings</h2>
                <div class="chart-container">
                    <img src="{chart_images['top_20_stocks']}" alt="Top 20 Stocks">
                </div>
                <h3>Top 20 Stocks Details</h3>
                {top_20_table_html}
            </div>
            
            <div class="section">
                <h2>Market Capitalization Analysis</h2>
                <div class="chart-container">
                    <img src="{chart_images['marketcap_distribution']}" alt="Market Cap Distribution">
                </div>
                <div class="chart-container">
                    <img src="{chart_images['marketcap_by_sector_boxplot']}" alt="Market Cap by Sector">
                </div>
            </div>
            
            <div class="section">
                <h2>Yield Analysis</h2>
                <div class="chart-container">
                    <img src="{chart_images['yield_comparison']}" alt="Yield Comparison">
                </div>
            </div>
        </div>
        
        <div class="footer">
            <p>Portfolio Universe Analysis Report | Generated from {source_file_name}</p>
        </div>
    </div>
</body>
</html>
"""

# Write the report next to the notebook as UTF-8, replacing any earlier copy
output_path = Path("results.html")
output_path.write_text(html_content, encoding='utf-8')

# Confirmation banner telling the user where the file was written
banner_rule = '=' * 60
print(f"\n{banner_rule}")
print("HTML Report created successfully!")
print(f"Location: {output_path.absolute()}")
print("\nYou can open this file in any web browser.")
print(banner_rule)