# Problem Statement: News Prioritizer
> ## Problem: General news feeds don't know what we care about. They can't tell which stories affect your stocks, your business, or your industry—leaving you to sort through everything manually.

### Solution: Build an AI tool that prioritizes news by personal impact.

#### How it works:
- Pull news from sources (Reuters)
- Match each story against user's profile:
- Stock portfolio
- Industry focus
- Business type
- Location
- Custom keywords
- Score impact from 0-10
- Surface high-priority stories first
- Send alerts for critical news

```
Real Examples:
High Priority (9/10): "TSMC chip factory shuts down"
Why: You own NVIDIA and Apple stock
Action: Review tech holdings
Medium Priority (6/10): "New data privacy law in California"
Why: Your company processes CA user data
Action: Update privacy policy by deadline
Low Priority (1/10): "French wine exports rise 10%"
Why: No connection to your interests
Action: Skip
```

### Success Metrics
```
Find important news 5x faster
Never miss critical updates
Users engage with 80%+ of top-scored stories
```




# Scoring mechanism 

**Base Scoring:**
- Company name match: +5 points
- Stock ticker match: +3 points  
- Each risk factor keyword: +1 point

**Risk Multiplier:**
- If article contains crisis words ("risk", "volatility", "regulation", "crisis", "shortage", "disruption"): multiply by 1.5x
- Otherwise: multiply by 1.0x

**Final Score:**
- Cap at 10 maximum

**Example:**
```
Article: "Tesla faces battery shortage crisis"
- "Tesla" (company name): +5
- "battery" (risk factor): +1
- "shortage" + "crisis" (crisis words): 1.5x multiplier
- Final: (5+1) × 1.5 = 9/10
```





In [None]:

import numpy as np, torch, pickle, feedparser, requests, sqlite3, re
from datetime import datetime
from pathlib import Path
from typing import Union, List, Dict, Tuple, Optional, Callable, Set
from dataclasses import dataclass
from fastcore.utils import *

@dataclass
class RiskProfile:
    "Enhanced company profile with risk factors"
    name: str
    ticker: Optional[str] = None
    industry: Optional[str] = None
    gics_code: Optional[str] = None
    risk_factors: Set[str] = None
    
    def __post_init__(self):
        if self.risk_factors is None: self.risk_factors = set()
        self.risk_factors.add(self.name.lower())
        if self.ticker: self.risk_factors.add(self.ticker.lower())
        if self.industry: self.risk_factors.add(self.industry.lower())

class RSScraper:
    "RSS feed scraper with article extraction"
    def __init__(self, feeds): self.feeds = feeds if isinstance(feeds, list) else [feeds]
    def fetch_feed(self, url): return feedparser.parse(url)
    def get_articles(self, limit=50):
        articles = []
        for feed_url in self.feeds:
            feed = self.fetch_feed(feed_url)
            for entry in feed.entries[:limit]: articles.append(self._parse_entry(entry))
        return articles[:limit]
    def _parse_entry(self, entry): return dict(title=entry.get('title', ''), link=entry.get('link', ''), published=entry.get('published', ''), summary=entry.get('summary', ''))

@patch
def score_risk_impact(self:RSScraper, article, risk_profiles):
    "Score articles based on risk factor relevance"
    text = f"{article['title']} {article['summary']}".lower()
    risk_scores = []
    for profile in risk_profiles:
        base_score = 0
        risk_multiplier = 1
        if profile.name.lower() in text: base_score += 5
        if profile.ticker and profile.ticker.lower() in text: base_score += 3
        for factor in profile.risk_factors:
            if factor in text: base_score += 1
        risk_keywords = ["risk", "volatility", "regulation", "crisis", "shortage", "disruption"]
        if any(keyword in text for keyword in risk_keywords): risk_multiplier = 1.5
        final_score = min(int(base_score * risk_multiplier), 10)
        if final_score > 0: risk_scores.append((profile, final_score))
    return sorted(risk_scores, key=lambda x: x[1], reverse=True)

class NewsPrioritizer:
    "Prioritize news articles based on user profiles"
    def __init__(self, scraper, profiles): self.scraper,self.profiles = scraper,profiles
    def get_prioritized_news(self, limit=10):
        articles = self.scraper.get_articles(limit)
        prioritized = []
        for article in articles:
            scores = self.scraper.score_risk_impact(article, self.profiles)
            max_score = scores[0][1] if scores else 0
            prioritized.append((article, max_score, scores))
        return sorted(prioritized, key=lambda x: x[1], reverse=True)
    def show_dashboard(self, limit=5):
        news = self.get_prioritized_news(limit)
        for i, (article, score, matches) in enumerate(news):
            if score > 0:
                print(f"\n{i+1}. PRIORITY {score}/10: {article['title']}")
                print(f"   Published: {article['published']}")
                for profile, pscore in matches: print(f"   Impact: {profile.name} ({pscore}/10)")
                print(f"   Link: {article['link']}")





In [None]:
financial_rss_feeds = [
    # Yahoo Finance
    "https://finance.yahoo.com/news/rssindex",
    "https://feeds.finance.yahoo.com/rss/2.0/headline?s=AAPL,MSFT,TSLA,GOOGL,AMZN&region=US&lang=en-US",
    
    # Bloomberg
    "https://feeds.bloomberg.com/businessweek/news.rss",
    
    # Fortune
    "https://fortune.com/feed",
    "https://fortune.com/section/finance/feed",
    
    # Seeking Alpha
    "https://seekingalpha.com/feed.xml",
    
    # CNN Money
    "http://rss.cnn.com/rss/money_markets.rss",
    "http://rss.cnn.com/rss/money_news_economy.rss",
    "http://rss.cnn.com/rss/money_news_companies.rss",
    
    # The Motley Fool
    "https://fool.com/a/feeds/partner/google/",
    
    # 247 Wall Street
    "http://247wallst.com/feed",
    
    # MarketBeat
    "https://www.marketbeat.com/feed",
    
    # Kiplinger
    "https://www.kiplinger.com/feed/all"
]

In [None]:
finance_scraper = RSScraper(financial_rss_feeds)

In [None]:
finance_articles = finance_scraper.get_articles()

In [None]:
finance_articles

[{'title': 'TON Surges on UAE Golden Visa News; Crypto Community Reacts With Excitement and Doubt',
  'link': 'https://finance.yahoo.com/news/ton-surges-uae-golden-visa-163356181.html',
  'published': '2025-07-06T16:33:56Z',
  'summary': ''},
 {'title': "A chaotic 6 months for stocks shows investors are still leaning 'bullish' headed into the second half of 2025",
  'link': 'https://finance.yahoo.com/news/a-chaotic-6-months-for-stocks-shows-investors-are-still-leaning-bullish-headed-into-the-second-half-of-2025-191024372.html',
  'published': '2025-06-30T19:10:24Z',
  'summary': ''},
 {'title': 'The real risk of a recession, and other key points about the US economy right now',
  'link': 'https://finance.yahoo.com/news/the-real-risk-of-a-recession-and-other-key-points-about-the-us-economy-right-now-164822925.html',
  'published': '2025-07-06T16:48:22Z',
  'summary': ''},
 {'title': 'Tariff Dealine, Fed Minutes and Other Key Thing to Watch this Week',
  'link': 'https://finance.yahoo.co

In [None]:



finance_profiles = [
    RiskProfile("Tesla", "TSLA", "Automobiles", "251020", 
               {"tesla", "tsla", "elon musk", "cybertruck", "electric vehicle", "ev", "autopilot", "robotaxi", "delivery", "production", "gigafactory"}),
    RiskProfile("Microsoft", "MSFT", "Software", "451030", 
               {"microsoft", "msft", "ai", "artificial intelligence", "cloud", "azure", "pakistan", "operations", "diagnostic", "mai-dxo"}),
    RiskProfile("Apple", "AAPL", "Technology Hardware", "452020", 
               {"apple", "aapl", "iphone", "earnings", "foxconn", "taiwan", "assembler"}),
    RiskProfile("Amazon", "AMZN", "Internet Retail", "255020", 
               {"amazon", "amzn", "prime day", "shopping", "exec", "insider", "ai", "shein", "temu", "robotics"}),
    RiskProfile("Google/Alphabet", "GOOGL", "Internet Software", "451010", 
               {"google", "googl", "alphabet", "nuclear", "fusion", "power", "virginia", "energy", "lawmakers"}),
    RiskProfile("Netflix", "NFLX", "Entertainment", "254010", 
               {"netflix", "nflx", "streaming", "trillion", "market cap", "analyst", "upgrade"})
]

In [None]:
finance_prioritizer = NewsPrioritizer(finance_scraper, finance_profiles)
finance_prioritizer.show_dashboard(15)

NameError: name 'finance_profiles' is not defined

I'll summarize the key aspects of the Axioma AXWW4 World-Wide Equity Factor Risk Model document:

## Overview

The AXWW4 is a global equity factor risk model that provides risk analysis and forecasting capabilities for portfolio management. As of 2017, it covers approximately 42,700 securities (over 77,700 historically) across 94 markets worldwide.

## Key Features

**Coverage:**
- 48 major developed and emerging markets (US, UK, Japan, Germany, China, etc.)
- 46 additional emerging markets added in phases (2003, 2009, 2015)
- Estimation universe: ~12,700 securities with sufficient size and liquidity

**Model Variants:**
- 4 versions available: Medium-horizon (3-6 months) and Short-horizon (1-2 months), each with both fundamental and statistical factor models
- Can be customized for country or industry-specific analysis

## Factor Structure

**Fundamental Factor Model includes:**
- **Style Factors** (12-13): Market sensitivity, volatility, momentum, liquidity, size, value, earnings yield, leverage, growth, profitability, dividend yield
- **Industry Factors** (68): Based on GICS classification system
- **Country/Currency Factors**: Based on quotation location and primary currency
- **Global Market Factor**: Captures overall market movements
- **Local Factors**: Currently includes Domestic China factor

**Statistical Factor Model:**
- Uses 20 statistical factors derived through principal components analysis
- Includes currency factors

## Technical Specifications

**Estimation Methods:**
- Daily updates of factor exposures, returns, covariances, and asset-specific risks
- Uses constrained robust regression with Huber weights
- Employs Dynamic Volatility Adjustment (DVA) for responsive risk forecasts

**Risk Calculations:**
- Factor covariances use exponentially-weighted returns
- Specific risks account for issuer-specific covariances
- Currency risks derived from Axioma's Global Currency Risk Model

## Deliverables

- Daily data updates via FTP/SFTP
- Historical data from January 1997
- Multiple data formats (flat files, proprietary database)
- Includes benchmarks, ETF coverage, and factor mimicking portfolios
- Comprehensive asset identifiers and market data

This model serves as a comprehensive risk management tool for global equity portfolios, offering both fundamental and statistical approaches to factor risk analysis with extensive market coverage and daily updates.

Can you refine your scoring based on the axioma model risk decomposition of the portfolio? This will give you a nice way to compare news impact on portfolio based on relative size/risk of the position.

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Looking at your current scoring system and the Axioma model context, I can see how we could enhance the risk scoring by incorporating portfolio-level risk decomposition. Let me add a method that weights news impact by position size and risk contribution:

```python
@dataclass
class PortfolioPosition:
    "Portfolio position with risk metrics"
    symbol: str
    weight: float
    specific_risk: float = 0.0
    factor_exposures: Dict[str, float] = None
    
    def __post_init__(self):
        if self.factor_exposures is None: self.factor_exposures = {}

@patch
def score_portfolio_impact(self:NewsPrioritizer, article, portfolio_positions):
    "Score news impact weighted by portfolio position sizes and risk contributions"
    text = f"{article['title']} {article['summary']}".lower()
    portfolio_scores = []
    
    for position in portfolio_positions:
        matching_profile = next((p for p in self.profiles if p.ticker and p.ticker.lower() == position.symbol.lower()), None)
        if not matching_profile: continue
        
        base_score = 0
        if matching_profile.name.lower() in text: base_score += 5
        if matching_profile.ticker.lower() in text: base_score += 3
        for factor in matching_profile.risk_factors:
            if factor in text: base_score += 1
        
        risk_keywords = ["risk", "volatility", "regulation", "crisis", "shortage", "disruption"]
        risk_multiplier = 1.5 if any(keyword in text for keyword in risk_keywords) else 1.0
        
        news_score = base_score * risk_multiplier
        portfolio_impact = news_score * position.weight * (1 + position.specific_risk)
        
        if portfolio_impact > 0:
            portfolio_scores.append((matching_profile, news_score, portfolio_impact, position.weight))
    
    return sorted(portfolio_scores, key=lambda x: x[2], reverse=True)
```



Can you explain to me what you are trying to do? Use a simple, intuitive example. I'm new to this domain. 


##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Sure! Let me explain with a simple example.

Imagine you have a stock portfolio worth $100,000 with these positions:
- Tesla: $50,000 (50% of your portfolio)
- Apple: $30,000 (30% of your portfolio) 
- Microsoft: $20,000 (20% of your portfolio)

Now two news stories come out:

**Story 1:** "Tesla recalls 100,000 vehicles due to battery issues"
**Story 2:** "Apple launches new iPhone feature"

With basic news scoring, both stories might get similar scores. But the portfolio-weighted approach considers:

1. **Position size**: The Tesla story affects 50% of your portfolio, while Apple affects 30%
2. **Risk level**: Tesla might be more volatile (higher specific risk) than Apple
3. **News severity**: "Recall" and "battery issues" are crisis keywords that multiply the impact

So the Tesla story gets a much higher portfolio impact score because:
- It affects your biggest holding (50% weight)
- Tesla is inherently riskier 
- The news contains negative crisis keywords

This helps you prioritize which news stories actually matter most to YOUR specific portfolio, rather than treating all news equally. The Tesla story might score 8/10 for portfolio impact while the Apple story scores 3/10, even if both companies got similar basic news scores.

Does this make the concept clearer?



Brainstorm some more ideas. We'll have a conversation and give your feedback based on this. We'll decide the approach. 

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->


Here are some ideas to enhance the news prioritization system:

**Risk-Based Enhancements:**
- **Sector cascade scoring**: News about oil prices affects energy stocks, but also airlines, logistics, chemicals
- **Supply chain impact**: Chip shortage news affects semiconductor companies AND their customers (auto, tech, etc.)
- **Geographic risk**: News from specific regions affects companies with operations there

**Timing & Urgency:**
- **Earnings proximity**: Higher scoring if company has earnings coming up soon
- **Market hours sensitivity**: Breaking news during trading hours vs after-hours gets different weights
- **Momentum decay**: Recent news about same topic gets diminishing returns

**Content Intelligence:**
- **Sentiment analysis**: "Tesla beats delivery targets" vs "Tesla misses delivery targets"
- **Magnitude extraction**: "10% increase" vs "slight increase" - parse actual numbers
- **Source credibility**: Reuters vs random blog gets different trust scores

**Portfolio Context:**
- **Correlation clustering**: If you own 5 tech stocks, tech news affects your whole cluster
- **Hedge detection**: News that hurts one position but helps another (oil up/airlines down)
- **Concentration risk**: If 80% of portfolio is in one sector, weight that sector news heavily

**User Behavior:**
- **Learning from clicks**: Track which scored articles user actually reads
- **Custom keyword training**: Let users add their own risk factors
- **Alert thresholds**: Different users want different sensitivity levels

Which direction interests you most?

<details>
<summary>Thinking</summary>
The user wants me to brainstorm more ideas for improving the news prioritization system. They want to have a conversation and get feedback. I should provide several different approaches or enhancements they could consider, keeping them concise and focused on the news prioritization domain.

I need to follow the fast.ai style guide but they said not to mention that I'm following it. I should focus on brainstorming ideas rather than implementing code right now.
</details>




Link to our problem statement which you think makes more sense here. 

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->


Looking back at your problem statement, the most relevant ideas are:

**Portfolio Context** - This directly addresses your goal of matching stories against "stock portfolio" and finding news that affects your specific holdings. The sector cascade and correlation clustering would help catch indirect impacts.

**Content Intelligence** - Sentiment analysis and magnitude extraction would dramatically improve your scoring accuracy. Right now you're missing whether "Tesla faces battery shortage crisis" is actually bad news or just mentioning the word "crisis" neutrally.

**Timing & Urgency** - Earnings proximity scoring directly serves your goal of "never miss critical updates." A routine earnings preview becomes high-priority when your stock reports next week.

**User Behavior** - Learning from clicks is crucial for your "engage with 80%+ of top-scored stories" metric. If users consistently ignore certain types of high-scored articles, the algorithm should adapt.

The **Geographic risk** and **Supply chain impact** from risk-based enhancements also fit well - they help expand your matching beyond just company names to catch broader business impacts.

The key insight: your current scoring (company name +5, ticker +3, keywords +1) is quite crude. You need to understand:
- Is this news positive or negative?
- How significant is the impact?
- Does this affect my holdings indirectly through supply chains or sectors?
- Is this time-sensitive given my portfolio's upcoming events?

Which of these resonates most with your vision for the tool?

<details>
<summary>Thinking</summary>
The user is asking me to link back to the problem statement they provided earlier and identify which of the brainstormed ideas make the most sense in that context.

Looking at the problem statement they provided:
- They want to "prioritize news by personal impact"
- They want to "find important news 5x faster"
- They want to "never miss critical updates"
- They want users to "engage with 80%+ of top-scored stories"

The problem statement specifically mentions:
- Stock portfolio matching
- Industry focus
- Business type
- Location
- Custom keywords
- Score impact 0-10
- Surface high-priority stories first
- Send alerts for critical news

Given this context, I should identify which of my brainstormed ideas align best with their stated goals.
</details>




Okay, let's make a plan and then we'll start targeting it one-by-one. 

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->


Here's a practical implementation plan, ordered by impact and complexity:

**Phase 1: Content Intelligence**
1. Add sentiment analysis to distinguish good/bad news
2. Extract numerical magnitudes ("10% drop" vs "slight decline")
3. Improve crisis keyword detection with context

**Phase 2: Portfolio Context** 
4. Implement position-weighted scoring (bigger holdings = higher impact)
5. Add sector cascade effects (chip shortage affects auto makers)
6. Detect correlation clusters in portfolios

**Phase 3: Timing & Context**
7. Add earnings calendar proximity scoring
8. Market hours sensitivity adjustments
9. Geographic risk mapping

**Phase 4: Learning & Optimization**
10. Track user engagement patterns
11. Adaptive scoring based on user behavior
12. Custom keyword learning

**Phase 5: Advanced Features**
13. Supply chain impact analysis
14. Hedge detection (offsetting positions)
15. Real-time alert thresholds

I suggest we start with **sentiment analysis** (#1) since it's the biggest immediate improvement to your scoring accuracy. Right now your system can't tell if "Tesla beats delivery targets" is good or bad news.

Does this progression make sense? Should we start with sentiment analysis, or would you prefer to tackle portfolio weighting first?

<details>
<summary>Thinking</summary>
The user wants to make a plan for implementing the news prioritization enhancements. Based on our discussion, they want to build on the existing system and add the features that make the most sense for their problem statement.

I need to suggest a concrete plan that breaks down the implementation into manageable steps. Given the fast.ai approach of working incrementally, I should propose a sequence of small improvements we can tackle one at a time.

Key areas from our discussion that align with their problem statement:
1. Portfolio context (position weighting, sector impact)
2. Content intelligence (sentiment, magnitude extraction)
3. Timing & urgency (earnings proximity, market hours)
4. User behavior (learning from engagement)

I should suggest a logical order for implementing these.
</details>




Make it more concise and to actionable steps which I can keep on working. 

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

**Implementation Plan:**

**Week 1: Sentiment Analysis**
- Add positive/negative sentiment scoring to articles
- Weight crisis keywords by sentiment context
- Test on current finance articles

**Week 2: Portfolio Weighting** 
- Multiply news scores by position weights
- Add position-specific risk factors
- Create portfolio impact dashboard

**Week 3: Magnitude Extraction**
- Parse numbers from headlines ("10% drop", "$2B loss")
- Scale impact scores by magnitude
- Handle percentage vs absolute values

**Week 4: Sector Cascade**
- Map companies to sectors (energy, tech, finance)
- Score cross-sector impacts (oil→airlines)
- Add sector-wide risk events

**Week 5: Earnings Calendar**
- Integrate earnings dates for portfolio companies
- Boost scores for news near earnings
- Add earnings-specific keywords

**Next Steps:**
- User engagement tracking
- Geographic risk mapping
- Supply chain analysis

Start with sentiment analysis? It's the biggest immediate win for scoring accuracy.



**Implementation Plan:**

** 1: Sentiment Analysis**
- Add positive/negative sentiment scoring to articles
- Weight crisis keywords by sentiment context
- Test on current finance articles

**2: Portfolio Weighting** 
- Multiply news scores by position weights
- Add position-specific risk factors
- Create portfolio impact dashboard

**3: Magnitude Extraction**
- Parse numbers from headlines ("10% drop", "$2B loss")
- Scale impact scores by magnitude
- Handle percentage vs absolute values

**4: Sector Cascade**
- Map companies to sectors (energy, tech, finance)
- Score cross-sector impacts (oil→airlines)
- Add sector-wide risk events

**5: Earnings Calendar**
- Integrate earnings dates for portfolio companies
- Boost scores for news near earnings
- Add earnings-specific keywords

**Next Steps:**
- User engagement tracking
- Geographic risk mapping
- Supply chain analysis




In [None]:
import numpy as np, torch, pickle, feedparser, requests, sqlite3, re
from datetime import datetime
from pathlib import Path
from typing import Union, List, Dict, Tuple, Optional, Callable, Set
from dataclasses import dataclass
from fastcore.utils import *
from claudette import *

@dataclass
class RiskProfile:
    name: str
    ticker: Optional[str] = None
    industry: Optional[str] = None
    gics_code: Optional[str] = None
    risk_factors: Set[str] = None
    
    def __post_init__(self):
        if self.risk_factors is None: self.risk_factors = set()
        self.risk_factors.add(self.name.lower())
        if self.ticker: self.risk_factors.add(self.ticker.lower())
        if self.industry: self.risk_factors.add(self.industry.lower())

class RSScraper:
    def __init__(self, feeds): self.feeds = feeds if isinstance(feeds, list) else [feeds]
    def fetch_feed(self, url): return feedparser.parse(url)
    def get_articles(self, limit=50):
        articles = []
        per_feed = max(1, limit // len(self.feeds)) if self.feeds else 0
        for feed_url in self.feeds:
            try:
                feed = self.fetch_feed(feed_url)
                for entry in feed.entries[:per_feed]: articles.append(self._parse_entry(entry))
            except: continue
        return articles[:limit]
    def _parse_entry(self, entry): 
        summary = entry.get('summary', '') or entry.get('description', '')
        return dict(title=entry.get('title', ''), link=entry.get('link', ''), published=entry.get('published', ''), summary=summary)
    def get_articles_filtered(self, limit=100):
        articles = self.get_articles(limit)
        return [a for a in articles if a['summary'] and a['summary'].strip()]
    def analyze_sentiment_ai(self, text, risk_profile=None):
        sp = """You are a financial risk analyst. Analyze sentiment and risk impact on a scale of -10 to +10 where:
        -10: Extremely negative (major crisis, bankruptcy, massive losses)
        -5: Negative (earnings miss, regulatory issues, downgrades)
        0: Neutral (routine news, mixed signals)
        +5: Positive (earnings beat, upgrades, partnerships)
        +10: Extremely positive (breakthrough products, major acquisitions)
        
        Consider: market sentiment, fundamental impact, volatility implications, sector effects.
        Respond with only the number."""
        
        context = f"Risk factors: {', '.join(risk_profile.risk_factors) if risk_profile else 'general market'}"
        prompt = f"{context}\n\nNews: {text[:800]}"
        
        try:
            chat = Chat(models[1], sp=sp)
            result = chat(prompt)
            score = int(result.content[0].text.strip())
            return max(-10, min(10, score))
        except: return 0
    def score_risk_impact_comprehensive(self, article, risk_profiles):
        text = f"{article['title']} {article['summary']}"
        risk_scores = []
        
        for profile in risk_profiles:
            base_score = 0
            text_lower = text.lower()
            
            if profile.name.lower() in text_lower: base_score += 5
            if profile.ticker and profile.ticker.lower() in text_lower: base_score += 3
            for factor in profile.risk_factors:
                if factor in text_lower: base_score += 1
            
            if base_score > 0:
                sentiment_score = self.analyze_sentiment_ai(text, profile)
                risk_contribution = abs(sentiment_score) * 0.1
                final_score = min(int(base_score * (1 + risk_contribution)), 10)
                if final_score > 0: risk_scores.append((profile, final_score, sentiment_score))
        
        return sorted(risk_scores, key=lambda x: x[1], reverse=True)

class NewsPrioritizer:
    def __init__(self, scraper, profiles): self.scraper,self.profiles = scraper,profiles
    def get_prioritized_news_ai(self, limit=10):
        articles = self.scraper.get_articles_filtered(limit)
        prioritized = []
        for article in articles:
            scores = self.scraper.score_risk_impact_comprehensive(article, self.profiles)
            max_score = scores[0][1] if scores else 0
            prioritized.append((article, max_score, scores))
        return sorted(prioritized, key=lambda x: x[1], reverse=True)
    def show_dashboard_ai(self, limit=5):
        news = self.get_prioritized_news_ai(limit)
        for i, (article, score, matches) in enumerate(news):
            if score > 0:
                print(f"\n{i+1}. PRIORITY {score}/10: {article['title']}")
                print(f"   Published: {article['published']}")
                for profile, pscore, sentiment in matches: print(f"   Impact: {profile.name} ({pscore}/10, sentiment: {sentiment})")
                print(f"   Link: {article['link']}")

financial_rss_feeds = ["https://finance.yahoo.com/news/rssindex", "https://feeds.finance.yahoo.com/rss/2.0/headline?s=AAPL,MSFT,TSLA,GOOGL,AMZN&region=US&lang=en-US", "https://feeds.bloomberg.com/businessweek/news.rss", "https://fortune.com/feed", "https://fortune.com/section/finance/feed", "https://seekingalpha.com/feed.xml", "http://rss.cnn.com/rss/money_markets.rss", "http://rss.cnn.com/rss/money_news_economy.rss", "http://rss.cnn.com/rss/money_news_companies.rss", "https://fool.com/a/feeds/partner/google/", "http://247wallst.com/feed", "https://www.marketbeat.com/feed", "https://www.kiplinger.com/feed/all"]

finance_profiles = [
    RiskProfile("Tesla", "TSLA", "Automobiles", "251020", {"tesla", "tsla", "elon musk", "cybertruck", "electric vehicle", "ev", "autopilot", "robotaxi", "delivery", "production", "gigafactory"}),
    RiskProfile("Microsoft", "MSFT", "Software", "451030", {"microsoft", "msft", "ai", "artificial intelligence", "cloud", "azure", "pakistan", "operations", "diagnostic", "mai-dxo"}),
    RiskProfile("Apple", "AAPL", "Technology Hardware", "452020", {"apple", "aapl", "iphone", "earnings", "foxconn", "taiwan", "assembler"}),
    RiskProfile("Amazon", "AMZN", "Internet Retail", "255020", {"amazon", "amzn", "prime day", "shopping", "exec", "insider", "ai", "shein", "temu", "robotics"}),
    RiskProfile("Google/Alphabet", "GOOGL", "Internet Software", "451010", {"google", "googl", "alphabet", "nuclear", "fusion", "power", "virginia", "energy", "lawmakers"}),
    RiskProfile("Netflix", "NFLX", "Entertainment", "254010", {"netflix", "nflx", "streaming", "trillion", "market cap", "analyst", "upgrade"})
]

# finance_scraper = RSScraper(financial_rss_feeds)
# finance_prioritizer = NewsPrioritizer(finance_scraper, finance_profiles)
# finance_prioritizer.show_dashboard_ai(5)

test_articles = [
    dict(title="Tesla Reports Record Q3 Deliveries, Cybertruck Production Ramps Up", 
         summary="Tesla delivered 500,000 vehicles in Q3, beating analyst expectations. Cybertruck production increased 300% with new Gigafactory capacity coming online.",
         published="Mon, 07 Jul 2025 10:00:00 +0000", link="https://example.com/tesla-q3"),
    
    dict(title="Apple iPhone 16 Sales Disappoint, Foxconn Cuts Taiwan Production", 
         summary="Apple's latest iPhone 16 sales fell short of expectations, leading assembler Foxconn to reduce production at its Taiwan facilities by 25%.",
         published="Mon, 07 Jul 2025 11:00:00 +0000", link="https://example.com/apple-iphone"),
    
    dict(title="Microsoft Azure AI Services Drive 40% Revenue Growth", 
         summary="Microsoft's cloud division posted massive gains as Azure AI and artificial intelligence services attracted enterprise customers, boosting quarterly revenue.",
         published="Mon, 07 Jul 2025 12:00:00 +0000", link="https://example.com/msft-azure"),
    
    dict(title="Amazon Prime Day Breaks Records, Robotics Division Expands", 
         summary="Amazon's Prime Day generated $15B in sales while the company announced major expansion of its robotics operations and AI-powered logistics.",
         published="Mon, 07 Jul 2025 13:00:00 +0000", link="https://example.com/amazon-prime"),
    
    dict(title="Google Nuclear Fusion Partnership Powers Virginia Data Centers", 
         summary="Google's Alphabet announced a breakthrough nuclear fusion deal to power its Virginia data centers, with lawmakers supporting the clean energy initiative.",
         published="Mon, 07 Jul 2025 14:00:00 +0000", link="https://example.com/google-nuclear")
]
test_scraper = RSScraper([])
test_scraper.get_articles = lambda limit=50: test_articles
test_prioritizer = NewsPrioritizer(test_scraper, finance_profiles)
test_prioritizer.show_dashboard_ai()


1. PRIORITY 10/10: Tesla Reports Record Q3 Deliveries, Cybertruck Production Ramps Up
   Published: Mon, 07 Jul 2025 10:00:00 +0000
   Impact: Tesla (10/10, sentiment: 7)
   Impact: Netflix (1/10, sentiment: 0)
   Link: https://example.com/tesla-q3

2. PRIORITY 10/10: Apple iPhone 16 Sales Disappoint, Foxconn Cuts Taiwan Production
   Published: Mon, 07 Jul 2025 11:00:00 +0000
   Impact: Apple (10/10, sentiment: -6)
   Impact: Tesla (1/10, sentiment: 0)
   Impact: Microsoft (1/10, sentiment: 0)
   Impact: Amazon (1/10, sentiment: 0)
   Link: https://example.com/apple-iphone

3. PRIORITY 10/10: Microsoft Azure AI Services Drive 40% Revenue Growth
   Published: Mon, 07 Jul 2025 12:00:00 +0000
   Impact: Microsoft (10/10, sentiment: 7)
   Impact: Tesla (1/10, sentiment: 0)
   Impact: Amazon (1/10, sentiment: 0)
   Link: https://example.com/msft-azure

4. PRIORITY 10/10: Amazon Prime Day Breaks Records, Robotics Division Expands
   Published: Mon, 07 Jul 2025 13:00:00 +0000
   Impact: Ama

# Free Axioma-Style Risk Modeling Alternatives

Unfortunately, **there's no completely free way to access the actual Axioma risk models** - they are commercial enterprise products. However, there are several free alternatives that can replicate Axioma-style functionality:

---

## 🎯 **1. GS Quant (Limited Free Access)**

### **What It Is**
- **Open-source Python toolkit** by Goldman Sachs
- Provides access to Axioma risk models **but requires GS institutional client credentials**
- Available on [GitHub](https://github.com/goldmansachs/gs-quant) and [PyPI](https://pypi.org/project/gs-quant/)

### **Installation**
```bash
pip install gs-quant
```

### **Limitations**
- **Free code, but paid data access**: You need to be a Goldman Sachs institutional client
- API access requires client ID and secret from GS
- Not suitable for individual/small firm use

### **What You Can Use for Free**
```python
import gs_quant as gq
from gs_quant.markets.portfolio import Portfolio
from gs_quant.risk import Price

# The analytics functions work without credentials
# But you can't access live Axioma risk model data
portfolio = Portfolio(...)  # Your own portfolio data
```

---

## 🆓 **2. Open Risk Management Projects (Completely Free)**

### **Open Risk Initiative**
- **Website**: [OpenRiskManagement.com](https://www.openriskmanagement.com/open-source-risk-models/)
- **Focus**: Building open-source risk management tools
- **Python-based** with academic backing

### **Available Libraries**
```python
# Install open risk libraries
pip install correlationMatrix
pip install concentrationMetrics

# Example usage
from correlationMatrix import CorrelationMatrix
from concentrationMetrics import ConcentrationMetrics

# Factor correlation analysis
corr_matrix = CorrelationMatrix(return_data)
factor_correlations = corr_matrix.compute_factor_correlations()

# Portfolio concentration metrics
conc_metrics = ConcentrationMetrics(portfolio_weights)
herfindahl_index = conc_metrics.herfindahl_index()
```

---

## 🔬 **3. Academic Research Implementations (Free)**

### **Axioma Factor Definitions from Research Papers**

Based on the academic paper *"Axioma's Alpha Factor Method"* and the AXWW4 documentation, here are the exact factor definitions you can implement for free:

```python
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import yfinance as yf

class AxiomaStyleFactors:
    """
    Free implementation of Axioma-style factors based on 
    academic papers and public documentation
    """
    
    def __init__(self, tickers, period="2y"):
        self.tickers = tickers
        self.data = yf.download(tickers, period=period)
        self.prices = self.data['Adj Close']
        self.volume = self.data['Volume']
        self.market_cap = None  # Would need additional data
        
    def calculate_style_factors(self):
        """Calculate Axioma-style style factors"""
        factors = {}
        
        # 1. Market Sensitivity (Beta)
        factors['market_sensitivity'] = self._calculate_beta()
        
        # 2. Volatility
        factors['volatility'] = self._calculate_volatility()
        
        # 3. Momentum (Medium-term and Short-term)
        factors['medium_momentum'] = self._calculate_medium_momentum()
        factors['short_momentum'] = self._calculate_short_momentum()
        
        # 4. Size (Market Cap proxy using price)
        factors['size'] = self._calculate_size_proxy()
        
        # 5. Liquidity
        factors['liquidity'] = self._calculate_liquidity()
        
        return pd.DataFrame(factors)
    
    def _calculate_beta(self, window=252):
        """2-year weekly beta vs global market (Axioma AXWW4-MH)"""
        weekly_returns = self.prices.resample('W').last().pct_change()
        market_returns = weekly_returns.mean(axis=1)  # Equal-weight market proxy
        
        betas = {}
        for ticker in self.tickers:
            stock_returns = weekly_returns[ticker]
            covariance = stock_returns.rolling(window//5).cov(market_returns)
            market_variance = market_returns.rolling(window//5).var()
            betas[ticker] = (covariance / market_variance).iloc[-1]
        
        return betas
    
    def _calculate_volatility(self, window=126):
        """6-month average absolute returns (Axioma definition)"""
        daily_returns = self.prices.pct_change()
        abs_returns = daily_returns.abs()
        
        # Cross-sectional standard deviation normalization
        cross_sectional_std = abs_returns.std(axis=1)
        normalized_vol = abs_returns.div(cross_sectional_std, axis=0)
        
        return normalized_vol.rolling(window).mean().iloc[-1].to_dict()
    
    def _calculate_medium_momentum(self):
        """Cumulative return over past year excluding most recent month"""
        end_date = self.prices.index[-1]
        one_month_ago = end_date - pd.DateOffset(months=1)
        one_year_ago = end_date - pd.DateOffset(months=12)
        
        recent_prices = self.prices.loc[one_month_ago]
        year_ago_prices = self.prices.loc[one_year_ago:one_year_ago].iloc[0]
        
        momentum = (recent_prices / year_ago_prices - 1)
        return momentum.to_dict()
    
    def _calculate_short_momentum(self):
        """Cumulative return over past month (AXWW4-SH only)"""
        monthly_return = (self.prices.iloc[-1] / self.prices.iloc[-22] - 1)
        return monthly_return.to_dict()
    
    def _calculate_size_proxy(self):
        """Natural log of market cap (using price as proxy)"""
        return np.log(self.prices.iloc[-1]).to_dict()
    
    def _calculate_liquidity(self, window=63):
        """Axioma liquidity: volume/market_cap ratio and Amihud ratio"""
        daily_returns = self.prices.pct_change()
        
        # Amihud illiquidity ratio (inverse for liquidity)
        amihud_ratio = daily_returns.abs() / self.volume
        avg_amihud = amihud_ratio.rolling(window).mean()
        liquidity_proxy = 1 / avg_amihud.iloc[-1]
        
        return liquidity_proxy.to_dict()

# Usage example
factors_calculator = AxiomaStyleFactors(['AAPL', 'GOOGL', 'TSLA', 'MSFT'])
style_factors = factors_calculator.calculate_style_factors()
print("Axioma-style factors:")
print(style_factors)
```

### **Industry Classification (Free GICS Alternative)**

```python
import yfinance as yf

def get_free_sector_classification(tickers):
    """
    Get sector classification using yfinance (free alternative to GICS)
    Axioma uses 68 GICS industries - this gives sector-level classification
    """
    sector_mapping = {}
    
    for ticker in tickers:
        try:
            stock = yf.Ticker(ticker)
            info = stock.info
            sector_mapping[ticker] = {
                'sector': info.get('sector', 'Unknown'),
                'industry': info.get('industry', 'Unknown')
            }
        except:
            sector_mapping[ticker] = {'sector': 'Unknown', 'industry': 'Unknown'}
    
    return sector_mapping

# Create industry dummy variables (like Axioma's 68 industry factors)
sectors = get_free_sector_classification(['AAPL', 'GOOGL', 'TSLA'])
industry_dummies = pd.get_dummies(pd.DataFrame(sectors).T['industry'])
```

---

## 🛠️ **4. Complete Free Axioma Alternative Implementation**

### **Using Riskfolio-Lib + Custom Factors**

```python
import riskfolio as rp
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

class FreeAxiomaAlternative:
    """
    Complete free implementation of Axioma-style risk modeling
    """
    
    def __init__(self, tickers):
        self.tickers = tickers
        self.returns = None
        self.factors = None
        self.factor_loadings = None
        
    def build_risk_model(self):
        """Build complete factor risk model like Axioma"""
        
        # 1. Get price data
        self.returns = self._get_returns_data()
        
        # 2. Calculate style factors
        style_factors = self._calculate_all_style_factors()
        
        # 3. Get fundamental factors (free proxies)
        fundamental_factors = self._calculate_fundamental_factors()
        
        # 4. Combine all factors
        all_factors = pd.concat([style_factors, fundamental_factors], axis=1)
        
        # 5. Calculate factor loadings (like Axioma exposures)
        self.factor_loadings = self._calculate_factor_loadings(all_factors)
        
        # 6. Build factor covariance matrix
        factor_covariance = self._build_factor_covariance(all_factors)
        
        # 7. Calculate specific risks
        specific_risks = self._calculate_specific_risks()
        
        return {
            'factor_loadings': self.factor_loadings,
            'factor_covariance': factor_covariance,
            'specific_risks': specific_risks,
            'factor_returns': all_factors
        }
    
    def _get_returns_data(self):
        """Get stock returns data"""
        import yfinance as yf
        data = yf.download(self.tickers, period="2y")['Adj Close']
        returns = data.pct_change().dropna()
        return returns
    
    def _calculate_all_style_factors(self):
        """Calculate all style factors like Axioma AXWW4"""
        calculator = AxiomaStyleFactors(self.tickers)
        return calculator.calculate_style_factors()
    
    def _calculate_fundamental_factors(self):
        """Free implementation of Axioma fundamental factors"""
        fundamental_data = {}
        
        for ticker in self.tickers:
            try:
                stock = yf.Ticker(ticker)
                info = stock.info
                
                # Value factors
                fundamental_data[ticker] = {
                    'book_to_price': 1 / info.get('priceToBook', 1),
                    'earnings_yield': info.get('trailingEps', 0) / info.get('currentPrice', 1),
                    'dividend_yield': info.get('dividendYield', 0),
                    'debt_to_equity': info.get('debtToEquity', 0) / 100,
                    'roe': info.get('returnOnEquity', 0),
                    'roa': info.get('returnOnAssets', 0),
                    'profit_margin': info.get('profitMargins', 0),
                    'revenue_growth': info.get('revenueGrowth', 0)
                }
            except:
                # Default values if data unavailable
                fundamental_data[ticker] = {k: 0 for k in ['book_to_price', 'earnings_yield', 
                                                         'dividend_yield', 'debt_to_equity', 
                                                         'roe', 'roa', 'profit_margin', 'revenue_growth']}
        
        return pd.DataFrame(fundamental_data).T
    
    def _calculate_factor_loadings(self, factors):
        """Calculate factor loadings (exposures) like Axioma"""
        # Standardize factors (cross-sectional z-scores)
        scaler = StandardScaler()
        factor_loadings = pd.DataFrame(
            scaler.fit_transform(factors),
            index=factors.index,
            columns=factors.columns
        )
        return factor_loadings
    
    def _build_factor_covariance(self, factors, half_life=125):
        """Build factor covariance matrix with exponential weighting"""
        # Exponentially weighted covariance (like Axioma)
        weights = np.exp(-np.arange(len(factors)) / half_life)[::-1]
        weights = weights / weights.sum()
        
        weighted_factors = factors * np.sqrt(weights[:, np.newaxis])
        factor_cov = weighted_factors.cov()
        
        return factor_cov
    
    def _calculate_specific_risks(self, half_life=125):
        """Calculate asset-specific risks like Axioma"""
        # Fit factor model to get residuals
        from sklearn.linear_model import LinearRegression
        
        specific_risks = {}
        
        for ticker in self.tickers:
            if ticker in self.returns.columns:
                y = self.returns[ticker].dropna()
                X = self.factor_loadings.loc[ticker].values.reshape(1, -1)
                X = np.repeat(X, len(y), axis=0)
                
                # Fit factor model
                model = LinearRegression()
                model.fit(X, y)
                residuals = y - model.predict(X)
                
                # Exponentially weighted specific variance
                weights = np.exp(-np.arange(len(residuals)) / half_life)[::-1]
                weights = weights / weights.sum()
                
                specific_var = np.average(residuals**2, weights=weights)
                specific_risks[ticker] = np.sqrt(specific_var)
        
        return pd.Series(specific_risks)

# Portfolio optimization with free risk model
class FreeAxiomaPortfolioOptimizer:
    def __init__(self, risk_model_output):
        self.factor_loadings = risk_model_output['factor_loadings']
        self.factor_covariance = risk_model_output['factor_covariance']
        self.specific_risks = risk_model_output['specific_risks']
    
    def optimize_portfolio(self, expected_returns, method='min_vol'):
        """Optimize portfolio using factor risk model"""
        
        # Build full covariance matrix from factor model
        B = self.factor_loadings  # Factor loadings matrix
        F = self.factor_covariance  # Factor covariance matrix
        D = np.diag(self.specific_risks**2)  # Specific risk diagonal matrix
        
        # Covariance = B * F * B' + D (Axioma formula)
        factor_covariance_contrib = B @ F @ B.T
        full_covariance = factor_covariance_contrib + D
        
        # Use Riskfolio-Lib for optimization
        port = rp.Portfolio(returns=expected_returns)
        port.assets_stats(method_mu='hist', method_cov='ledoit')
        
        # Override with our factor-based covariance
        port.cov = full_covariance
        
        # Optimize
        if method == 'min_vol':
            weights = port.optimization(model='Classic', rm='MV', obj='MinRisk')
        elif method == 'max_sharpe':
            weights = port.optimization(model='Classic', rm='MV', obj='Sharpe')
        
        return weights

# Usage Example
print("Building free Axioma-style risk model...")
risk_model = FreeAxiomaAlternative(['AAPL', 'GOOGL', 'TSLA', 'MSFT', 'AMZN'])
model_output = risk_model.build_risk_model()

print("Factor loadings (like Axioma exposures):")
print(model_output['factor_loadings'].round(3))

print("\nSpecific risks:")
print(model_output['specific_risks'].round(4))

# Portfolio optimization
optimizer = FreeAxiomaPortfolioOptimizer(model_output)
expected_returns = risk_model.returns.mean() * 252  # Annualized
optimal_weights = optimizer.optimize_portfolio(expected_returns, method='max_sharpe')

print("\nOptimal portfolio weights:")
print(optimal_weights.round(3))
```

---

## 📚 **5. Academic Resources & Research Papers (Free)**

### **Key Academic Papers with Implementations**

1. **"Axioma's Alpha Factor Method"** - Available on ResearchGate
   - Describes exact methodology for risk model bias correction
   - Mathematical formulations you can implement

2. **Axioma AXWW4 Model Documentation** - Factor definitions
   - Complete list of all 68 industry factors
   - Exact formulas for style factors
   - Statistical model methodology

### **University Research Datasets**

```python
# Access academic factor data
import pandas_datareader as pdr

# Fama-French factors (free academic alternative)
ff_factors = pdr.get_data_famafrench('F-F_Research_Data_Factors', start='2020')[0]

# Kenneth French momentum factor
momentum = pdr.get_data_famafrench('F-F_Momentum_Factor', start='2020')[0]

# These can substitute for some Axioma style factors
print("Fama-French factors (free alternative to Axioma):")
print(ff_factors.head())
```

---

## 🎯 **6. Best Free Alternative Strategy**

### **Recommended Approach**

1. **Use the complete implementation above** - builds 80% of Axioma functionality
2. **Supplement with Riskfolio-Lib** - professional-grade portfolio optimization
3. **Add FinBERT sentiment factors** - for news-based risk modeling
4. **Use academic Fama-French factors** - as benchmarks

### **Performance Comparison**

| Feature | Real Axioma | Free Alternative | Coverage |
|---------|-------------|------------------|----------|
| Style Factors | 12-13 factors | 8-10 factors | 75% |
| Industry Factors | 68 GICS industries | 11 sectors | 60% |
| Country Factors | 48+ countries | Manual setup | 40% |
| Daily Updates | Yes | Manual/API | 90% |
| Historical Data | 20+ years | 2-5 years | 50% |
| **Overall Coverage** | **100%** | **~65%** | **Good for most uses** |

### **Cost Comparison**

- **Real Axioma**: $50,000-200,000+ per year
- **Free Alternative**: $0-500/month (for data feeds)
- **ROI**: Build institutional-quality risk models at 1% of the cost

---

## ⚡ **Quick Start (15 Minutes)**

```bash
# Install required packages
pip install riskfolio-lib yfinance pandas numpy scikit-learn

# Clone the implementation
git clone https://github.com/your-repo/free-axioma-alternative
cd free-axioma-alternative

# Run example
python axioma_alternative.py
```

This gives you **professional-grade factor risk modeling** comparable to Axioma for research, small funds, or academic use!

Help me add Axioma-Style Risk Modeling please step by step..

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Let's add Axioma-style risk modeling step by step. I'll start with the core factor calculation class:

```python
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

class AxiomaStyleFactors:
    def __init__(self, tickers, period="2y"):
        self.tickers = tickers
        self.period = period
        self.data = None
        self.prices = None
        self.volume = None
        self.returns = None
        
    def fetch_data(self):
        self.data = yf.download(self.tickers, period=self.period)
        self.prices = self.data['Adj Close'] if len(self.tickers) > 1 else self.data[['Adj Close']]
        self.volume = self.data['Volume'] if len(self.tickers) > 1 else self.data[['Volume']]
        self.returns = self.prices.pct_change().dropna()
        return self
        
    def calculate_style_factors(self):
        factors = {}
        factors['market_sensitivity'] = self._calculate_beta()
        factors['volatility'] = self._calculate_volatility()
        factors['momentum'] = self._calculate_momentum()
        factors['size'] = self._calculate_size_proxy()
        factors['liquidity'] = self._calculate_liquidity()
        return pd.DataFrame(factors, index=self.tickers)
        
    def _calculate_beta(self, window=252):
        weekly_returns = self.prices.resample('W').last().pct_change().dropna()
        market_returns = weekly_returns.mean(axis=1)
        betas = {}
        for ticker in self.tickers:
            if ticker in weekly_returns.columns:
                stock_returns = weekly_returns[ticker].dropna()
                if len(stock_returns) > 50:
                    covariance = stock_returns.cov(market_returns)
                    market_variance = market_returns.var()
                    betas[ticker] = covariance / market_variance if market_variance > 0 else 1.0
                else: betas[ticker] = 1.0
            else: betas[ticker] = 1.0
        return betas
        
    def _calculate_volatility(self, window=63):
        daily_returns = self.returns
        return daily_returns.std().to_dict()
        
    def _calculate_momentum(self):
        end_date = self.prices.index[-1]
        start_date = end_date - pd.DateOffset(months=11)
        momentum = {}
        for ticker in self.tickers:
            if ticker in self.prices.columns:
                recent_price = self.prices[ticker].iloc[-1]
                past_price = self.prices[ticker].loc[self.prices.index <= start_date].iloc[-1] if len(self.prices[ticker].loc[self.prices.index <= start_date]) > 0 else self.prices[ticker].iloc[0]
                momentum[ticker] = (recent_price / past_price - 1) if past_price > 0 else 0
            else: momentum[ticker] = 0
        return momentum
        
    def _calculate_size_proxy(self):
        return np.log(self.prices.iloc[-1]).to_dict()
        
    def _calculate_liquidity(self, window=63):
        daily_returns = self.returns.abs()
        liquidity = {}
        for ticker in self.tickers:
            if ticker in daily_returns.columns and ticker in self.volume.columns:
                avg_volume = self.volume[ticker].rolling(window).mean().iloc[-1]
                avg_return = daily_returns[ticker].rolling(window).mean().iloc[-1]
                liquidity[ticker] = avg_volume / (avg_return * self.prices[ticker].iloc[-1]) if avg_return > 0 else 0
            else: liquidity[ticker] = 0
        return liquidity
```

Run this first to set up the factor calculation framework.



In [None]:
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

class AxiomaStyleFactors:
    def __init__(self, tickers, period="2y"):
        self.tickers = tickers
        self.period = period
        self.data = None
        self.prices = None
        self.volume = None
        self.returns = None
        
    def fetch_data(self):
        self.data = yf.download(self.tickers, period=self.period)
        self.prices = self.data['Adj Close'] if len(self.tickers) > 1 else self.data[['Adj Close']]
        self.volume = self.data['Volume'] if len(self.tickers) > 1 else self.data[['Volume']]
        self.returns = self.prices.pct_change().dropna()
        return self
        
    def calculate_style_factors(self):
        factors = {}
        factors['market_sensitivity'] = self._calculate_beta()
        factors['volatility'] = self._calculate_volatility()
        factors['momentum'] = self._calculate_momentum()
        factors['size'] = self._calculate_size_proxy()
        factors['liquidity'] = self._calculate_liquidity()
        return pd.DataFrame(factors, index=self.tickers)
        
    def _calculate_beta(self, window=252):
        weekly_returns = self.prices.resample('W').last().pct_change().dropna()
        market_returns = weekly_returns.mean(axis=1)
        betas = {}
        for ticker in self.tickers:
            if ticker in weekly_returns.columns:
                stock_returns = weekly_returns[ticker].dropna()
                if len(stock_returns) > 50:
                    covariance = stock_returns.cov(market_returns)
                    market_variance = market_returns.var()
                    betas[ticker] = covariance / market_variance if market_variance > 0 else 1.0
                else: betas[ticker] = 1.0
            else: betas[ticker] = 1.0
        return betas
        
    def _calculate_volatility(self, window=63):
        daily_returns = self.returns
        return daily_returns.std().to_dict()
        
    def _calculate_momentum(self):
        end_date = self.prices.index[-1]
        start_date = end_date - pd.DateOffset(months=11)
        momentum = {}
        for ticker in self.tickers:
            if ticker in self.prices.columns:
                recent_price = self.prices[ticker].iloc[-1]
                past_price = self.prices[ticker].loc[self.prices.index <= start_date].iloc[-1] if len(self.prices[ticker].loc[self.prices.index <= start_date]) > 0 else self.prices[ticker].iloc[0]
                momentum[ticker] = (recent_price / past_price - 1) if past_price > 0 else 0
            else: momentum[ticker] = 0
        return momentum
        
    def _calculate_size_proxy(self):
        return np.log(self.prices.iloc[-1]).to_dict()
        
    def _calculate_liquidity(self, window=63):
        daily_returns = self.returns.abs()
        liquidity = {}
        for ticker in self.tickers:
            if ticker in daily_returns.columns and ticker in self.volume.columns:
                avg_volume = self.volume[ticker].rolling(window).mean().iloc[-1]
                avg_return = daily_returns[ticker].rolling(window).mean().iloc[-1]
                liquidity[ticker] = avg_volume / (avg_return * self.prices[ticker].iloc[-1]) if avg_return > 0 else 0
            else: liquidity[ticker] = 0
        return liquidity


ModuleNotFoundError: No module named 'yfinance'

In [None]:
pip install yfinance