In [2]:
pip install groq eventregistry transformers

Collecting groq
  Downloading groq-0.13.1-py3-none-any.whl.metadata (14 kB)
Collecting eventregistry
  Downloading eventregistry-9.1.tar.gz (59 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/59.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading groq-0.13.1-py3-none-any.whl (109 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: eventregistry
  Building wheel for eventregistry (setup.py) ... [?25l[?25hdone
  Created wheel for eventregistry: filename=eventregistry-9.1-py3-none-any.whl size=67624 sha256=963f77419f445b87a4d5e913b773673f954bc6ce4f013ddd27e955e57b04d811
  Stored in directory: /root/.cache/pip/wheels/43/b1/a3/96973dbeb71bb960bd053bfc7113194a3c35859407e2

In [5]:
import requests
import pandas as pd
from datetime import datetime as dt
from datetime import timedelta
from groq import Groq
from transformers import pipeline, AutoTokenizer
import json
import os
from eventregistry import *

# API Configuration
# Credentials are read from the environment so that secrets never live in the
# notebook source. Set GROQ_API_KEY and EVENT_REGISTRY_API_KEY before starting
# the kernel; each constant is None when its variable is unset.
# NOTE(review): keys that were previously committed in this cell should be
# treated as compromised and rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
EVENT_REGISTRY_API_KEY = os.environ.get("EVENT_REGISTRY_API_KEY")

# Model names
# Hugging Face model id used for both the sentiment pipeline and its tokenizer.
SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

# Initialize Groq client
def initialize_groq():
    """Build and return a Groq client configured with GROQ_API_KEY."""
    groq_client = Groq(api_key=GROQ_API_KEY)
    return groq_client

def initialize_sentiment_analyzer():
    """Load the sentiment model and its tokenizer.

    Returns:
        A ``(pipeline, tokenizer)`` tuple. The tokenizer is returned alongside
        the pipeline so callers can use it to pre-truncate text.
    """
    model_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL)
    classifier = pipeline("sentiment-analysis", model=SENTIMENT_MODEL, tokenizer=model_tokenizer)
    return classifier, model_tokenizer

def truncate_for_model(text, tokenizer, max_length=512):
    """Shorten ``text`` so that its encoding holds at most ``max_length`` ids.

    Args:
        text: The string to check.
        tokenizer: Object providing ``encode``, ``decode`` and ``sep_token_id``.
        max_length: Maximum number of ids allowed in the encoded sequence.

    Returns:
        ``text`` unchanged when its encoding already fits; otherwise the text
        obtained by decoding the first ``max_length - 1`` ids followed by the
        separator id, with special tokens skipped.
    """
    token_ids = tokenizer.encode(text, truncation=False)

    # Short enough already: hand back the caller's string untouched.
    if len(token_ids) <= max_length:
        return text

    # Reserve the final slot for the separator id so the clipped sequence is
    # exactly max_length ids long.
    clipped_ids = token_ids[:max_length - 1] + [tokenizer.sep_token_id]
    return tokenizer.decode(clipped_ids, skip_special_tokens=True)

def truncate_for_llama(text, max_length=900):
    """Cap ``text`` at ``max_length`` whitespace-separated words.

    Returns ``text`` unchanged when it has at most ``max_length`` words.
    Otherwise returns the first ``max_length`` words joined by single spaces
    with ``"..."`` appended.
    """
    word_list = text.split()
    fits = len(word_list) <= max_length
    return text if fits else ' '.join(word_list[:max_length]) + "..."

# Function to fetch news data from Event Registry
def fetch_news():
    """Fetch recent English news/blog articles matching the supply-chain keywords.

    Queries Event Registry for articles from the last 7 days that match any of
    the keywords below, sorted by date and limited to 20 items.

    Returns:
        ``{"articles": {"results": [...]}}`` where each result is a dict with
        ``source`` (``{"title": ...}``), ``title``, ``body`` and ``dateTime``;
        or ``None`` if any exception is raised (the error is printed).
    """
    try:
        # Initialize EventRegistry
        er = EventRegistry(apiKey=EVENT_REGISTRY_API_KEY)

        # Read the clock once so both ends of the window derive from the same
        # instant.
        now = dt.now()

        # Create query for articles.
        # One keyword per line with a trailing comma: a missing comma makes
        # Python silently concatenate adjacent string literals into a single
        # keyword (e.g. "Electric VehiclesLithium shortage").
        q = QueryArticlesIter(
            keywords=QueryItems.OR([
                "Lithium - Ion",
                "Batteries",
                "Electric Vehicles",
                "Lithium shortage",
                "Lithium",
                "Cobalt",
                "Mineral Mining",
            ]),
            dateStart=(now - timedelta(days=7)).strftime('%Y-%m-%d'),
            dateEnd=now.strftime('%Y-%m-%d'),
            dataType=["news", "blog"],
            lang="eng"
        )

        # Execute query and collect results
        articles = []
        for article in q.execQuery(er, sortBy="date", maxItems=20):
            # `or {}` also covers a "source" key that is present but null.
            source = article.get("source") or {}
            articles.append({
                "source": {
                    "title": source.get("title", "")
                },
                "title": article.get("title", ""),
                "body": article.get("body", ""),
                "dateTime": article.get("dateTime", "")
            })

        return {"articles": {"results": articles}}
    except Exception as e:
        # Best-effort fetch: report the failure and let the caller handle None.
        print(f"Error fetching news: {e}")
        return None

# Risk analysis with Groq LLaMa
def analyze_risk_with_llama(content, client):
    """Ask the Groq-hosted LLaMA model for a supply-chain risk analysis.

    Args:
        content: Article text; it is shortened with ``truncate_for_llama``
            before being placed in the prompt.
        client: Groq client exposing ``chat.completions.create``.

    Returns:
        The text of the first completion choice, or the string
        ``"Error in risk analysis"`` if any exception is raised (the error is
        printed).
    """
    try:
        article_excerpt = truncate_for_llama(content)

        prompt = f"""Analyze the following news article for lithium-ion battery supply chain risks.

        Consider these specific factors:
        1. Raw Material Risks:
           - Lithium, cobalt, nickel, and other critical material availability
           - Price fluctuations and market dynamics
           - Geopolitical tensions affecting material access

        2. Manufacturing Risks:
           - Production capacity issues
           - Quality control challenges
           - Technology changes or innovations

        3. Geographic Risks:
           - Regional concentration of suppliers
           - Political instability in key regions
           - Trade restrictions or policy changes

        4. Industry Impact:
           - Effects on EV and energy storage markets
           - Impact on battery manufacturers
           - Downstream effects on dependent industries

        5. Mitigation Strategies:
           - Alternative materials or technologies
           - Supply diversification opportunities
           - Strategic stockpiling considerations

        Article: {article_excerpt}

        Provide a structured analysis of the identified risks and their potential impact on the lithium-ion battery supply chain."""

        # Single-turn conversation: the whole prompt goes in one user message.
        chat_messages = [{"role": "user", "content": prompt}]
        response = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=chat_messages,
            temperature=0.7,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )

        first_choice = response.choices[0]
        return first_choice.message.content
    except Exception as e:
        print(f"Error with Groq LLaMa: {e}")
        return "Error in risk analysis"

# Sentiment analysis with proper truncation
def analyze_sentiment_with_model(content, sentiment_pipeline, tokenizer):
    """Classify the sentiment of ``content``.

    Args:
        content: Text to classify.
        sentiment_pipeline: Callable returning a list of dicts with ``label``
            and ``score`` keys for the given text.
        tokenizer: Tokenizer passed to ``truncate_for_model`` for
            pre-truncation.

    Returns:
        A dict with ``label``, ``score`` (float) and a human-readable
        ``analysis`` string. On any exception the error is printed and a dict
        with label ``"ERROR"`` and score ``0.0`` is returned.
    """
    try:
        # Properly truncate content for the model
        truncated_content = truncate_for_model(content, tokenizer)

        # Get sentiment prediction.
        # truncate_for_model decodes the clipped ids back to text and the
        # pipeline tokenizes that text again; the second pass is not guaranteed
        # to yield the same number of tokens. truncation=True makes the
        # pipeline's tokenizer enforce the limit as a safety net.
        result = sentiment_pipeline(truncated_content, truncation=True)[0]

        label = result["label"]
        score = float(result["score"])

        # Format the result
        return {
            "label": label,
            "score": score,
            "analysis": f"Sentiment: {label} (confidence: {score:.2f})"
        }
    except Exception as e:
        print(f"Error with sentiment analysis: {e}")
        return {
            "label": "ERROR",
            "score": 0.0,
            "analysis": "Error in sentiment analysis"
        }

# Aggregate data into structured format
def aggregate_data(news_data):
    """Flatten the fetched articles into a DataFrame.

    Args:
        news_data: Mapping shaped like ``{"articles": {"results": [...]}}``.

    Returns:
        A DataFrame with one row per article and the columns ``source``,
        ``title``, ``description``, ``content`` and ``published_at``
        (``description`` and ``content`` both carry the article body), or
        ``None`` if any exception is raised (the error is printed).
    """
    try:
        raw_articles = news_data.get('articles', {}).get('results', [])
        records = [
            {
                "source": item.get('source', {}).get('title', ''),
                "title": item.get('title', ''),
                "description": item.get('body', ''),
                "content": item.get('body', ''),
                "published_at": item.get('dateTime', ''),
            }
            for item in raw_articles
        ]
        return pd.DataFrame(records)
    except Exception as e:
        print(f"Error structuring data: {e}")
        return None

# Main pipeline
def main():
    """Run the full pipeline: fetch, structure, analyze, and save.

    For every fetched article this runs the LLaMA risk analysis and the
    sentiment classifier, then rewrites
    ``analysis_results/analysis_results.json`` with all results collected so
    far, so partial output is on disk after each article. Returns early
    (with no output file) when fetching fails or there is nothing to analyze.
    """
    # Initialize models
    llm_client = initialize_groq()
    classifier, model_tokenizer = initialize_sentiment_analyzer()

    # Fetch news data
    fetched = fetch_news()
    if not fetched:
        return

    # Aggregate data into structured format
    articles_df = aggregate_data(fetched)
    nothing_to_do = articles_df is None or articles_df.empty
    if nothing_to_do:
        print("No data to analyze")
        return

    # Create output directory if it doesn't exist
    output_dir = "analysis_results"
    os.makedirs(output_dir, exist_ok=True)
    report_path = f"{output_dir}/analysis_results.json"

    # Analyze risk and sentiment
    total = len(articles_df)
    collected = []
    for idx, record in articles_df.iterrows():
        headline = record['title']
        body_text = record['content']
        print(f"\nAnalyzing article {idx + 1}/{total}: {headline}")

        # Perform analyses
        risk_report = analyze_risk_with_llama(body_text, llm_client)
        sentiment = analyze_sentiment_with_model(body_text, classifier, model_tokenizer)

        # Store results
        collected.append({
            'title': headline,
            'source': record['source'],
            'published_at': record['published_at'],
            'risk_analysis': risk_report,
            'sentiment_analysis': sentiment,
        })

        # Save interim results: the file is rewritten in full on every pass.
        with open(report_path, 'w', encoding='utf-8') as out_file:
            json.dump(collected, out_file, indent=4, ensure_ascii=False)

        print(f"Sentiment: {sentiment['label']} (confidence: {sentiment['score']:.2f})")
        print("Analysis saved to analysis_results.json")

# Entry point: run the pipeline only when this code executes as the main
# program, not when it is imported as a module.
if __name__ == "__main__":
    main()

Device set to use cpu



Analyzing article 1/20: Weeks after Ratan Tata's death, Tata Group boss N Chandrasekaran makes big announcement on jobs
Sentiment: POSITIVE (confidence: 0.99)
Analysis saved to analysis_results.json

Analyzing article 2/20: Scientists develop 'sustainable shield' tech on quest to harness limitless energy source: '[It] could play a vital role in the future'


Token indices sequence length is longer than the specified maximum sequence length for this model (616 > 512). Running this sequence through the model will result in indexing errors


Sentiment: POSITIVE (confidence: 0.99)
Analysis saved to analysis_results.json

Analyzing article 3/20: The Most Important Breakthroughs of 2024
Sentiment: POSITIVE (confidence: 0.99)
Analysis saved to analysis_results.json

Analyzing article 4/20: E-scooter ban reminder for rail passengers on the Airedale line
Sentiment: NEGATIVE (confidence: 0.99)
Analysis saved to analysis_results.json

Analyzing article 5/20: I Switched From An IPhone 12 Pro To An IPhone 16 Pro - Here Are 5 Things I Love And 3 Disappointments - Ny Breaking News
Sentiment: POSITIVE (confidence: 1.00)
Analysis saved to analysis_results.json

Analyzing article 6/20: Weeks after Ratan Tata's death, Tata Group boss N Chandrasekaran makes big announcement on jobs
Sentiment: POSITIVE (confidence: 0.99)
Analysis saved to analysis_results.json

Analyzing article 7/20: Little Rock's high-end home sales for December 2-6: Six of the most expensive properties | Northwest Arkansas Democrat-Gazette
Sentiment: NEGATIVE (confidence