In [1]:
from utils.my_llm_utils import *
from utils.my_utils import *
from utils.tavily_search_utils import web_search
import pandas as pd
from datetime import datetime, timedelta
import time

In [2]:
def get_existing_partnerships(csv_path):
    """Read existing partnerships and create a set of normalized partner pairs"""
    df = pd.read_csv(csv_path)
    existing_pairs = set()
    
    for _, row in df.iterrows():
        partner1 = str(row['partner1']).lower().strip()
        partner2 = str(row['partner2']).lower().strip()
        # Store pairs in both orders to handle case-insensitive matching
        existing_pairs.add(frozenset([partner1, partner2]))
    
    return existing_pairs

In [15]:
#search_query = "company announces AI partnership with company"
    
#search_results = web_search(search_query, max_results=5)

# for result in search_results['results']:
#              content = result.get('content', '').lower()
#              title = result.get('title', '').lower()
#              url = result.get('url')

companies = extract_companies_from_text(content + " " + title)

# if not search_results or not search_results.get('results'):
#     continue
        
#         # Process each result
#         for result in search_results['results']:
#             content = result.get('content', '').lower()
#             title = result.get('title', '').lower()
#             url = result.get('url')
            
#             # Skip if content is too short
#             if len(content) < 50:
#                 continue
                
#             # Look for company names in the content
#             companies = extract_companies_from_text(content + " " + title)
            
#             # Skip if we don't find at least 2 companies
#             if len(companies) < 2:
#                 continue
            
#             # Check each pair of companies
#             for i in range(len(companies)):
#                 for j in range(i + 1, len(companies)):
#                     partner1 = companies[i]
#                     partner2 = companies[j]
                    
#                     # Skip if either company name is too short
#                     if len(partner1) < 3 or len(partner2) < 3:
#                         continue
                    
#                     # Check if this partnership is new
#                     pair = frozenset([partner1.lower().strip(), partner2.lower().strip()])
#                     if pair not in existing_pairs:
#                         # Extract date
#                         date = extract_date_from_text(content)
                        
#                         # Add to new partnerships
#                         new_partnerships.append({
#                             'partner1': partner1,
#                             'partner2': partner2,
#                             'When announced': date,
#                             'Link': url,
#                             'raw_content': content
#                         })
                        
#                         # Add to existing pairs to avoid duplicates
#                         existing_pairs.add(pair)
                        
#                         print(f"Found new partnership: {partner1} and {partner2}")
        
#         # Add a small delay between queries to avoid rate limits
#         time.sleep(1)
    
#     return new_partnerships

In [3]:
def search_new_partnerships(existing_pairs, days_back=30):
    """Search for new AI partnerships using Tavily"""
    new_partnerships = []
    
    # Multiple search queries to increase coverage
    search_queries = [
        f"company announces AI partnership with company"
        # f"company expands partnership with company AI",
        # f"company and company announce AI partnership",
        # f"company partners with company AI",
        # f"company collaborates with company artificial intelligence",
        # f"company teams up with company AI",
        # f"company joins forces with company AI",
        # f"company strategic partnership with company AI",
        # f"company new partnership with company artificial intelligence",
        # f"company alliance with company AI"
    ]
    
    # Process each search query
    for query in search_queries:
        print(f"\nSearching with query: {query}")
        search_results = web_search(query, max_results=5)
        
        if not search_results or not search_results.get('results'):
            continue
        
        # Process each result
        for result in search_results['results']:
            content = result.get('content', '').lower()
            title = result.get('title', '').lower()
            url = result.get('url')
            
            # Skip if content is too short
            if len(content) < 50:
                continue
                
            # Look for company names in the content
            companies = extract_companies_from_text(content + " " + title)
            
            # Skip if we don't find at least 2 companies
            if len(companies) < 2:
                continue
            
            # Check each pair of companies
            for i in range(len(companies)):
                for j in range(i + 1, len(companies)):
                    partner1 = companies[i]
                    partner2 = companies[j]
                    
                    # Skip if either company name is too short
                    if len(partner1) < 3 or len(partner2) < 3:
                        continue
                    
                    # Check if this partnership is new
                    pair = frozenset([partner1.lower().strip(), partner2.lower().strip()])
                    if pair not in existing_pairs:
                        # Extract date
                        date = extract_date_from_text(content)
                        
                        # Add to new partnerships
                        new_partnerships.append({
                            'partner1': partner1,
                            'partner2': partner2,
                            'When announced': date,
                            'Link': url,
                            'raw_content': content
                        })
                        
                        # Add to existing pairs to avoid duplicates
                        existing_pairs.add(pair)
                        
                        print(f"Found new partnership: {partner1} and {partner2}")
        
        # Add a small delay between queries to avoid rate limits
        time.sleep(1)
    
    return new_partnerships

In [4]:
def extract_companies_from_text(text):
    """Extract potential company names from text"""
    companies = []
    
    # Common company indicators and partnership phrases
    indicators = [
        'announces', 'announced', 'partners with', 'collaborates with', 'teams up with',
        'partnership with', 'collaboration with', 'working with', 'expands partnership with',
        'joins forces with', 'alliance with', 'strategic partnership', 'new partnership',
        'recent partnership', 'partnership between', 'collaboration between', 'teaming up with',
        'and', 'with'  # Added simple connectors
    ]
    
    # Split text into sentences
    sentences = text.replace('!', '.').replace('?', '.').split('.')
    
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue
            
        # Check for company indicators
        for indicator in indicators:
            if indicator in sentence:
                # Split sentence around the indicator
                parts = sentence.split(indicator)
                if len(parts) > 1:
                    # Process each part
                    for part in parts:
                        # Look for capitalized words that might be company names
                        words = part.split()
                        company_name = []
                        
                        for i in range(len(words)):
                            word = words[i]
                            # Check if word is capitalized and not a common word
                            if (word.istitle() and len(word) > 2 and 
                                word not in ['The', 'And', 'With', 'For', 'From', 'This', 'That', 'Announces', 'Announced']):
                                company_name.append(word)
                            
                            # If we have a company name and hit a non-capitalized word, save the company
                            elif company_name:
                                companies.append(' '.join(company_name))
                                company_name = []
                        
                        # Add any remaining company name
                        if company_name:
                            companies.append(' '.join(company_name))
    
    # Clean up company names
    cleaned_companies = []
    for company in companies:
        # Remove common words from the end
        company = company.strip()
        if company.endswith((' Inc', ' Ltd', ' LLC', ' Corp', ' Corporation')):
            company = company.rsplit(' ', 1)[0]
        # Remove common words from the beginning
        if company.startswith(('The ', 'A ', 'An ')):
            company = company.split(' ', 1)[1]
        cleaned_companies.append(company)
    
    return list(set(cleaned_companies))  # Remove duplicates

In [5]:
# Configuration
input_csv = "data/AI Partnerships.csv"
output_csv = "data/new_partnerships.csv"
days_back = 30  # Search for partnerships from the last 30 days
    
# Get existing partnerships
existing_pairs = get_existing_partnerships(input_csv)
print(f"Found {len(existing_pairs)} existing partnerships")

Found 732 existing partnerships


In [6]:
# Search for new partnerships
new_partnerships = search_new_partnerships(existing_pairs, days_back)
print(f"Found {len(new_partnerships)} new partnerships")


Searching with query: company announces AI partnership with company
Found 0 new partnerships


In [9]:
if new_partnerships:
    # Create DataFrame for new partnerships
    df_new = pd.DataFrame(new_partnerships)
        
    # Process new partnerships to get summaries
    df_new = update_partnerships(df_new, output_csv)
        
    print(f"\nNew partnerships saved to {output_csv}")
else:
    print("No new partnerships found")

No new partnerships found
