<a href="https://colab.research.google.com/github/AvigdorFeldman/Collab/blob/main/Tirgul7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# user_service.py
class UserService:
    def __init__(self):
        self.users = {
            '1': {'id': '1', 'name': 'John Doe', 'email': 'john@example.com'},
            '2': {'id': '2', 'name': 'Jane Doe', 'email': 'jane@example.com'}
        }

    def get_user(self, user_id):
        return self.users.get(user_id, {})

# index_service.py
class IndexService:
    def __init__(self):
        self.documents = {}
        self.index = {}

    def add_document(self, doc_data):
        """Add a document to the index"""
        doc_id = str(len(self.documents) + 1)
        self.documents[doc_id] = {**doc_data, 'id': doc_id}

        # Create inverted index
        words = doc_data['content'].lower().split()
        for word in words:
            if word not in self.index:
                self.index[word] = set()
            self.index[word].add(doc_id)

        return self.documents[doc_id]

    def get_document(self, doc_id):
        """Retrieve a document by ID"""
        return self.documents.get(doc_id)

    def search_word(self, word):
        """Find documents containing a word"""
        word = word.lower()
        return list(self.index.get(word, set()))

# query_service.py
class QueryService:
    def __init__(self, index_service):
        self.index_service = index_service
        self.queries = {}

    def create_query(self, query_data):
        """Create and execute a search query with 'AND' and 'OR' logic"""
        try:
            query_id = str(len(self.queries) + 1)
            search_terms = [term.lower() for term in query_data['terms']] # Ensure terms are lowercase

            final_or_result_accumulator = set()
            current_and_group_results = None

            for term in search_terms:
                if term == 'or':
                    if current_and_group_results is not None:
                        final_or_result_accumulator.update(current_and_group_results)
                    current_and_group_results = None # Reset for the next 'AND' group
                else:
                    doc_ids_for_term = set(self.index_service.search_word(term))

                    if current_and_group_results is None:
                        current_and_group_results = doc_ids_for_term
                    else:
                        current_and_group_results &= doc_ids_for_term # Perform intersection for 'AND'

            # After the loop, add any remaining results from the last 'AND' group
            if current_and_group_results is not None:
                final_or_result_accumulator.update(current_and_group_results)

            # Create query record
            query = {
                'id': query_id,
                'terms': query_data['terms'],
                'results': list(final_or_result_accumulator),
                'timestamp': query_data.get('timestamp', 'now')
            }
            self.queries[query_id] = query
            return query

        except Exception as e:
            return {'error': str(e)}

# result_service.py
class ResultService:
    def __init__(self, index_service, query_service):
        self.index_service = index_service
        self.query_service = query_service
        self.results = {}

    def format_results(self, query_id):
        """Format search results for display"""
        try:
            query = self.query_service.queries.get(query_id)
            if not query:
                return {'error': 'Query not found'}

            formatted_results = []
            for doc_id in query['results']:
                doc = self.index_service.get_document(doc_id)
                if doc:
                    formatted_results.append({
                        'doc_id': doc_id,
                        'title': doc['title'],
                        'snippet': doc['content'][:100] + '...'
                    })

            result_id = str(len(self.results) + 1)
            result = {
                'id': result_id,
                'query_id': query_id,
                'formatted_results': formatted_results,
                'count': len(formatted_results)
            }
            self.results[result_id] = result
            return result

        except Exception as e:
            return {'error': str(e)}
class RankService:
  def __init__(self, result_service):
    self.result_service = result_service
    self.rankings = {}

  def _calc_rank_for_result(self, result):
    """Helper to calculate a simple rank based on result count."""
    if not result or 'count' not in result:
      return 0.0
    count = result['count']
    if count == 0:
      return 0.0
    # Example rank: higher count -> higher rank (closer to 1)
    return 1 - (1.0 / count) # Use 1.0 for float division

  def rank_results(self, result_id):
    """Ranks the formatted results for a given result_id."""
    # Access the results dictionary of the result_service instance
    result = self.result_service.results.get(result_id)
    if not result:
      return {'error': f'Result with ID {result_id} not found'}

    rank = self._calc_rank_for_result(result)

    # Store and return the rank
    self.rankings[result_id] = rank
    return {'result_id': result_id, 'rank': rank}


# main.py
def main():
    # Initialize services
    index_service = IndexService()
    query_service = QueryService(index_service)
    result_service = ResultService(index_service, query_service)
    rank_service = RankService(result_service)
    # Add sample documents
    doc1 = index_service.add_document({
        'title': 'Python Programming',
        'content': 'Python is a popular programming language for cloud computing'
    })
    doc2 = index_service.add_document({
        'title': 'Cloud Services',
        'content': 'Cloud computing enables scalable microservices architecture'
    })
    doc3 = index_service.add_document({
        'title': 'Serverless Functions',
        'content': 'Serverless functions are a core component of FaaS'
    })
    print(f"Added documents: {doc1['id']}, {doc2['id']}, {doc3['id']}")

    # Test with implicit AND
    query_and = query_service.create_query(
        {'terms': ['cloud', 'computing']}
    )
    print(f"Query (AND) results: {query_and}")

    # Test with explicit OR
    query_or = query_service.create_query(
        {'terms': ['python', 'or', 'serverless']}
    )
    print(f"Query (OR) results: {query_or}")

    # Test with AND and OR combined
    query_and_or = query_service.create_query(
        {'terms': ['cloud', 'or', 'python', 'programming']}
    )
    print(f"Query (AND + OR) results: {query_and_or}")

    # Format and rank results for an example query
    formatted_results = result_service.format_results(query_and_or['id'])
    print(f"Formatted results for Query (AND + OR): {formatted_results}")

    ranked_output = rank_service.rank_results(formatted_results['id'])
    print(f"Ranked results for Query (AND + OR): {ranked_output}")

if __name__ == "__main__":
    main()

# Example test cases
def test_search():
    # Initialize services
    index_service = IndexService()
    query_service = QueryService(index_service)
    result_service = ResultService(index_service, query_service)
    rank_service = RankService(result_service)
    # Test document indexing
    doc = index_service.add_document({
        'title': 'Test Document',
        'content': 'This is a test document about microservices'
    })
    assert doc['id'] == '1', "Initial document ID should be 1"

    # Test search functionality (implicit AND)
    query_and_test = query_service.create_query({
        'terms': ['test', 'microservices']
    })
    assert len(query_and_test['results']) == 1, "Query with AND should find 1 document"
    assert '1' in query_and_test['results'], "Query with AND should find doc1"

    # Add new documents for OR testing
    doc_apple_banana = index_service.add_document({'title': 'Fruit Salad', 'content': 'Apple and banana make a great fruit salad'})
    assert doc_apple_banana['id'] == '2', "Second document ID should be 2"

    doc_orange = index_service.add_document({'title': 'Citrus Delight', 'content': 'Orange is a citrus fruit'})
    assert doc_orange['id'] == '3', "Third document ID should be 3"

    # Test with explicit OR
    query_or_explicit = query_service.create_query(
        {'terms': ['apple', 'or', 'orange']}
    )
    assert len(query_or_explicit['results']) == 2, f"Query with OR ('apple' or 'orange') should find 2 documents, found {len(query_or_explicit['results'])}"
    assert '2' in query_or_explicit['results'] and '3' in query_or_explicit['results'], "Query with OR should find doc2 and doc3"

    # Test with combined AND and OR
    # Expect to find doc2 (apple AND banana) and doc3 (orange)
    query_and_or_combined = query_service.create_query(
        {'terms': ['apple', 'banana', 'or', 'orange']}
    )
    assert len(query_and_or_combined['results']) == 2, f"Combined AND/OR query should find 2 documents, found {len(query_and_or_combined['results'])}"
    assert '2' in query_and_or_combined['results'] and '3' in query_and_or_combined['results'], "Combined AND/OR query should find doc2 and doc3"

    # Test result formatting
    results = result_service.format_results(query_and_test['id'])
    assert results['count'] == 1, "Formatted results count should be 1"
    assert 'test document' in results['formatted_results'][0]['snippet'].lower(), "Formatted results snippet should contain 'test document'"

    # Test rank service
    print(rank_service.rank_results(results['id'])) # Pass the result_id

if __name__ == "__main__":
    test_search()


Added documents: 1, 2
Query results: {'id': '1', 'terms': ['cloud', 'computing'], 'results': ['2', '1'], 'timestamp': 'now'}
Formatted results: {'id': '1', 'query_id': '1', 'formatted_results': [{'doc_id': '2', 'title': 'Cloud Services', 'snippet': 'Cloud computing enables scalable microservices architecture...'}, {'doc_id': '1', 'title': 'Python Programming', 'snippet': 'Python is a popular programming language for cloud computing...'}], 'count': 2}
Ranked results: {'result_id': '1', 'rank': 0.5}
{'result_id': '1', 'rank': 0.0}


In [None]:


"""
Function as a Service (FaaS) Architecture Demo: Search Engine Implementation

This code demonstrates key FaaS principles through a simple search engine implementation.
It simulates how serverless functions would operate in a cloud environment.

Key FaaS Characteristics Demonstrated:
1. Stateless Functions - Each invocation is independent
2. Event-Driven Architecture - Functions respond to specific triggers
3. Single Responsibility - Each function performs one specific task
4. Automatic Scaling (simulated) - Functions can handle multiple concurrent requests
"""

class IndexerFunction:

    def __init__(self):
        # In real FaaS, this would be external storage
        self.index = {}

    def handle(self, event):
        """
        Function entry point - similar to AWS Lambda handler

        Args:
            event (dict): Contains document to be indexed
                {
                    'document_id': str - Unique document identifier
                    'content': str - Document content to index
                }

        Returns:
            dict: Indexing operation results
                {
                    'status': str - Operation status
                    'indexed_words': int - Number of words processed
                }
        """
        doc_id = event['document_id']
        content = event['content'].lower().split()

        # Build inverted index
        for word in content:
            if word not in self.index:
                self.index[word] = set()
            self.index[word].add(doc_id)

        return {
            'status': 'success',
            'indexed_words': len(content)
        }

class SearcherFunction:
    """
    Simulates a FaaS search function.

    FaaS Characteristics:
    - Event-driven: Responds to search requests
    - Stateless: Each search is independent
    - Scalable: Multiple instances can handle concurrent searches


    """
    def __init__(self, index_service):
        self.index = index_service.index

    def handle(self, event):
        """
        Search function entry point

        Args:
            event (dict): Contains search parameters
                {
                    'query': str - Search terms
                }

        Returns:
            dict: Search results
                {
                    'status': str - Operation status
                    'results': list - Matching document IDs
                }

        Note: In real FaaS:
        - Would include error handling
        - Would implement timeouts
        - Would include logging/monitoring
        """
        terms = event['query'].lower().split()
        results = set()

        for term in terms:
            if term in self.index:
                if not results:
                    results = self.index[term].copy()
                else:
                    results &= self.index[term]

        return {
            'status': 'success',
            'results': list(results)
        }

class FaaSSimulator:
    """
    Simulates a FaaS environment.

    Demonstrates:
    - Function isolation
    - Event-based invocation
    - Resource management (simulated)
    - Function routing

    In real FaaS platforms (like AWS Lambda):
    - Functions run in isolated containers
    - Resources are automatically managed
    - Scaling happens automatically
    - Includes monitoring and logging
    """
    def __init__(self):
        self.indexer = IndexerFunction()
        self.searcher = SearcherFunction(self.indexer)
        self.invocations = 0

    def invoke(self, function_name, event):
        """
        Simulates FaaS function invocation

        Args:
            function_name (str): Name of function to invoke
            event (dict): Event data for the function

        Real FaaS differences:
        - Would spawn new container/instance
        - Would handle concurrent requests
        - Would implement timeout limits
        - Would include error handling
        """
        self.invocations += 1

        if function_name == 'indexer':
            return self.indexer.handle(event)
        elif function_name == 'searcher':
            return self.searcher.handle(event)
        else:
            raise ValueError(f"Unknown function: {function_name}")

def demonstrate_faas():
    """
    Demonstrates key FaaS concepts through example usage

    Shows:
    1. Event-driven invocation
    2. Function independence
    3. Scalability potential
    """
    # Test data setup
    test_documents = [
        {
            'document_id': 'doc1',
            'content': 'Python is a popular programming language for cloud computing'
        },
        {
            'document_id': 'doc2',
            'content': 'Cloud computing enables scalable microservices architecture'
        }
    ]

    # Initialize FaaS environment
    faas = FaaSSimulator()

    # Demonstrate event-driven invocation
    print("1. Event-Driven Invocation:")
    for doc in test_documents:
        result = faas.invoke('indexer', doc)
        print(f"  Indexed document {doc['document_id']}: {result}")

    # Demonstrate independent function calls
    print("\n2. Independent Function Calls:")
    search_queries = [
        {'query': 'cloud computing'},
        {'query': 'python programming'}
    ]

    for query in search_queries:
        result = faas.invoke('searcher', query)
        print(f"  Search results for '{query['query']}': {result}")

    # Demonstrate scalability concept
    print(f"\n3. Scalability Demonstration:")
    print(f"  Total function invocations: {faas.invocations}")
    print("  In real FaaS: These would execute in parallel with automatic scaling")

if __name__ == "__main__":
    demonstrate_faas()


1. Event-Driven Invocation:
  Indexed document doc1: {'status': 'success', 'indexed_words': 9}
  Indexed document doc2: {'status': 'success', 'indexed_words': 6}

2. Independent Function Calls:
  Search results for 'cloud computing': {'status': 'success', 'results': ['doc1', 'doc2']}
  Search results for 'python programming': {'status': 'success', 'results': ['doc1']}

3. Scalability Demonstration:
  Total function invocations: 4
  In real FaaS: These would execute in parallel with automatic scaling


# Task
Modify the `create_query` method in `QueryService` to parse search terms for an explicit 'OR' operator. When 'OR' is found, combine the results of the preceding and subsequent terms using set union. For terms not separated by 'OR', continue to treat them as an implicit 'AND' (set intersection).

## Implement OR Operator in QueryService

### Subtask:
Modify the `create_query` method in `QueryService` to parse search terms for an explicit 'OR' operator. When 'OR' is found, combine the results of the preceding and subsequent terms using set union. For terms not separated by 'OR', continue to treat them as an implicit 'AND' (set intersection).


**Reasoning**:
The subtask requires modifying the `create_query` method in `QueryService` to implement 'OR' operator logic. I will provide the updated code for the `QueryService` class.



In [4]:
# user_service.py
class UserService:
    def __init__(self):
        self.users = {
            '1': {'id': '1', 'name': 'John Doe', 'email': 'john@example.com'},
            '2': {'id': '2', 'name': 'Jane Doe', 'email': 'jane@example.com'}
        }

    def get_user(self, user_id):
        return self.users.get(user_id, {})

# index_service.py
class IndexService:
    def __init__(self):
        self.documents = {}
        self.index = {}

    def add_document(self, doc_data):
        """Add a document to the index"""
        doc_id = str(len(self.documents) + 1)
        self.documents[doc_id] = {**doc_data, 'id': doc_id}

        # Create inverted index
        words = doc_data['content'].lower().split()
        for word in words:
            if word not in self.index:
                self.index[word] = set()
            self.index[word].add(doc_id)

        return self.documents[doc_id]

    def get_document(self, doc_id):
        """Retrieve a document by ID"""
        return self.documents.get(doc_id)

    def search_word(self, word):
        """Find documents containing a word"""
        word = word.lower()
        return list(self.index.get(word, set()))

# query_service.py
class QueryService:
    def __init__(self, index_service):
        self.index_service = index_service
        self.queries = {}

    def create_query(self, query_data):
        """Create and execute a search query with 'AND' and 'OR' logic"""
        try:
            query_id = str(len(self.queries) + 1)
            search_terms = [term.lower() for term in query_data['terms']] # Ensure terms are lowercase

            final_or_result_accumulator = set()
            current_and_group_results = None

            for term in search_terms:
                if term == 'or':
                    if current_and_group_results is not None:
                        final_or_result_accumulator.update(current_and_group_results)
                    current_and_group_results = None # Reset for the next 'AND' group
                else:
                    doc_ids_for_term = set(self.index_service.search_word(term))

                    if current_and_group_results is None:
                        current_and_group_results = doc_ids_for_term
                    else:
                        current_and_group_results &= doc_ids_for_term # Perform intersection for 'AND'

            # After the loop, add any remaining results from the last 'AND' group
            if current_and_group_results is not None:
                final_or_result_accumulator.update(current_and_group_results)

            # Create query record
            query = {
                'id': query_id,
                'terms': query_data['terms'],
                'results': list(final_or_result_accumulator),
                'timestamp': query_data.get('timestamp', 'now')
            }
            self.queries[query_id] = query
            return query

        except Exception as e:
            return {'error': str(e)}

# result_service.py
class ResultService:
    def __init__(self, index_service, query_service):
        self.index_service = index_service
        self.query_service = query_service
        self.results = {}

    def format_results(self, query_id):
        """Format search results for display"""
        try:
            query = self.query_service.queries.get(query_id)
            if not query:
                return {'error': 'Query not found'}

            formatted_results = []
            for doc_id in query['results']:
                doc = self.index_service.get_document(doc_id)
                if doc:
                    formatted_results.append({
                        'doc_id': doc_id,
                        'title': doc['title'],
                        'snippet': doc['content'][:100] + '...'
                    })

            result_id = str(len(self.results) + 1)
            result = {
                'id': result_id,
                'query_id': query_id,
                'formatted_results': formatted_results,
                'count': len(formatted_results)
            }
            self.results[result_id] = result
            return result

        except Exception as e:
            return {'error': str(e)}
class RankService:
  def __init__(self, result_service):
    self.result_service = result_service
    self.rankings = {}

  def _calc_rank_for_result(self, result):
    """Helper to calculate a simple rank based on result count."""
    if not result or 'count' not in result:
      return 0.0
    count = result['count']
    if count == 0:
      return 0.0
    # Example rank: higher count -> higher rank (closer to 1)
    return 1 - (1.0 / count) # Use 1.0 for float division

  def rank_results(self, result_id):
    """Ranks the formatted results for a given result_id."""
    # Access the results dictionary of the result_service instance
    result = self.result_service.results.get(result_id)
    if not result:
      return {'error': f'Result with ID {result_id} not found'}

    rank = self._calc_rank_for_result(result)

    # Store and return the rank
    self.rankings[result_id] = rank
    return {'result_id': result_id, 'rank': rank}


# main.py
def main():
    # Initialize services
    index_service = IndexService()
    query_service = QueryService(index_service)
    result_service = ResultService(index_service, query_service)
    rank_service = RankService(result_service)
    # Add sample documents
    doc1 = index_service.add_document({
        'title': 'Python Programming',
        'content': 'Python is a popular programming language for cloud computing'
    })
    doc2 = index_service.add_document({
        'title': 'Cloud Services',
        'content': 'Cloud computing enables scalable microservices architecture'
    })
    doc3 = index_service.add_document({
        'title': 'Serverless Functions',
        'content': 'Serverless functions are a core component of FaaS'
    })
    print(f"Added documents: {doc1['id']}, {doc2['id']}, {doc3['id']}")

    # Test with implicit AND
    query_and = query_service.create_query(
        {'terms': ['cloud', 'computing']}
    )
    print(f"Query (AND) results: {query_and}")

    # Test with explicit OR
    query_or = query_service.create_query(
        {'terms': ['python', 'or', 'serverless']}
    )
    print(f"Query (OR) results: {query_or}")

    # Test with AND and OR combined
    query_and_or = query_service.create_query(
        {'terms': ['cloud', 'or', 'python', 'programming']}
    )
    print(f"Query (AND + OR) results: {query_and_or}")

    # Format and rank results for an example query
    formatted_results = result_service.format_results(query_and_or['id'])
    print(f"Formatted results for Query (AND + OR): {formatted_results}")

    ranked_output = rank_service.rank_results(formatted_results['id'])
    print(f"Ranked results for Query (AND + OR): {ranked_output}")

if __name__ == "__main__":
    main()

Added documents: 1, 2, 3
Query (AND) results: {'id': '1', 'terms': ['cloud', 'computing'], 'results': ['2', '1'], 'timestamp': 'now'}
Query (OR) results: {'id': '2', 'terms': ['python', 'or', 'serverless'], 'results': ['3', '1'], 'timestamp': 'now'}
Query (AND + OR) results: {'id': '3', 'terms': ['cloud', 'or', 'python', 'programming'], 'results': ['1', '2'], 'timestamp': 'now'}
Formatted results for Query (AND + OR): {'id': '1', 'query_id': '3', 'formatted_results': [{'doc_id': '1', 'title': 'Python Programming', 'snippet': 'Python is a popular programming language for cloud computing...'}, {'doc_id': '2', 'title': 'Cloud Services', 'snippet': 'Cloud computing enables scalable microservices architecture...'}], 'count': 2}
Ranked results for Query (AND + OR): {'result_id': '1', 'rank': 0.5}
