1. NoSQL Databases:
   a. Write a Python program that connects to a MongoDB database and inserts a new document into a collection named "students". The document should include fields such as "name", "age", and "grade". Print a success message after the insertion.
   b. Implement a Python function that connects to a Cassandra database and inserts a new record into a table named "products". The record should contain fields like "id", "name", and "price". Handle any potential errors that may occur during the insertion.




In [None]:
from pymongo import MongoClient

# Establish a connection to MongoDB. Using the client as a context manager
# closes it when the block exits, even if the insert raises.
with MongoClient('mongodb://localhost:27017/') as client:
    # Access the database
    db = client['your_database_name']

    # Access the collection
    collection = db['students']

    # Create a new document
    document = {
        'name': 'John Doe',
        'age': 20,
        'grade': 'A',
    }

    # Insert the document into the collection
    collection.insert_one(document)

    # Print success message (only reached when the insert did not raise)
    print("Document inserted successfully.")


In [None]:
# Python function to insert a record into Cassandra:
from cassandra.cluster import Cluster
from cassandra import ConsistencyLevel
from cassandra.query import SimpleStatement

def insert_into_cassandra(id, name, price):
    """Insert one row into the ``products`` table and print the outcome.

    Connects to a Cassandra node on ``localhost``, switches to the keyspace
    ``your_keyspace_name`` and executes a prepared INSERT at consistency
    level ONE.

    Args:
        id: Value bound to the ``id`` column.
        name: Value bound to the ``name`` column.
        price: Value bound to the ``price`` column.

    Returns:
        None. Success or failure is reported with ``print``; exceptions are
        caught and not re-raised.
    """
    # Bound before the try block so the cleanup below is safe even when the
    # connection attempt itself fails.
    cluster = None
    try:
        # Establish a connection to Cassandra
        cluster = Cluster(['localhost'])
        session = cluster.connect()

        # Specify the keyspace
        session.set_keyspace('your_keyspace_name')

        # Prepared statements use "?" bind markers; "%s" placeholders are
        # only valid for non-prepared (simple) statements.
        query = "INSERT INTO products (id, name, price) VALUES (?, ?, ?)"
        prepared_query = session.prepare(query)

        # Set the consistency level
        prepared_query.consistency_level = ConsistencyLevel.ONE

        # Bind the parameters and execute the query
        session.execute(prepared_query, (id, name, price))

        # Print success message
        print("Record inserted successfully.")
    except Exception as e:
        # Handle any potential errors
        print("Error inserting record:", e)
    finally:
        # Cluster.shutdown() also closes the sessions created from it, so a
        # separate session.shutdown() is not needed.
        if cluster is not None:
            cluster.shutdown()


2. Document-oriented NoSQL Databases:
   a. Given a MongoDB collection named "books", write a Python function that fetches all the books published in the last year and prints their titles and authors.
   b. Design a schema for a document-oriented NoSQL database to store customer information for an e-commerce platform. Write a Python program to insert a new customer document into the database and handle any necessary validations.


In [None]:
# Python function to fetch books published in the last year from MongoDB:
from pymongo import MongoClient
from datetime import datetime, timedelta

def fetch_recent_books():
    """Print the title and author of every book published in the last 365 days.

    Queries the ``books`` collection of ``your_database_name`` on the local
    MongoDB server for documents whose ``publication_date`` is on or after
    "now minus 365 days".

    Returns:
        None. Results are printed; any exception is caught and reported with
        ``print`` instead of being raised.
    """
    # Local import keeps this function self-contained.
    from datetime import timezone

    try:
        # The context manager closes the client on exit, including on error.
        with MongoClient('mongodb://localhost:27017/') as client:
            collection = client['your_database_name']['books']

            # Use an aware UTC timestamp: PyMongo treats naive datetimes as
            # UTC, so a naive local-time value would shift the cutoff by the
            # local UTC offset.
            one_year_ago = datetime.now(timezone.utc) - timedelta(days=365)

            # NOTE(review): assumes publication_date is stored as a BSON date,
            # not as a string - confirm against the collection's data.
            query = {"publication_date": {"$gte": one_year_ago}}
            projection = {"_id": 0, "title": 1, "author": 1}

            # Print the titles and authors of the books
            for book in collection.find(query, projection):
                # .get() keeps one incomplete document from aborting the
                # whole listing with a KeyError.
                print("Title:", book.get("title", "Unknown"))
                print("Author:", book.get("author", "Unknown"))
                print()

    except Exception as e:
        # Handle any potential errors
        print("Error fetching books:", e)


In [None]:
# When designing a document-oriented NoSQL database schema, start from the structure of the customer information that needs to be stored. Here's an example schema for an e-commerce platform:

# Illustrative document shape only: this bare dict literal is evaluated and
# discarded (it is not assigned to a name), and every leaf value is a
# placeholder string describing what belongs in that field.
{
    "customer_id": "<unique identifier>",
    # Name is a nested sub-document rather than a single string.
    "name": {
        "first_name": "<first name>",
        "last_name": "<last name>"
    },
    "email": "<email address>",
    # Address is a nested sub-document.
    "address": {
        "street": "<street address>",
        "city": "<city>",
        "state": "<state>",
        "country": "<country>",
        "postal_code": "<postal code>"
    },
    "phone": "<phone number>",
    "registration_date": "<registration date>"
}

#To insert a new customer document into the database and handle validations, you can use the following Python program:
from pymongo import MongoClient
from datetime import datetime

def insert_new_customer(customer_data):
    """Validate a customer document and insert it into ``customers``.

    Args:
        customer_data: The customer document to insert. It is checked with
            ``validate_customer_data`` first. On the success path its
            ``registration_date`` key is set in place before the insert.

    Returns:
        None. The outcome is reported with ``print``; exceptions are caught
        and not re-raised.
    """
    # Local import keeps this function self-contained.
    from datetime import timezone

    try:
        # Validate before connecting so that invalid input never opens a
        # database connection.
        if not validate_customer_data(customer_data):
            print("Invalid customer data. Aborting insertion.")
            return

        # The context manager closes the client on exit, including on error.
        with MongoClient('mongodb://localhost:27017/') as client:
            collection = client['your_database_name']['customers']

            # Use an aware UTC timestamp: PyMongo treats naive datetimes as
            # UTC, so a naive local time would be stored with the wrong
            # instant.
            customer_data['registration_date'] = datetime.now(timezone.utc)

            # Insert the customer document
            collection.insert_one(customer_data)

        # Print success message
        print("Customer document inserted successfully.")

    except Exception as e:
        # Handle any potential errors
        print("Error inserting customer document:", e)

def validate_customer_data(customer_data):
    """Return True when *customer_data* is an acceptable customer document.

    Checks performed:
      * the input is a mapping (a plain string would otherwise pass the
        ``in`` checks by substring match, and ``None`` would raise);
      * ``name`` and ``email`` are present and non-empty;
      * ``email`` is a string containing ``@``.

    Add further checks (data types, length limits, nested address fields,
    etc.) as the application requires.

    Args:
        customer_data: Candidate customer document.

    Returns:
        bool: True if every check passes, False otherwise.
    """
    # Local import keeps this function self-contained.
    from collections.abc import Mapping

    if not isinstance(customer_data, Mapping):
        return False

    # Required fields must be present and non-empty.
    for field in ('name', 'email'):
        if not customer_data.get(field):
            return False

    # Minimal sanity check on the e-mail address; not a full validator.
    email = customer_data['email']
    if not isinstance(email, str) or '@' not in email:
        return False

    return True



3. High Availability and Fault Tolerance:
   a. Explain the concept of replica sets in MongoDB. Write a Python program that connects to a MongoDB replica set and retrieves the status of the primary and secondary nodes.

   Ans.a. Replica sets in MongoDB are a mechanism for achieving high availability and fault tolerance. A replica set consists of multiple MongoDB instances, where one instance acts as the primary node and the others act as secondary nodes. The primary node handles all write operations and replicates the data changes to the secondary nodes. If the primary node becomes unavailable, one of the secondary nodes automatically gets promoted to become the new primary node.

Replica sets provide the following benefits:

High availability: If the primary node fails, one of the secondary nodes can take over as the new primary node, ensuring continuous availability of the database.

Fault tolerance: Data is automatically replicated to multiple nodes, providing redundancy and protecting against data loss in case of node failures.

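Read scalability: Applications can read data from secondary nodes by choosing a suitable read preference, distributing the read workload and improving read performance. Because replication is asynchronous, such reads may return slightly stale data.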

Now, let's write a Python program that connects to a MongoDB replica set and retrieves the status of the primary and secondary nodes:
   



In [None]:
from pymongo import MongoClient

def get_replica_set_status():
    """Print the primary and secondary members of a MongoDB replica set.

    Runs the ``replSetGetStatus`` admin command and classifies each member
    by its reported ``stateStr`` instead of assuming that the first entry of
    ``members`` is the primary and the rest are secondaries.

    Returns:
        None. Output is printed; any exception is caught and reported with
        ``print`` instead of being raised.
    """
    try:
        # Constructing the client inside the try block means a malformed URI
        # is reported by the handler below; the context manager closes the
        # client on exit.
        with MongoClient('mongodb://<primary_node_host>:<primary_node_port>,<secondary_node_1_host>:<secondary_node_1_port>,<secondary_node_2_host>:<secondary_node_2_port>/',
                         replicaSet='<replica_set_name>') as client:
            # Get the replica set status
            status = client.admin.command('replSetGetStatus')

        members = status.get('members', [])
        primaries = [m for m in members if m.get('stateStr') == 'PRIMARY']
        secondaries = [m for m in members if m.get('stateStr') == 'SECONDARY']

        # Print the primary node status
        print("Primary Node:")
        print("-----------")
        if not primaries:
            # Can happen during an election or when a majority is unreachable.
            print("No primary found.")
            print()
        for member in primaries:
            # '_id' is the member's identifier; 'state' is its numeric state.
            print("ID:", member.get('_id'))
            print("Host:", member.get('name'))
            print("State:", member.get('stateStr'))
            print()

        # Print the secondary nodes status
        print("Secondary Nodes:")
        print("----------------")
        for member in secondaries:
            print("ID:", member.get('_id'))
            print("Host:", member.get('name'))
            print("State:", member.get('stateStr'))
            print()

    except Exception as e:
        # Handle any potential errors
        print("Error retrieving replica set status:", e)


b. Describe how Cassandra ensures high availability and fault tolerance in a distributed database system. Write a Python program that connects to a Cassandra cluster and fetches the status of the nodes.

Ans. Cassandra ensures high availability and fault tolerance in a distributed database system through the following mechanisms:

Distributed Architecture: Cassandra uses a peer-to-peer distributed architecture where all nodes in the cluster are equal. There is no single point of failure or master node. Data is distributed across multiple nodes using a partitioning strategy, ensuring that each node is responsible for a specific range of data.

Replication: Cassandra replicates data across multiple nodes according to a configurable replication factor. Each data item is stored on several nodes, which can be placed in different racks or data centers, providing fault tolerance and keeping the data available even if some nodes fail.

Consistent Hashing: Cassandra uses consistent hashing to distribute data evenly across the cluster. This allows for the addition or removal of nodes without requiring a full data redistribution, ensuring minimal impact on the system's availability.

Data Replication Strategy: Cassandra supports multiple data replication strategies, such as SimpleStrategy and NetworkTopologyStrategy. These strategies define how data is replicated across nodes and data centers. NetworkTopologyStrategy enables replication based on data center placement, ensuring fault tolerance in case of data center failures.

Gossip Protocol: Cassandra uses a gossip protocol for node discovery, failure detection, and cluster state propagation. Nodes periodically exchange information about themselves and other nodes in the cluster, enabling efficient detection of node failures and ensuring consistent cluster status across all nodes.

Now, let's write a Python program that connects to a Cassandra cluster and fetches the status of the nodes:

In [None]:
from cassandra.cluster import Cluster

def get_cluster_status():
    """Print address, data center, rack and up/down flag for each node.

    Connects to the Cassandra cluster through the configured contact points
    and iterates over the hosts listed in the cluster metadata.

    Returns:
        None. Output is printed; any exception is caught and reported with
        ``print`` instead of being raised.
    """
    # Bound before the try block so the cleanup below is safe even when
    # constructing the Cluster fails.
    cluster = None
    try:
        # Establish a connection to the Cassandra cluster
        cluster = Cluster(['<contact_point_1>', '<contact_point_2>', '<contact_point_3>'])

        # Connect before reading the metadata, as the original did; the
        # returned session itself is not needed here.
        cluster.connect()
        metadata = cluster.metadata

        # Print the status of each node in the cluster
        for host in metadata.all_hosts():
            print("Node Address:", host.address)
            print("Data Center:", host.datacenter)
            print("Rack:", host.rack)
            print("Status:", host.is_up)
            print()

    except Exception as e:
        # Handle any potential errors
        print("Error fetching cluster status:", e)

    finally:
        # Close the connection if one was created
        if cluster is not None:
            cluster.shutdown()



4. Sharding in MongoDB:
   a. Explain the concept of sharding in MongoDB and how it improves performance and scalability. Write a Python program that sets up sharding for a MongoDB cluster and inserts multiple documents into a sharded collection.
   
   Ans.Sharding in MongoDB is a technique used to horizontally partition data across multiple machines or shards in a cluster. Each shard contains a subset of the data, allowing for distributed storage and parallel processing. Sharding improves performance and scalability by providing the following benefits:

Horizontal Scaling: Sharding allows you to scale your MongoDB cluster horizontally by adding more machines or shards to the cluster. This distributes the data and workload across multiple servers, enabling the system to handle larger data sets and higher throughput.

Load Balancing: MongoDB's sharding architecture automatically balances the data distribution across shards. It redistributes the data as new shards are added or existing ones are removed, ensuring that each shard has a similar amount of data and workload.

Improved Query Performance: Sharding can enhance query performance by allowing parallel execution of queries across multiple shards. As data is distributed across multiple shards, queries can be executed in parallel on different shards, resulting in faster query response times.
Now, let's write a Python program that sets up sharding for a MongoDB cluster and inserts multiple documents into a sharded collection:









In [None]:
# Python program that sets up sharding and inserts documents into a sharded collection:
from pymongo import MongoClient

def setup_sharding():
    """Enable sharding for a database and shard one of its collections.

    Connects to the cluster at the configured ``mongos`` address, issues the
    ``enableSharding`` admin command for the database, then issues
    ``shardCollection`` with a hashed key on the ``shard_key`` field.

    Returns:
        None. The outcome is reported with ``print``; exceptions are caught
        and not re-raised.
    """
    try:
        # Constructing the client inside the try block means a malformed URI
        # is reported by the handler below; the context manager closes the
        # client on exit.
        with MongoClient('mongodb://<mongos_host>:<mongos_port>/') as client:
            # Enable sharding for a specific database
            client.admin.command('enableSharding', '<your_database_name>')

            # Create a sharded collection using a hashed shard key
            client.admin.command(
                'shardCollection',
                '<your_database_name>.<your_collection_name>',
                key={'shard_key': 'hashed'},
            )

        # Print success message
        print("Sharding set up successfully.")

    except Exception as e:
        # Handle any potential errors
        print("Error setting up sharding:", e)


def insert_documents():
    """Insert three sample documents into the sharded collection.

    Each document carries a ``shard_key`` field, matching the key used when
    the collection was sharded. The ``_id`` values are fixed (1, 2, 3), so
    running this twice against the same collection reports a duplicate-key
    error through the handler below.

    Returns:
        None. The outcome is reported with ``print``; exceptions are caught
        and not re-raised.
    """
    documents = [
        {'_id': 1, 'shard_key': 'abc', 'field1': 'value1'},
        {'_id': 2, 'shard_key': 'def', 'field1': 'value2'},
        {'_id': 3, 'shard_key': 'ghi', 'field1': 'value3'},
    ]

    try:
        # Constructing the client inside the try block means a malformed URI
        # is reported by the handler below; the context manager closes the
        # client on exit.
        with MongoClient('mongodb://<mongos_host>:<mongos_port>/') as client:
            # Access the sharded collection
            collection = client['<your_database_name>']['<your_collection_name>']

            # Insert multiple documents into the sharded collection
            collection.insert_many(documents)

        # Print success message
        print("Documents inserted successfully.")

    except Exception as e:
        # Handle any potential errors
        print("Error inserting documents:", e)


b. Design a sharding strategy for a social media application where user data needs to be distributed across multiple shards. Write a Python program to demonstrate how data is distributed and retrieved from the sharded cluster.

Ans. When designing a sharding strategy for a social media application, a common approach is to shard the data on the user ID, typically with a hashed shard key. Hashing spreads sequential user IDs evenly across the shards, while a lookup by user ID can still be routed to the single shard that holds that user's data.

Here's a Python program that demonstrates how data is distributed and retrieved from a sharded cluster based on the user ID:

In [None]:
from pymongo import MongoClient

def distribute_data():
    """Insert 100 sample user documents into ``social_media.users``.

    Documents get ``_id`` values 1 through 100 plus a derived name and
    e-mail address. They are sent in a single ``insert_many`` call instead
    of 100 separate ``insert_one`` round trips.

    Returns:
        None. The outcome is reported with ``print``; exceptions are caught
        and not re-raised.
    """
    documents = [
        {
            '_id': user_id,
            'name': f'User {user_id}',
            'email': f'user{user_id}@example.com',
            # Add more user data fields as needed
        }
        for user_id in range(1, 101)
    ]

    try:
        # Constructing the client inside the try block means a malformed URI
        # is reported by the handler below; the context manager closes the
        # client on exit.
        with MongoClient('mongodb://<mongos_host>:<mongos_port>/') as client:
            # Access the sharded collection
            collection = client['social_media']['users']

            # Ordered bulk insert (the default): stops at the first failing
            # document, like the one-by-one loop it replaces.
            collection.insert_many(documents)

        # Print success message
        print("Data distributed successfully.")

    except Exception as e:
        # Handle any potential errors
        print("Error distributing data:", e)


def retrieve_data(user_id):
    """Look up one user in ``social_media.users`` by ``_id`` and print it.

    Args:
        user_id: Value matched against the document's ``_id`` field.

    Returns:
        None. Prints the user's fields, or "User not found." when no
        document matches; exceptions are caught and reported with ``print``.
    """
    try:
        # Constructing the client inside the try block means a malformed URI
        # is reported by the handler below; the context manager closes the
        # client on exit.
        with MongoClient('mongodb://<mongos_host>:<mongos_port>/') as client:
            # Access the sharded collection
            collection = client['social_media']['users']

            # Retrieve user data
            document = collection.find_one({'_id': user_id})

        # find_one returns None when nothing matches.
        if document is None:
            print("User not found.")
            return

        print("User ID:", document['_id'])
        print("Name:", document['name'])
        print("Email:", document['email'])
        # Print more user data fields as needed

    except Exception as e:
        # Handle any potential errors
        print("Error retrieving data:", e)


5. Indexing in MongoDB:
   a. Describe the concept of indexing in MongoDB and its importance in query optimization. Write a Python program that creates an index on a specific field in a MongoDB collection and executes a query using that index.
  
  Ans.
  Indexing in MongoDB is the process of creating an index data structure that improves the speed of data retrieval operations, such as queries, by enabling efficient access to specific fields or combinations of fields in a collection. Indexes are stored in a separate data structure that allows MongoDB to quickly locate and retrieve documents that match the query criteria.

Here are some key points regarding indexing in MongoDB and its importance in query optimization:

Index Structure: MongoDB supports various types of indexes, including single-field indexes, compound indexes, multi-key indexes, geospatial indexes, and text indexes. Each index type is designed to efficiently handle different query patterns and data types.

Improved Query Performance: Indexes facilitate faster query execution by reducing the number of documents that need to be examined. Instead of scanning the entire collection, MongoDB can use the index to identify a subset of documents that match the query conditions, significantly improving query performance.

Selectivity: An important aspect of indexing is the selectivity of an index. A highly selective index filters out a large portion of the documents, leading to faster query execution. On the other hand, a less selective index may still improve query performance but might not provide as significant gains.

Considerations for Indexing: While indexing enhances query performance, it also has trade-offs. Indexes consume storage space and require additional resources for maintenance during write operations. Therefore, it's crucial to create indexes selectively based on the application's query patterns and performance requirements.

Now, let's write a Python program that creates an index on a specific field in a MongoDB collection and executes a query using that index:





In [None]:
from pymongo import MongoClient

def create_index():
    """Create a single-field index on ``your_field_name``.

    Targets ``your_collection_name`` in ``your_database_name`` on the local
    MongoDB server.

    Returns:
        None. The outcome is reported with ``print``; exceptions are caught
        and not re-raised.
    """
    try:
        # The context manager closes the client on exit, including on error,
        # so no separate finally clause is needed.
        with MongoClient('mongodb://localhost:27017/') as client:
            # Access the database and collection
            collection = client['your_database_name']['your_collection_name']

            # Create an index on a specific field
            collection.create_index('your_field_name')

        # Print success message
        print("Index created successfully.")

    except Exception as e:
        # Handle any potential errors
        print("Error creating index:", e)


def execute_query():
    """Run an equality query on ``your_field_name`` and print each match.

    Targets ``your_collection_name`` in ``your_database_name`` on the local
    MongoDB server. The filter is an exact match on the field that
    ``create_index`` indexes.

    Returns:
        None. Matching documents are printed; exceptions are caught and
        reported with ``print``.
    """
    try:
        # The context manager closes the client on exit, including on error,
        # so no separate finally clause is needed.
        with MongoClient('mongodb://localhost:27017/') as client:
            # Access the database and collection
            collection = client['your_database_name']['your_collection_name']

            # Execute the query
            query = {'your_field_name': 'your_field_value'}

            # Iterate inside the with-block: the cursor fetches results
            # lazily and needs the client to still be open.
            for document in collection.find(query):
                print(document)

    except Exception as e:
        # Handle any potential errors
        print("Error executing query:", e)


 b. Given a MongoDB collection named "products", write a Python function that searches for products with a specific keyword in the name or description. Optimize the query by adding appropriate indexes.

In [None]:
from pymongo import MongoClient

def search_products(keyword):
    """Print products whose name or description contains *keyword*.

    The match is a case-insensitive substring search on the ``name`` and
    ``description`` fields of the ``products`` collection.

    Args:
        keyword: Text to search for. It is matched literally; regex
            metacharacters in it are escaped.

    Returns:
        None. Matches are printed; exceptions are caught and reported with
        ``print`` instead of being raised.
    """
    # Local import keeps this function self-contained.
    import re

    try:
        # The context manager closes the client on exit, including on error.
        with MongoClient('mongodb://localhost:27017/') as client:
            # Access the database and collection
            collection = client['your_database_name']['products']

            # Create indexes on the name and description fields.
            # NOTE(review): an unanchored, case-insensitive regex probably
            # cannot use these indexes efficiently; if word-level matching
            # is acceptable, consider a text index with $text - confirm
            # with explain().
            collection.create_index('name')
            collection.create_index('description')

            # Escape the keyword so characters such as "." or "(" are
            # matched literally rather than interpreted as regex syntax.
            pattern = re.escape(keyword)
            query = {
                '$or': [
                    {'name': {'$regex': pattern, '$options': 'i'}},
                    {'description': {'$regex': pattern, '$options': 'i'}},
                ]
            }

            # Print the search results
            for product in collection.find(query):
                # A document can match on one field and lack the other;
                # .get() keeps that from aborting the listing with KeyError.
                print("Name:", product.get('name'))
                print("Description:", product.get('description'))
                print()

    except Exception as e:
        # Handle any potential errors
        print("Error searching products:", e)
