<a href="https://colab.research.google.com/github/Deimiser/DV/blob/main/Untitled30.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from collections import Counter
from multiprocessing import Pool

def map_function(text):
    return Counter(text.split())

def reduce_function(counters):
    return sum(counters, Counter())

def map_reduce(data, map_func, reduce_func, num_processes=2):
    with Pool(num_processes) as pool:
        return reduce_func(pool.map(map_func, data))

if __name__ == "__main__":
    input_data = [
        "Hello world",
        "World of MapReduce",
        "MapReduce example",
        "Word Count in MapReduce"
    ]

    # Word Count
    result = map_reduce(input_data, map_function, reduce_function)

    # Display Word Count result
    print("Word Count Result:")
    for word, count in result.items():
        print(f"{word}: {count}")


Word Count Result:
Hello: 1
world: 1
World: 1
of: 1
MapReduce: 3
example: 1
Word: 1
Count: 1
in: 1


In [8]:
class BloomFilter:
    def __init__(self, size, hash_functions):
        self.bit_array = [0] * size
        self.size = size
        self.hash_functions = [lambda item, i=i: hash(f"{item}{i}") % size for i in range(hash_functions)]

    def add(self, item):
        for hash_function in self.hash_functions:
            self.bit_array[hash_function(item)] = 1

    def __contains__(self, item):
        return all(self.bit_array[hash_function(item)] == 1 for hash_function in self.hash_functions)

# Example usage:
if __name__ == "__main__":
    bloom_filter = BloomFilter(1000, 3)
    elements_to_add = ["apple", "banana", "cherry", "dog", "elephant"]
    [bloom_filter.add(element) for element in elements_to_add]

    elements_to_check = ["apple", "banana", "grape", "elephant", "cat"]
    [print(f"{element} {'may be in the set.' if element in bloom_filter else 'is definitely not in the set.'}") for element in elements_to_check]


apple may be in the set.
banana may be in the set.
grape is definitely not in the set.
elephant may be in the set.
cat is definitely not in the set.


In [12]:
from itertools import combinations

def generate_candidates(prev_candidates, k):
    return set(frozenset(itemset1.union(itemset2)) for itemset1 in prev_candidates for itemset2 in prev_candidates if len(itemset1.union(itemset2)) == k)

def prune_candidates(candidates, prev_frequent_sets):
    return {c for c in candidates if all(subset in prev_frequent_sets for subset in combinations(c, len(c)))}

def apriori(transactions, min_support):
    itemsets = [set(transaction) for transaction in transactions]
    k, frequent_sets = 1, []

    while True:
        candidates = prune_candidates(generate_candidates(frequent_sets, k), frequent_sets)
        counts = {c: 0 for c in candidates}

        for itemset in itemsets:
            for candidate in candidates:
                if candidate.issubset(itemset):
                    counts[candidate] += 1

        frequent_sets_k = {itemset for itemset, count in counts.items() if count >= min_support}

        if not frequent_sets_k:
            break

        frequent_sets.extend(frequent_sets_k)
        k += 1

    return frequent_sets

# Example usage:
transactions = [
    ["bread", "milk", "eggs"],
    ["bread", "butter", "eggs"],
    ["milk", "butter", "eggs"],
    ["bread", "milk", "butter", "eggs"]
]

min_support = 2
result = apriori(transactions, min_support)

print("Frequent Itemsets:")
for itemset in result:
    print(itemset)


Frequent Itemsets:


In [14]:
import numpy as np

def pagerank(graph, damping=0.85, epsilon=1e-8, max_iter=100):
    n = len(graph)
    A = np.array([[1 / len(graph.get(j, [])) if j in graph[i] else 0 for j in range(n)] for i in range(n)])
    r = np.ones(n) / n

    for _ in range(max_iter):
        new_r = (1 - damping) / n + damping * A.T @ r

        # Check for convergence
        if np.linalg.norm(new_r - r, 2) < epsilon:
            break

        r = new_r

    return r

# Example graph: {node: [neighbors]}
graph = {0: [1, 2], 1: [2], 2: [0, 3], 3: [2]}

# Calculate PageRank scores
scores = pagerank(graph)

# Print PageRank scores
for node, score in enumerate(scores):
    print(f"Node {node}: PageRank Score = {score:.4f}")


Node 0: PageRank Score = 0.1754
Node 1: PageRank Score = 0.1866
Node 2: PageRank Score = 0.3246
Node 3: PageRank Score = 0.3134
