diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1bb3de782275..28f83a638d7b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,12 +16,12 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.4 + rev: v0.1.6 hooks: - id: ruff - repo: https://github.com/psf/black - rev: 23.10.1 + rev: 23.11.0 hooks: - id: black @@ -33,7 +33,7 @@ repos: - tomli - repo: https://github.com/tox-dev/pyproject-fmt - rev: "1.4.1" + rev: "1.5.1" hooks: - id: pyproject-fmt @@ -51,7 +51,7 @@ repos: - id: validate-pyproject - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.6.1 + rev: v1.7.1 hooks: - id: mypy args: diff --git a/DIRECTORY.md b/DIRECTORY.md index cb4b00b045b5..ea0ba22bcc13 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -428,12 +428,16 @@ * [Haversine Distance](geodesy/haversine_distance.py) * [Lamberts Ellipsoidal Distance](geodesy/lamberts_ellipsoidal_distance.py) +## Geometry + * [Geometry](geometry/geometry.py) + ## Graphics * [Bezier Curve](graphics/bezier_curve.py) * [Vector3 For 2D Rendering](graphics/vector3_for_2d_rendering.py) ## Graphs * [A Star](graphs/a_star.py) + * [Ant Colony Optimization Algorithms](graphs/ant_colony_optimization_algorithms.py) * [Articulation Points](graphs/articulation_points.py) * [Basic Graphs](graphs/basic_graphs.py) * [Bellman Ford](graphs/bellman_ford.py) @@ -718,6 +722,7 @@ * [Sock Merchant](maths/sock_merchant.py) * [Softmax](maths/softmax.py) * [Solovay Strassen Primality Test](maths/solovay_strassen_primality_test.py) + * [Spearman Rank Correlation Coefficient](maths/spearman_rank_correlation_coefficient.py) * Special Numbers * [Armstrong Numbers](maths/special_numbers/armstrong_numbers.py) * [Automorphic Number](maths/special_numbers/automorphic_number.py) @@ -1310,7 +1315,6 @@ * [Fetch Well Rx Price](web_programming/fetch_well_rx_price.py) * [Get Amazon Product Data](web_programming/get_amazon_product_data.py) * [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py) - * [Get Imdbtop](web_programming/get_imdbtop.py) * [Get Ip Geolocation](web_programming/get_ip_geolocation.py) * [Get Top Billionaires](web_programming/get_top_billionaires.py) * [Get Top Hn Posts](web_programming/get_top_hn_posts.py) diff --git a/backtracking/all_combinations.py b/backtracking/all_combinations.py index ecbcc5882ec1..407304948c39 100644 --- a/backtracking/all_combinations.py +++ b/backtracking/all_combinations.py @@ -26,9 +26,11 @@ def generate_all_combinations(n: int, k: int) -> list[list[int]]: >>> generate_all_combinations(n=10, k=-1) Traceback (most recent call last): ... - RecursionError: maximum recursion depth exceeded + ValueError: k must not be negative >>> generate_all_combinations(n=-1, k=10) - [] + Traceback (most recent call last): + ... + ValueError: n must not be negative >>> generate_all_combinations(n=5, k=4) [[1, 2, 3, 4], [1, 2, 3, 5], [1, 2, 4, 5], [1, 3, 4, 5], [2, 3, 4, 5]] >>> from itertools import combinations @@ -36,6 +38,10 @@ def generate_all_combinations(n: int, k: int) -> list[list[int]]: ... for n in range(1, 6) for k in range(1, 6)) True """ + if k < 0: + raise ValueError("k must not be negative") + if n < 0: + raise ValueError("n must not be negative") result: list[list[int]] = [] create_all_state(1, n, k, [], result) diff --git a/backtracking/knight_tour.py b/backtracking/knight_tour.py index cc88307b7fe8..5f7dee8d97bf 100644 --- a/backtracking/knight_tour.py +++ b/backtracking/knight_tour.py @@ -79,7 +79,7 @@ def open_knight_tour(n: int) -> list[list[int]]: >>> open_knight_tour(2) Traceback (most recent call last): ... - ValueError: Open Kight Tour cannot be performed on a board of size 2 + ValueError: Open Knight Tour cannot be performed on a board of size 2 """ board = [[0 for i in range(n)] for j in range(n)] @@ -91,7 +91,7 @@ def open_knight_tour(n: int) -> list[list[int]]: return board board[i][j] = 0 - msg = f"Open Kight Tour cannot be performed on a board of size {n}" + msg = f"Open Knight Tour cannot be performed on a board of size {n}" raise ValueError(msg) diff --git a/backtracking/n_queens.py b/backtracking/n_queens.py index 0f237d95e7c8..2cd8c703fc72 100644 --- a/backtracking/n_queens.py +++ b/backtracking/n_queens.py @@ -24,6 +24,10 @@ def is_safe(board: list[list[int]], row: int, column: int) -> bool: Returns: Boolean Value + >>> is_safe([[0, 0, 0], [0, 0, 0], [0, 0, 0]], 1, 1) + True + >>> is_safe([[1, 0, 0], [0, 0, 0], [0, 0, 0]], 1, 1) + False """ n = len(board) # Size of the board diff --git a/bit_manipulation/is_even.py b/bit_manipulation/is_even.py index ba036f35aa1e..6f95a1160797 100644 --- a/bit_manipulation/is_even.py +++ b/bit_manipulation/is_even.py @@ -1,7 +1,7 @@ def is_even(number: int) -> bool: """ return true if the input integer is even - Explanation: Lets take a look at the following deicmal to binary conversions + Explanation: Lets take a look at the following decimal to binary conversions 2 => 10 14 => 1110 100 => 1100100 diff --git a/bit_manipulation/single_number.py b/bit_manipulation/single_number.py new file mode 100644 index 000000000000..8eeeefeecd60 --- /dev/null +++ b/bit_manipulation/single_number.py @@ -0,0 +1,41 @@ +""" + Given a non-empty array of integers nums, every element + appears twice except for one. Find that single one + + You must implement a solution with a linear runtime complexity + and use only constant extra space. + + Reference: https://leetcode.com/problems/single-number/ +""" + + +def single_number(nums: list) -> int: + """ + :param nums: A non-empty array of any integers nums, + every element appears twice except for one. + :return: element that appears only one time + + Examples: + Example 1 + >>> print(single_number([1, 3, 3, 2, 6, 2, 1])) + 6 + + Example 2 + >>> print(single_number([12, 1, 1, 7, 1, 12, 1])) + 7 + + Example 3 + >>> print(single_number([6])) + 6 + """ + + result = 0 + for el in nums: + result ^= el + return result + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/data_structures/binary_tree/binary_search_tree.py b/data_structures/binary_tree/binary_search_tree.py index f08f278a8e47..9071f03dcc8c 100644 --- a/data_structures/binary_tree/binary_search_tree.py +++ b/data_structures/binary_tree/binary_search_tree.py @@ -198,6 +198,30 @@ def insert(self, *values) -> Self: return self def search(self, value) -> Node | None: + """ + >>> tree = BinarySearchTree().insert(10, 20, 30, 40, 50) + >>> tree.search(10) + {'10': (None, {'20': (None, {'30': (None, {'40': (None, 50)})})})} + >>> tree.search(20) + {'20': (None, {'30': (None, {'40': (None, 50)})})} + >>> tree.search(30) + {'30': (None, {'40': (None, 50)})} + >>> tree.search(40) + {'40': (None, 50)} + >>> tree.search(50) + 50 + >>> tree.search(5) is None # element not present + True + >>> tree.search(0) is None # element not present + True + >>> tree.search(-5) is None # element not present + True + >>> BinarySearchTree().search(10) + Traceback (most recent call last): + ... + IndexError: Warning: Tree is empty! please use another. + """ + if self.empty(): raise IndexError("Warning: Tree is empty! please use another.") else: @@ -210,6 +234,15 @@ def search(self, value) -> Node | None: def get_max(self, node: Node | None = None) -> Node | None: """ We go deep on the right branch + + >>> BinarySearchTree().insert(10, 20, 30, 40, 50).get_max() + 50 + >>> BinarySearchTree().insert(-5, -1, 0.1, -0.3, -4.5).get_max() + {'0.1': (-0.3, None)} + >>> BinarySearchTree().insert(1, 78.3, 30, 74.0, 1).get_max() + {'78.3': ({'30': (1, 74.0)}, None)} + >>> BinarySearchTree().insert(1, 783, 30, 740, 1).get_max() + {'783': ({'30': (1, 740)}, None)} """ if node is None: if self.root is None: @@ -224,6 +257,15 @@ def get_max(self, node: Node | None = None) -> Node | None: def get_min(self, node: Node | None = None) -> Node | None: """ We go deep on the left branch + + >>> BinarySearchTree().insert(10, 20, 30, 40, 50).get_min() + {'10': (None, {'20': (None, {'30': (None, {'40': (None, 50)})})})} + >>> BinarySearchTree().insert(-5, -1, 0, -0.3, -4.5).get_min() + {'-5': (None, {'-1': (-4.5, {'0': (-0.3, None)})})} + >>> BinarySearchTree().insert(1, 78.3, 30, 74.0, 1).get_min() + {'1': (None, {'78.3': ({'30': (1, 74.0)}, None)})} + >>> BinarySearchTree().insert(1, 783, 30, 740, 1).get_min() + {'1': (None, {'783': ({'30': (1, 740)}, None)})} """ if node is None: node = self.root diff --git a/geometry/geometry.py b/geometry/geometry.py new file mode 100644 index 000000000000..9e353dee17a7 --- /dev/null +++ b/geometry/geometry.py @@ -0,0 +1,259 @@ +from __future__ import annotations + +import math +from dataclasses import dataclass, field +from types import NoneType +from typing import Self + +# Building block classes + + +@dataclass +class Angle: + """ + An Angle in degrees (unit of measurement) + + >>> Angle() + Angle(degrees=90) + >>> Angle(45.5) + Angle(degrees=45.5) + >>> Angle(-1) + Traceback (most recent call last): + ... + TypeError: degrees must be a numeric value between 0 and 360. + >>> Angle(361) + Traceback (most recent call last): + ... + TypeError: degrees must be a numeric value between 0 and 360. + """ + + degrees: float = 90 + + def __post_init__(self) -> None: + if not isinstance(self.degrees, (int, float)) or not 0 <= self.degrees <= 360: + raise TypeError("degrees must be a numeric value between 0 and 360.") + + +@dataclass +class Side: + """ + A side of a two dimensional Shape such as Polygon, etc. + adjacent_sides: a list of sides which are adjacent to the current side + angle: the angle in degrees between each adjacent side + length: the length of the current side in meters + + >>> Side(5) + Side(length=5, angle=Angle(degrees=90), next_side=None) + >>> Side(5, Angle(45.6)) + Side(length=5, angle=Angle(degrees=45.6), next_side=None) + >>> Side(5, Angle(45.6), Side(1, Angle(2))) # doctest: +ELLIPSIS + Side(length=5, angle=Angle(degrees=45.6), next_side=Side(length=1, angle=Angle(d... + """ + + length: float + angle: Angle = field(default_factory=Angle) + next_side: Side | None = None + + def __post_init__(self) -> None: + if not isinstance(self.length, (int, float)) or self.length <= 0: + raise TypeError("length must be a positive numeric value.") + if not isinstance(self.angle, Angle): + raise TypeError("angle must be an Angle object.") + if not isinstance(self.next_side, (Side, NoneType)): + raise TypeError("next_side must be a Side or None.") + + +@dataclass +class Ellipse: + """ + A geometric Ellipse on a 2D surface + + >>> Ellipse(5, 10) + Ellipse(major_radius=5, minor_radius=10) + >>> Ellipse(5, 10) is Ellipse(5, 10) + False + >>> Ellipse(5, 10) == Ellipse(5, 10) + True + """ + + major_radius: float + minor_radius: float + + @property + def area(self) -> float: + """ + >>> Ellipse(5, 10).area + 157.07963267948966 + """ + return math.pi * self.major_radius * self.minor_radius + + @property + def perimeter(self) -> float: + """ + >>> Ellipse(5, 10).perimeter + 47.12388980384689 + """ + return math.pi * (self.major_radius + self.minor_radius) + + +class Circle(Ellipse): + """ + A geometric Circle on a 2D surface + + >>> Circle(5) + Circle(radius=5) + >>> Circle(5) is Circle(5) + False + >>> Circle(5) == Circle(5) + True + >>> Circle(5).area + 78.53981633974483 + >>> Circle(5).perimeter + 31.41592653589793 + """ + + def __init__(self, radius: float) -> None: + super().__init__(radius, radius) + self.radius = radius + + def __repr__(self) -> str: + return f"Circle(radius={self.radius})" + + @property + def diameter(self) -> float: + """ + >>> Circle(5).diameter + 10 + """ + return self.radius * 2 + + def max_parts(self, num_cuts: float) -> float: + """ + Return the maximum number of parts that circle can be divided into if cut + 'num_cuts' times. + + >>> circle = Circle(5) + >>> circle.max_parts(0) + 1.0 + >>> circle.max_parts(7) + 29.0 + >>> circle.max_parts(54) + 1486.0 + >>> circle.max_parts(22.5) + 265.375 + >>> circle.max_parts(-222) + Traceback (most recent call last): + ... + TypeError: num_cuts must be a positive numeric value. + >>> circle.max_parts("-222") + Traceback (most recent call last): + ... + TypeError: num_cuts must be a positive numeric value. + """ + if not isinstance(num_cuts, (int, float)) or num_cuts < 0: + raise TypeError("num_cuts must be a positive numeric value.") + return (num_cuts + 2 + num_cuts**2) * 0.5 + + +@dataclass +class Polygon: + """ + An abstract class which represents Polygon on a 2D surface. + + >>> Polygon() + Polygon(sides=[]) + """ + + sides: list[Side] = field(default_factory=list) + + def add_side(self, side: Side) -> Self: + """ + >>> Polygon().add_side(Side(5)) + Polygon(sides=[Side(length=5, angle=Angle(degrees=90), next_side=None)]) + """ + self.sides.append(side) + return self + + def get_side(self, index: int) -> Side: + """ + >>> Polygon().get_side(0) + Traceback (most recent call last): + ... + IndexError: list index out of range + >>> Polygon().add_side(Side(5)).get_side(-1) + Side(length=5, angle=Angle(degrees=90), next_side=None) + """ + return self.sides[index] + + def set_side(self, index: int, side: Side) -> Self: + """ + >>> Polygon().set_side(0, Side(5)) + Traceback (most recent call last): + ... + IndexError: list assignment index out of range + >>> Polygon().add_side(Side(5)).set_side(0, Side(10)) + Polygon(sides=[Side(length=10, angle=Angle(degrees=90), next_side=None)]) + """ + self.sides[index] = side + return self + + +class Rectangle(Polygon): + """ + A geometric rectangle on a 2D surface. + + >>> rectangle_one = Rectangle(5, 10) + >>> rectangle_one.perimeter() + 30 + >>> rectangle_one.area() + 50 + """ + + def __init__(self, short_side_length: float, long_side_length: float) -> None: + super().__init__() + self.short_side_length = short_side_length + self.long_side_length = long_side_length + self.post_init() + + def post_init(self) -> None: + """ + >>> Rectangle(5, 10) # doctest: +NORMALIZE_WHITESPACE + Rectangle(sides=[Side(length=5, angle=Angle(degrees=90), next_side=None), + Side(length=10, angle=Angle(degrees=90), next_side=None)]) + """ + self.short_side = Side(self.short_side_length) + self.long_side = Side(self.long_side_length) + super().add_side(self.short_side) + super().add_side(self.long_side) + + def perimeter(self) -> float: + return (self.short_side.length + self.long_side.length) * 2 + + def area(self) -> float: + return self.short_side.length * self.long_side.length + + +@dataclass +class Square(Rectangle): + """ + a structure which represents a + geometrical square on a 2D surface + >>> square_one = Square(5) + >>> square_one.perimeter() + 20 + >>> square_one.area() + 25 + """ + + def __init__(self, side_length: float) -> None: + super().__init__(side_length, side_length) + + def perimeter(self) -> float: + return super().perimeter() + + def area(self) -> float: + return super().area() + + +if __name__ == "__main__": + __import__("doctest").testmod() diff --git a/graphs/ant_colony_optimization_algorithms.py b/graphs/ant_colony_optimization_algorithms.py new file mode 100644 index 000000000000..652ad6144297 --- /dev/null +++ b/graphs/ant_colony_optimization_algorithms.py @@ -0,0 +1,226 @@ +""" +Use an ant colony optimization algorithm to solve the travelling salesman problem (TSP) +which asks the following question: +"Given a list of cities and the distances between each pair of cities, what is the + shortest possible route that visits each city exactly once and returns to the origin + city?" + +https://en.wikipedia.org/wiki/Ant_colony_optimization_algorithms +https://en.wikipedia.org/wiki/Travelling_salesman_problem + +Author: Clark +""" + +import copy +import random + +cities = { + 0: [0, 0], + 1: [0, 5], + 2: [3, 8], + 3: [8, 10], + 4: [12, 8], + 5: [12, 4], + 6: [8, 0], + 7: [6, 2], +} + + +def main( + cities: dict[int, list[int]], + ants_num: int, + iterations_num: int, + pheromone_evaporation: float, + alpha: float, + beta: float, + q: float, # Pheromone system parameters Q,which is a constant +) -> tuple[list[int], float]: + """ + Ant colony algorithm main function + >>> main(cities=cities, ants_num=10, iterations_num=20, + ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) + ([0, 1, 2, 3, 4, 5, 6, 7, 0], 37.909778143828696) + >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=5, + ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) + ([0, 1, 0], 5.656854249492381) + >>> main(cities={0: [0, 0], 1: [2, 2], 4: [4, 4]}, ants_num=5, iterations_num=5, + ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) + Traceback (most recent call last): + ... + IndexError: list index out of range + >>> main(cities={}, ants_num=5, iterations_num=5, + ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) + Traceback (most recent call last): + ... + StopIteration + >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=0, iterations_num=5, + ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) + ([], inf) + >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=0, + ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) + ([], inf) + >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=5, + ... pheromone_evaporation=1, alpha=1.0, beta=5.0, q=10) + ([0, 1, 0], 5.656854249492381) + >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=5, + ... pheromone_evaporation=0, alpha=1.0, beta=5.0, q=10) + ([0, 1, 0], 5.656854249492381) + """ + # Initialize the pheromone matrix + cities_num = len(cities) + pheromone = [[1.0] * cities_num] * cities_num + + best_path: list[int] = [] + best_distance = float("inf") + for _ in range(iterations_num): + ants_route = [] + for _ in range(ants_num): + unvisited_cities = copy.deepcopy(cities) + current_city = {next(iter(cities.keys())): next(iter(cities.values()))} + del unvisited_cities[next(iter(current_city.keys()))] + ant_route = [next(iter(current_city.keys()))] + while unvisited_cities: + current_city, unvisited_cities = city_select( + pheromone, current_city, unvisited_cities, alpha, beta + ) + ant_route.append(next(iter(current_city.keys()))) + ant_route.append(0) + ants_route.append(ant_route) + + pheromone, best_path, best_distance = pheromone_update( + pheromone, + cities, + pheromone_evaporation, + ants_route, + q, + best_path, + best_distance, + ) + return best_path, best_distance + + +def distance(city1: list[int], city2: list[int]) -> float: + """ + Calculate the distance between two coordinate points + >>> distance([0, 0], [3, 4] ) + 5.0 + >>> distance([0, 0], [-3, 4] ) + 5.0 + >>> distance([0, 0], [-3, -4] ) + 5.0 + """ + return (((city1[0] - city2[0]) ** 2) + ((city1[1] - city2[1]) ** 2)) ** 0.5 + + +def pheromone_update( + pheromone: list[list[float]], + cities: dict[int, list[int]], + pheromone_evaporation: float, + ants_route: list[list[int]], + q: float, # Pheromone system parameters Q,which is a constant + best_path: list[int], + best_distance: float, +) -> tuple[list[list[float]], list[int], float]: + """ + Update pheromones on the route and update the best route + >>> + >>> pheromone_update(pheromone=[[1.0, 1.0], [1.0, 1.0]], + ... cities={0: [0,0], 1: [2,2]}, pheromone_evaporation=0.7, + ... ants_route=[[0, 1, 0]], q=10, best_path=[], + ... best_distance=float("inf")) + ([[0.7, 4.235533905932737], [4.235533905932737, 0.7]], [0, 1, 0], 5.656854249492381) + >>> pheromone_update(pheromone=[], + ... cities={0: [0,0], 1: [2,2]}, pheromone_evaporation=0.7, + ... ants_route=[[0, 1, 0]], q=10, best_path=[], + ... best_distance=float("inf")) + Traceback (most recent call last): + ... + IndexError: list index out of range + >>> pheromone_update(pheromone=[[1.0, 1.0], [1.0, 1.0]], + ... cities={}, pheromone_evaporation=0.7, + ... ants_route=[[0, 1, 0]], q=10, best_path=[], + ... best_distance=float("inf")) + Traceback (most recent call last): + ... + KeyError: 0 + """ + for a in range(len(cities)): # Update the volatilization of pheromone on all routes + for b in range(len(cities)): + pheromone[a][b] *= pheromone_evaporation + for ant_route in ants_route: + total_distance = 0.0 + for i in range(len(ant_route) - 1): # Calculate total distance + total_distance += distance(cities[ant_route[i]], cities[ant_route[i + 1]]) + delta_pheromone = q / total_distance + for i in range(len(ant_route) - 1): # Update pheromones + pheromone[ant_route[i]][ant_route[i + 1]] += delta_pheromone + pheromone[ant_route[i + 1]][ant_route[i]] = pheromone[ant_route[i]][ + ant_route[i + 1] + ] + + if total_distance < best_distance: + best_path = ant_route + best_distance = total_distance + + return pheromone, best_path, best_distance + + +def city_select( + pheromone: list[list[float]], + current_city: dict[int, list[int]], + unvisited_cities: dict[int, list[int]], + alpha: float, + beta: float, +) -> tuple[dict[int, list[int]], dict[int, list[int]]]: + """ + Choose the next city for ants + >>> city_select(pheromone=[[1.0, 1.0], [1.0, 1.0]], current_city={0: [0, 0]}, + ... unvisited_cities={1: [2, 2]}, alpha=1.0, beta=5.0) + ({1: [2, 2]}, {}) + >>> city_select(pheromone=[], current_city={0: [0,0]}, + ... unvisited_cities={1: [2, 2]}, alpha=1.0, beta=5.0) + Traceback (most recent call last): + ... + IndexError: list index out of range + >>> city_select(pheromone=[[1.0, 1.0], [1.0, 1.0]], current_city={}, + ... unvisited_cities={1: [2, 2]}, alpha=1.0, beta=5.0) + Traceback (most recent call last): + ... + StopIteration + >>> city_select(pheromone=[[1.0, 1.0], [1.0, 1.0]], current_city={0: [0, 0]}, + ... unvisited_cities={}, alpha=1.0, beta=5.0) + Traceback (most recent call last): + ... + IndexError: list index out of range + """ + probabilities = [] + for city in unvisited_cities: + city_distance = distance( + unvisited_cities[city], next(iter(current_city.values())) + ) + probability = (pheromone[city][next(iter(current_city.keys()))] ** alpha) * ( + (1 / city_distance) ** beta + ) + probabilities.append(probability) + + chosen_city_i = random.choices( + list(unvisited_cities.keys()), weights=probabilities + )[0] + chosen_city = {chosen_city_i: unvisited_cities[chosen_city_i]} + del unvisited_cities[next(iter(chosen_city.keys()))] + return chosen_city, unvisited_cities + + +if __name__ == "__main__": + best_path, best_distance = main( + cities=cities, + ants_num=10, + iterations_num=20, + pheromone_evaporation=0.7, + alpha=1.0, + beta=5.0, + q=10, + ) + + print(f"{best_path = }") + print(f"{best_distance = }") diff --git a/greedy_methods/smallest_range.py b/greedy_methods/smallest_range.py new file mode 100644 index 000000000000..e2b7f8d7e96a --- /dev/null +++ b/greedy_methods/smallest_range.py @@ -0,0 +1,71 @@ +""" +smallest_range function takes a list of sorted integer lists and finds the smallest +range that includes at least one number from each list, using a min heap for efficiency. +""" + +from heapq import heappop, heappush +from sys import maxsize + + +def smallest_range(nums: list[list[int]]) -> list[int]: + """ + Find the smallest range from each list in nums. + + Uses min heap for efficiency. The range includes at least one number from each list. + + Args: + nums: List of k sorted integer lists. + + Returns: + list: Smallest range as a two-element list. + + Examples: + >>> smallest_range([[4, 10, 15, 24, 26], [0, 9, 12, 20], [5, 18, 22, 30]]) + [20, 24] + >>> smallest_range([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) + [1, 1] + >>> smallest_range(((1, 2, 3), (1, 2, 3), (1, 2, 3))) + [1, 1] + >>> smallest_range(((-3, -2, -1), (0, 0, 0), (1, 2, 3))) + [-1, 1] + >>> smallest_range([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + [3, 7] + >>> smallest_range([[0, 0, 0], [0, 0, 0], [0, 0, 0]]) + [0, 0] + >>> smallest_range([[], [], []]) + Traceback (most recent call last): + ... + IndexError: list index out of range + """ + + min_heap: list[tuple[int, int, int]] = [] + current_max = -maxsize - 1 + + for i, items in enumerate(nums): + heappush(min_heap, (items[0], i, 0)) + current_max = max(current_max, items[0]) + + # Initialize smallest_range with large integer values + smallest_range = [-maxsize - 1, maxsize] + + while min_heap: + current_min, list_index, element_index = heappop(min_heap) + + if current_max - current_min < smallest_range[1] - smallest_range[0]: + smallest_range = [current_min, current_max] + + if element_index == len(nums[list_index]) - 1: + break + + next_element = nums[list_index][element_index + 1] + heappush(min_heap, (next_element, list_index, element_index + 1)) + current_max = max(current_max, next_element) + + return smallest_range + + +if __name__ == "__main__": + from doctest import testmod + + testmod() + print(f"{smallest_range([[1, 2, 3], [1, 2, 3], [1, 2, 3]])}") # Output: [1, 1] diff --git a/maths/spearman_rank_correlation_coefficient.py b/maths/spearman_rank_correlation_coefficient.py new file mode 100644 index 000000000000..32ff6b9e3d71 --- /dev/null +++ b/maths/spearman_rank_correlation_coefficient.py @@ -0,0 +1,82 @@ +from collections.abc import Sequence + + +def assign_ranks(data: Sequence[float]) -> list[int]: + """ + Assigns ranks to elements in the array. + + :param data: List of floats. + :return: List of ints representing the ranks. + + Example: + >>> assign_ranks([3.2, 1.5, 4.0, 2.7, 5.1]) + [3, 1, 4, 2, 5] + + >>> assign_ranks([10.5, 8.1, 12.4, 9.3, 11.0]) + [3, 1, 5, 2, 4] + """ + ranked_data = sorted((value, index) for index, value in enumerate(data)) + ranks = [0] * len(data) + + for position, (_, index) in enumerate(ranked_data): + ranks[index] = position + 1 + + return ranks + + +def calculate_spearman_rank_correlation( + variable_1: Sequence[float], variable_2: Sequence[float] +) -> float: + """ + Calculates Spearman's rank correlation coefficient. + + :param variable_1: List of floats representing the first variable. + :param variable_2: List of floats representing the second variable. + :return: Spearman's rank correlation coefficient. + + Example Usage: + + >>> x = [1, 2, 3, 4, 5] + >>> y = [5, 4, 3, 2, 1] + >>> calculate_spearman_rank_correlation(x, y) + -1.0 + + >>> x = [1, 2, 3, 4, 5] + >>> y = [2, 4, 6, 8, 10] + >>> calculate_spearman_rank_correlation(x, y) + 1.0 + + >>> x = [1, 2, 3, 4, 5] + >>> y = [5, 1, 2, 9, 5] + >>> calculate_spearman_rank_correlation(x, y) + 0.6 + """ + n = len(variable_1) + rank_var1 = assign_ranks(variable_1) + rank_var2 = assign_ranks(variable_2) + + # Calculate differences of ranks + d = [rx - ry for rx, ry in zip(rank_var1, rank_var2)] + + # Calculate the sum of squared differences + d_squared = sum(di**2 for di in d) + + # Calculate the Spearman's rank correlation coefficient + rho = 1 - (6 * d_squared) / (n * (n**2 - 1)) + + return rho + + +if __name__ == "__main__": + import doctest + + doctest.testmod() + + # Example usage: + print( + f"{calculate_spearman_rank_correlation([1, 2, 3, 4, 5], [2, 4, 6, 8, 10]) = }" + ) + + print(f"{calculate_spearman_rank_correlation([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]) = }") + + print(f"{calculate_spearman_rank_correlation([1, 2, 3, 4, 5], [5, 1, 2, 9, 5]) = }") diff --git a/matrix/spiral_print.py b/matrix/spiral_print.py index 5eef263f7aef..7ba0a275157b 100644 --- a/matrix/spiral_print.py +++ b/matrix/spiral_print.py @@ -116,7 +116,7 @@ def spiral_traversal(matrix: list[list]) -> list[int]: [1, 2, 3, 4, 8, 12, 11, 10, 9, 5, 6, 7] + spiral_traversal([]) """ if matrix: - return list(matrix.pop(0)) + spiral_traversal(list(zip(*matrix))[::-1]) + return list(matrix.pop(0)) + spiral_traversal(list(zip(*matrix))[::-1]) # type: ignore else: return [] diff --git a/scripts/build_directory_md.py b/scripts/build_directory_md.py index 24bc00cd036f..aa95b95db4b5 100755 --- a/scripts/build_directory_md.py +++ b/scripts/build_directory_md.py @@ -6,7 +6,11 @@ def good_file_paths(top_dir: str = ".") -> Iterator[str]: for dir_path, dir_names, filenames in os.walk(top_dir): - dir_names[:] = [d for d in dir_names if d != "scripts" and d[0] not in "._"] + dir_names[:] = [ + d + for d in dir_names + if d != "scripts" and d[0] not in "._" and "venv" not in d + ] for filename in filenames: if filename == "__init__.py": continue diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index 7be4074dc39b..3af6608723a5 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -1,20 +1,9 @@ -""" -This is a Python implementation of the levenshtein distance. -Levenshtein distance is a string metric for measuring the -difference between two sequences. - -For doctests run following command: -python -m doctest -v levenshtein-distance.py -or -python3 -m doctest -v levenshtein-distance.py - -For manual testing run: -python levenshtein-distance.py -""" +from collections.abc import Callable def levenshtein_distance(first_word: str, second_word: str) -> int: - """Implementation of the levenshtein distance in Python. + """ + Implementation of the Levenshtein distance in Python. :param first_word: the first word to measure the difference. :param second_word: the second word to measure the difference. :return: the levenshtein distance between the two words. @@ -47,7 +36,7 @@ def levenshtein_distance(first_word: str, second_word: str) -> int: current_row = [i + 1] for j, c2 in enumerate(second_word): - # Calculate insertions, deletions and substitutions + # Calculate insertions, deletions, and substitutions insertions = previous_row[j + 1] + 1 deletions = current_row[j] + 1 substitutions = previous_row[j] + (c1 != c2) @@ -62,9 +51,75 @@ def levenshtein_distance(first_word: str, second_word: str) -> int: return previous_row[-1] +def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: + """ + Compute the Levenshtein distance between two words (strings). + The function is optimized for efficiency by modifying rows in place. + :param first_word: the first word to measure the difference. + :param second_word: the second word to measure the difference. + :return: the Levenshtein distance between the two words. + Examples: + >>> levenshtein_distance_optimized("planet", "planetary") + 3 + >>> levenshtein_distance_optimized("", "test") + 4 + >>> levenshtein_distance_optimized("book", "back") + 2 + >>> levenshtein_distance_optimized("book", "book") + 0 + >>> levenshtein_distance_optimized("test", "") + 4 + >>> levenshtein_distance_optimized("", "") + 0 + >>> levenshtein_distance_optimized("orchestration", "container") + 10 + """ + if len(first_word) < len(second_word): + return levenshtein_distance_optimized(second_word, first_word) + + if len(second_word) == 0: + return len(first_word) + + previous_row = list(range(len(second_word) + 1)) + + for i, c1 in enumerate(first_word): + current_row = [i + 1] + [0] * len(second_word) + + for j, c2 in enumerate(second_word): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (c1 != c2) + current_row[j + 1] = min(insertions, deletions, substitutions) + + previous_row = current_row + + return previous_row[-1] + + +def benchmark_levenshtein_distance(func: Callable) -> None: + """ + Benchmark the Levenshtein distance function. + :param str: The name of the function being benchmarked. + :param func: The function to be benchmarked. + """ + from timeit import timeit + + stmt = f"{func.__name__}('sitting', 'kitten')" + setup = f"from __main__ import {func.__name__}" + number = 25_000 + result = timeit(stmt=stmt, setup=setup, number=number) + print(f"{func.__name__:<30} finished {number:,} runs in {result:.5f} seconds") + + if __name__ == "__main__": - first_word = input("Enter the first word:\n").strip() - second_word = input("Enter the second word:\n").strip() + # Get user input for words + first_word = input("Enter the first word for Levenshtein distance:\n").strip() + second_word = input("Enter the second word for Levenshtein distance:\n").strip() + + # Calculate and print Levenshtein distances + print(f"{levenshtein_distance(first_word, second_word) = }") + print(f"{levenshtein_distance_optimized(first_word, second_word) = }") - result = levenshtein_distance(first_word, second_word) - print(f"Levenshtein distance between {first_word} and {second_word} is {result}") + # Benchmark the Levenshtein distance functions + benchmark_levenshtein_distance(levenshtein_distance) + benchmark_levenshtein_distance(levenshtein_distance_optimized) diff --git a/web_programming/get_imdbtop.py b/web_programming/get_imdbtop.py.DISABLED similarity index 100% rename from web_programming/get_imdbtop.py rename to web_programming/get_imdbtop.py.DISABLED