diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 14747188..19532baf 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,17 +1,22 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/python { - "name": "Python 3", + // Container definition for a Python 3.11 development environment + "name": "Python 3.11", "image": "mcr.microsoft.com/devcontainers/python:0-3.11", - "onCreateCommand": "sudo apt update && sudo apt upgrade -y && pip3 install --upgrade pip && pip3 install --user -r requirements.txt", + + // Custom configuration options "customizations": { "vscode": { + + // Use 'settings' to set default VS code values on container create "settings": { "jupyter.kernels.excludePythonEnvironments": ["/usr/bin/python3"], - "remote.portsAttributes": { - "ipykernel_launcher": {"onAutoForward": "ignore"} - } + "remote.autoForwardPorts": false, + "remote.restoreForwardedPorts": false }, + + // Add the IDs of VS code extensions you want to install here "extensions": [ "-dbaeumer.vscode-eslint", "ms-python.python", @@ -20,5 +25,10 @@ ] } }, + + // Use 'onCreateCommand' to run commands once when the container is created + "onCreateCommand": "sudo apt update && sudo apt upgrade -y && pip3 install --upgrade pip && pip3 install --user -r requirements.txt", + + // Use 'postAttachCommand' to run commands each time a user connects to the container "postAttachCommand": "htop" } \ No newline at end of file diff --git a/.gitignore b/.gitignore index 54f2a929..22feac23 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ -.vscode +__pycache__ .ipynb_checkpoints +.vscode +.venv diff --git a/README.md b/README.md index 5e6d637c..dcd6b86d 100644 --- a/README.md +++ b/README.md @@ -1,69 +1,131 @@ -# Algorithm Optimization Project +# Algorithm Optimization Project - Machine Learning 
-![Preview](assets/preview.png) +[![Codespaces Prebuilds](https://github.com/4GeeksAcademy/gperdrizet-algorithm-optimization-project-machine-learning/actions/workflows/codespaces/create_codespaces_prebuilds/badge.svg)](https://github.com/4GeeksAcademy/gperdrizet-algorithm-optimization-project-machine-learning/actions/workflows/codespaces/create_codespaces_prebuilds) -This repository contains exercises designed to help you practice optimizing Python algorithms for better performance and readability. +A comprehensive programming optimization project focused on improving algorithm efficiency and code performance. This project demonstrates essential optimization techniques through practical exercises involving text processing and list operations. -## What You'll Learn +![Project Preview](assets/preview.png) -- Text processing optimization techniques -- Efficient list operations and filtering -- Using Python's built-in functions and data structures -- Code modularity and best practices -- Performance analysis and improvement strategies -## Assignment Overview +## Project Overview -The `problems.ipynb` notebook contains two main exercises: +This project focuses on algorithm optimization through two main exercises that teach fundamental performance improvement techniques: -1. **Text Processing Optimization** - Improve code that processes text by converting to lowercase, removing punctuation, counting word frequencies, and finding the most common words. +**Exercise 1: Text Processing Optimization** +- Convert text to lowercase +- Remove punctuation marks efficiently +- Count word frequencies +- Extract most common words -2. **List Processing Optimization** - Enhance code that filters even numbers, duplicates values, sums results, and checks for prime numbers. 
+**Exercise 2: List Processing Optimization** +- Filter even numbers from lists +- Duplicate list elements +- Sum numerical values +- Prime number detection + +The project provides hands-on experience with: +- Code refactoring and optimization +- Efficient data structure usage +- Python built-in function utilization +- Modular programming practices +- Performance analysis and improvement -Each exercise includes working but inefficient code that you'll optimize using better algorithms, data structures, and Python idioms. ## Getting Started ### Option 1: GitHub Codespaces (Recommended) -1. Fork this repository to your GitHub account -2. Click the green "Code" button on your forked repository -3. Select "Codespaces" tab -4. Click "Create codespace on main" -5. Wait for the environment to load (this may take a few minutes) -6. Open `problems.ipynb` and start working! + +1. **Fork the Repository** + - Click the "Fork" button on the top right of the GitHub repository page + - 4Geeks students: set 4GeeksAcademy as the owner - 4Geeks pays for your codespace usage. All others, set yourself as the owner + - Give the fork a descriptive name. 4Geeks students: I recommend including your GitHub username to help in finding the fork if you lose the link + - Click "Create fork" + - 4Geeks students: bookmark or otherwise save the link to your fork + +2. **Create a GitHub Codespace** + - On your forked repository, click the "Code" button + - Select "Create codespace on main" + - If the "Create codespace on main" option is grayed out - go to your codespaces list from the three-bar menu at the upper left and delete an old codespace + - Wait for the environment to load (dependencies are pre-installed) + +3. **Start Working** + - Open `notebooks/assignment.ipynb` in the Jupyter interface + - Follow the step-by-step instructions in the notebook ### Option 2: Local Development -1. Fork and clone this repository -2. Create a virtual environment: `python -m venv venv` -3. 
Activate the virtual environment: - On Windows: `venv\Scripts\activate` - On macOS/Linux: `source venv/bin/activate` -4. Install Jupyter: `pip install jupyter` -5. Install dependencies: `pip install -r requirements.txt` -6. Launch Jupyter: `jupyter notebook` -7. Open `problems.ipynb` - -## Working with the Notebook - -- Each exercise contains the original inefficient code followed by optimization points -- Review the provided solutions as reference implementations -- Try implementing your own optimizations before checking the solutions -- Run each cell to test your code and compare performance - -## Learning Goals - -By completing this assignment, you will: -- Understand common performance bottlenecks in Python code -- Learn to use appropriate data structures for different problems -- Practice writing clean, modular, and efficient code -- Gain experience with Python's built-in optimization tools - -## Assessment - -Focus on: -- **Correctness**: Does your optimized code produce the same results? -- **Efficiency**: Is your solution faster and more memory-efficient? -- **Readability**: Is your code clean and well-structured? -- **Best Practices**: Are you using appropriate Python idioms? - -Happy coding! + +1. **Prerequisites** + - Git + - Python >= 3.10 + +2. **Fork the repository** + - Click the "Fork" button on the top right of the GitHub repository page + - Optional: give the fork a new name and/or description + - Click "Create fork" + +3. **Clone the repository** + - From your fork of the repository, click the green "Code" button at the upper right + - From the "Local" tab, select HTTPS and copy the link + - Run the following commands on your machine, replacing `YOUR_FORK_URL` and `YOUR_REPO_NAME` + + ```bash + git clone YOUR_FORK_URL + cd YOUR_REPO_NAME + ``` + +4. **Set Up Environment** + + ```bash + python -m venv venv + source venv/bin/activate + pip install -r requirements.txt + ``` + +5. 
**Launch Jupyter & start the notebook** + ```bash + jupyter notebook notebooks/assignment.ipynb + ``` + + +## Project Structure + +``` +├── .devcontainer/ # Development container configuration +├── assets/ # Files and resources directory +│ +├── notebooks/ # Jupyter notebook directory +│ ├── assignment.ipynb # Assignment notebook with exercises +│ └── solution.ipynb # Solution notebook with optimized code +│ +├── .gitignore # Files/directories not tracked by git +├── requirements.txt # Python dependencies +└── README.md # Project documentation +``` + + +## Learning Objectives + +1. **Algorithm Analysis**: Identify performance bottlenecks in existing code +2. **Data Structure Optimization**: Use appropriate Python data structures for efficiency +3. **Built-in Functions**: Leverage Python's optimized built-in functions +4. **List Comprehensions**: Replace loops with more efficient comprehensions +5. **Modular Design**: Break code into focused, reusable functions +6. **Performance Comparison**: Understand the impact of different approaches + +## Technologies Used + +- **Python 3.11**: Core programming language +- **Collections**: Counter for efficient frequency counting +- **String**: Built-in string processing utilities +- **Math**: Mathematical operations and functions +- **Jupyter**: Interactive development environment + + +## Contributing + +This is an educational project. Contributions for improving the optimization examples or adding new exercises are welcome: + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. 
Submit a pull request diff --git a/notebooks/assignment.ipynb b/notebooks/assignment.ipynb new file mode 100644 index 00000000..69225b4d --- /dev/null +++ b/notebooks/assignment.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "275f1ad9", + "metadata": {}, + "outputs": [], + "source": [ + "import string\n", + "from collections import Counter" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "27b09e13", + "metadata": {}, + "source": [ + "# Optimization of Algorithms problems" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ed05e9bf", + "metadata": {}, + "source": [ + "## Exercise 1\n", + "### Code Optimization for Text Processing\n", + "\n", + "You are provided with a text processing code to perform the following operations:\n", + "\n", + "1. Convert all text to lowercase.\n", + "2. Remove punctuation marks.\n", + "3. Count the frequency of each word.\n", + "4. Show the 5 most common words.\n", + "\n", + "The code works, but it is inefficient and can be optimized. Your task is to identify areas that can be improved and rewrite those parts to make the code more efficient and readable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8467465b", + "metadata": {}, + "outputs": [], + "source": [ + "def remove_punctuation(text):\n", + " translator = str.maketrans(\"\", \"\", string.punctuation)\n", + " return text.translate(translator)\n", + "\n", + "def count_words(text):\n", + " # Split text into words\n", + " palabras = text.split()\n", + "\n", + " return Counter(palabras)\n", + "\n", + "def get_most_common(frequencies, n = 5):\n", + " return frequencies.most_common(n)\n", + "\n", + "def process_text(text):\n", + " # Text to lowercase\n", + " text = text.lower()\n", + "\n", + " # Remove punctuation\n", + " text = remove_punctuation(text)\n", + " \n", + " # Count frequencies\n", + " frequencies = count_words(text)\n", + " \n", + " top_5 = get_most_common(frequencies)\n", + " \n", + " for w, frequency in top_5:\n", + " print(f\"'{w}': {frequency} veces\")\n", + "\n", + "text = \"\"\"\n", + " In the heart of the city, Emily discovered a quaint little café, hidden away from the bustling streets. \n", + " The aroma of freshly baked pastries wafted through the air, drawing in passersby. As she sipped on her latte, \n", + " she noticed an old bookshelf filled with classics, creating a cozy atmosphere that made her lose track of time.\n", + "\"\"\"\n", + "\n", + "process_text(text)" + ] + }, + { + "cell_type": "markdown", + "id": "29040779", + "metadata": {}, + "source": [ + "Points to optimize:\n", + "\n", + "1. **Removal of punctuation marks**: Using `replace` in a loop can be inefficient, especially with long texts. Look for a more efficient way to remove punctuation marks.\n", + "2. **Frequency count**: The code checks for the existence of each word in the dictionary and then updates its count. This can be done more efficiently with certain data structures in Python.\n", + "3. **Sort and select:** Consider if there is a more direct or efficient way to get the 5 most frequent words without sorting all the words.\n", + "4. 
**Modularity**: Break the code into smaller functions so that each one performs a specific task. This will not only optimize performance, but also make the code more readable and maintainable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57cd6641", + "metadata": {}, + "outputs": [], + "source": [ + "# Your code here\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "011996bc", + "metadata": {}, + "source": [ + "## Exercise 2\n", + "### Code Optimization for List Processing\n", + "\n", + "You have been given a code that performs operations on a list of numbers for:\n", + "\n", + "1. Filter out even numbers.\n", + "2. Duplicate each number.\n", + "3. Add all numbers.\n", + "4. Check if the result is a prime number.\n", + "\n", + "The code provided achieves its goal, but it may be inefficient. Your task is to identify and improve the parts of the code to increase its efficiency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "783d03a0", + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "\n", + "def is_prime(n):\n", + " if n <= 1:\n", + " return False\n", + " for i in range(2, int(math.sqrt(n)) + 1):\n", + " if n % i == 0:\n", + " return False\n", + " return True\n", + "\n", + "def process_list(list_):\n", + " filtered_list = []\n", + " for num in list_:\n", + " if num % 2 == 0:\n", + " filtered_list.append(num)\n", + " \n", + " duplicate_list = []\n", + " for num in filtered_list:\n", + " duplicate_list.append(num * 2)\n", + " \n", + " sum = 0\n", + " for num in duplicate_list:\n", + " sum += num\n", + "\n", + " prime = is_prime(sum)\n", + " \n", + " return sum, prime\n", + "\n", + "nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", + "result, result_prime = process_list(nums)\n", + "print(f\"Result: {result}, ¿Prime? 
{'Yes' if result_prime else 'No'}\")" + ] + }, + { + "cell_type": "markdown", + "id": "128d564e", + "metadata": {}, + "source": [ + "Points to optimize:\n", + "\n", + "1. **Filter numbers**: The code goes through the original list to filter out even numbers. Consider a more efficient way to filter the list.\n", + "2. **Duplication**: The list is traversed multiple times. Is there a way to do this more efficiently?\n", + "3. **Summing**: The numbers in a list are summed through a loop. Python has built-in functions that can optimize this.\n", + "4. **Function `is_prime`**: While this function is relatively efficient, investigate if there are ways to make it even faster.\n", + "5. **Modularity**: Consider breaking the code into smaller functions, each focused on a specific task." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f40e35d6", + "metadata": {}, + "outputs": [], + "source": [ + "# Your code here\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "1af70806", + "metadata": {}, + "source": [ + "Both exercises will help you improve your code performance optimization skills and give you a better understanding of how different data structures and programming techniques can affect the efficiency of your code." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/problems.ipynb b/notebooks/solution.ipynb similarity index 99% rename from problems.ipynb rename to notebooks/solution.ipynb index 4530e5a5..43ed52d8 100644 --- a/problems.ipynb +++ b/notebooks/solution.ipynb @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "8467465b", "metadata": {}, "outputs": [ @@ -78,7 +78,7 @@ " # Remove punctuation\n", " text = remove_punctuation(text)\n", " \n", - " # Count frecuencies\n", + " # Count frequencies\n", " frequencies = count_words(text)\n", " \n", " top_5 = get_most_common(frequencies)\n", diff --git a/requirements.txt b/requirements.txt index 0b848732..ac72e6e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -ipykernel \ No newline at end of file +jupyter==1.1.1 \ No newline at end of file