# Working with Libraries

## Learning Objectives
By the end of this lesson, you will be able to:
- Install and manage Python packages using pip
- Work with virtual environments for project isolation
- Use popular standard library modules effectively
- Handle third-party library dependencies
- Create and publish your own packages

## Core Concepts
- **pip**: Python package installer
- **Virtual Environment**: Isolated Python environment for projects
- **PyPI**: Python Package Index (repository of packages)
- **requirements.txt**: File listing project dependencies
- **Standard Library**: Modules that ship with Python and need no separate installation

# 1. Standard Library Modules

In [None]:
# datetime - Working with dates and times
from datetime import datetime, timedelta, date
import calendar

# Take one timestamp and derive every other value from it, so all the
# printed dates refer to the same instant.
now = datetime.now()
print("Current time:", now)
print("Formatted:", now.strftime('%Y-%m-%d %H:%M:%S'))

# Date arithmetic: adding or subtracting a timedelta gives a new datetime.
tomorrow = now + timedelta(days=1)
last_week = now - timedelta(weeks=1)
print("Tomorrow:", tomorrow.date())
print("Last week:", last_week.date())

# os and pathlib - File system operations
import os
from pathlib import Path

# Show where the interpreter is running and where the user's home is.
print("Current directory:", os.getcwd())
print("Home directory:", Path.home())

# Build a small project skeleton relative to the current directory.
# exist_ok=True keeps the cell re-runnable without raising FileExistsError.
test_dir = Path("test_project")
test_dir.mkdir(exist_ok=True)
test_dir.joinpath("src").mkdir(exist_ok=True)
test_dir.joinpath("docs").mkdir(exist_ok=True)

# List what the skeleton contains (iterdir gives no ordering guarantee).
print("Test directory contents:", [*test_dir.iterdir()])

# collections - Specialized container datatypes
from collections import Counter, defaultdict, namedtuple

# Counter: tally how often each whitespace-separated word occurs.
text = "hello world hello python world"
word_counts = Counter(text.split())
print("Word counts:", word_counts)
print("Most common:", word_counts.most_common(2))

# defaultdict: a missing key is created on first access with an empty list,
# so scores can be added without checking whether the student exists yet.
grades = defaultdict(list)
grades["Alice"] += [85]
grades["Bob"] += [92]
grades["Alice"] += [78]
print("Grades:", dict(grades))

# namedtuple: a lightweight record whose fields are reachable by name.
Point = namedtuple("Point", "x y")
p1 = Point(x=3, y=4)
print(f"Point: {p1}, Distance from origin: {(p1.x * p1.x + p1.y * p1.y) ** 0.5}")

# itertools - Iterator functions
import itertools

# combinations ignores order (red/blue equals blue/red); permutations does not.
colors = ['red', 'blue', 'green']
print("Combinations:", list(itertools.combinations(colors, 2)))
print("Permutations:", list(itertools.permutations(colors, 2)))

# cycle repeats its input forever, so take a bounded slice of ten items.
cycler = itertools.cycle('ABC')
print("First 10 cycled items:", list(itertools.islice(cycler, 10)))

# random - Generate random numbers
import random

# Seed the generator so every run of this cell prints the same values.
random.seed(42)
print("Random integer:", random.randint(1, 100))
print("Random float:", random.random())
print("Random choice:", random.choice(['apple', 'banana', 'cherry']))

# shuffle reorders the list in place; sample draws without replacement and
# leaves the list untouched.
numbers = [*range(1, 11)]
random.shuffle(numbers)
print("Shuffled numbers:", numbers)
print("Random sample:", random.sample(numbers, 3))

# 2. Package Management with pip

In [None]:
# Reference sheet of pip commands. These are shell commands meant for a
# terminal; they are kept in a string here and only printed, never executed.
pip_commands = """
# Install a package
pip install requests

# Install specific version
pip install requests==2.28.0

# Install from requirements file
pip install -r requirements.txt

# List installed packages
pip list

# Show package information
pip show requests

# Upgrade a package
pip install --upgrade requests

# Uninstall a package
pip uninstall requests

# Create requirements file
pip freeze > requirements.txt

# Install in development mode (for local packages)
pip install -e .
"""

# Heading and body are emitted on separate lines.
print("Common pip commands:", pip_commands, sep="\n")

# Sample requirements.txt showing the common version specifiers:
# minimum (>=), exact pin (==), bounded range (>=,<) and unpinned.
requirements_content = """
requests>=2.25.0
beautifulsoup4==4.11.1
pandas>=1.3.0,<2.0.0
numpy
pytest>=6.0.0
"""

# Write the sample into the current working directory; mode "w" replaces
# any requirements.txt that is already there.
with open("requirements.txt", mode="w") as requirements_file:
    requirements_file.write(requirements_content)

print("Created requirements.txt file")

# Check which common packaging tools are installed in the current environment
import sys
import pkg_resources

# importlib.metadata is the standard-library way to inspect installed
# distributions; unlike the deprecated pkg_resources API it does not require
# setuptools to be installed in the environment.
from importlib import metadata as importlib_metadata

# Collect the declared name of every distribution visible to this interpreter.
# Distributions with broken metadata can report no name, so those are skipped.
installed_packages = [
    dist_name
    for dist_name in (
        dist.metadata["Name"] for dist in importlib_metadata.distributions()
    )
    if dist_name
]
common_packages = ['pip', 'setuptools', 'wheel']

# Only show the packaging tools to keep the output short.
print(f"Sample installed packages: {[pkg for pkg in installed_packages if pkg in common_packages]}")

# Reference sheet of virtual environment commands. These are shell commands;
# they are kept in a string here and only printed, never executed.
# The doubled backslashes in the Windows path are escapes that print as a
# single backslash each.
venv_commands = """
# Create virtual environment
python -m venv myproject_env

# Activate (Windows)
myproject_env\\Scripts\\activate

# Activate (macOS/Linux)
source myproject_env/bin/activate

# Deactivate
deactivate

# Install packages in virtual environment
pip install requests pandas

# Create requirements from virtual environment
pip freeze > requirements.txt
"""

# "\n" (single backslash) emits a real blank line before the heading;
# the previous "\\n" printed the two characters backslash and n.
print("\nVirtual environment commands:")
print(venv_commands)

# Package version checking
def check_package_version(package_name):
    """Report whether a distribution is installed and, if so, its version.

    Args:
        package_name: Distribution name as used with ``pip install``.

    Returns:
        ``"<name>: <version>"`` when the distribution is found, otherwise
        ``"<name>: Not installed"``. An empty name is reported as not
        installed.
    """
    # Imported here so the helper does not depend on the deprecated
    # pkg_resources module, which is missing when setuptools is not installed.
    from importlib import metadata

    not_installed = f"{package_name}: Not installed"

    # An empty name can never identify a distribution; answer directly
    # instead of relying on how the metadata lookup treats it.
    if not package_name:
        return not_installed

    try:
        version = metadata.version(package_name)
    except metadata.PackageNotFoundError:
        return not_installed
    return f"{package_name}: {version}"

# Print one status line for each of a few well-known distributions.
test_packages = ['pip', 'setuptools', 'wheel', 'requests', 'numpy']
for package_status in map(check_package_version, test_packages):
    print(package_status)

# 3. Popular Third-Party Libraries

In [None]:
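# Note: The snippets below are stored as strings and printed, not executed,
# so this cell runs even when the libraries are not installed.
# To try the snippets yourself, install the libraries with: pip install requests beautifulsoup4 pillow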

# requests - HTTP library. The snippet is held in a string and printed for
# reading only, so no network request is made when this cell runs.
requests_example = """
import requests

# GET request
response = requests.get('https://api.github.com/users/octocat')
if response.status_code == 200:
    user_data = response.json()
    print(f"User: {user_data['name']}")
    print(f"Public repos: {user_data['public_repos']}")

# POST request with data
data = {'key': 'value', 'name': 'Python User'}
response = requests.post('https://httpbin.org/post', json=data)
print(f"Response status: {response.status_code}")

# Session for persistent connections
session = requests.Session()
session.headers.update({'User-Agent': 'My Python App'})
response = session.get('https://httpbin.org/headers')
"""

# Heading and snippet are emitted on separate lines.
print("requests library example:", requests_example, sep="\n")

# beautifulsoup4 - HTML parsing. The snippet is held in a string and printed
# for reading only, so bs4 does not need to be installed to run this cell.
bs4_example = """
from bs4 import BeautifulSoup
import requests

# Parse HTML content
html_content = '''
<html>
    <head><title>Sample Page</title></head>
    <body>
        <h1>Welcome</h1>
        <p class="intro">This is a paragraph.</p>
        <ul>
            <li>Item 1</li>
            <li>Item 2</li>
        </ul>
    </body>
</html>
'''

soup = BeautifulSoup(html_content, 'html.parser')

# Find elements
title = soup.find('title').text
heading = soup.find('h1').text
intro = soup.find('p', class_='intro').text
items = [li.text for li in soup.find_all('li')]

print(f"Title: {title}")
print(f"Heading: {heading}")
print(f"Items: {items}")
"""

# "\n" (single backslash) emits a real blank line before the heading;
# the previous "\\n" printed the two characters backslash and n.
print("\nBeautifulSoup example:")
print(bs4_example)

# Catalogue of widely used libraries: category -> {package name: description}.
libraries_overview = {
    "Web Development": {
        "requests": "HTTP library for API calls",
        "flask": "Lightweight web framework",
        "django": "Full-featured web framework",
        "fastapi": "Modern API framework"
    },
    "Data Science": {
        "numpy": "Numerical computing arrays",
        "pandas": "Data manipulation and analysis",
        "matplotlib": "Data visualization",
        "seaborn": "Statistical data visualization",
        "scikit-learn": "Machine learning library"
    },
    "Image Processing": {
        "pillow": "Image processing library",
        "opencv-python": "Computer vision library"
    },
    "Testing": {
        "pytest": "Testing framework",
        "unittest": "Built-in testing framework",
        "mock": "Mock object library"
    },
    "Database": {
        "sqlalchemy": "SQL toolkit and ORM",
        "psycopg2": "PostgreSQL adapter",
        "pymongo": "MongoDB driver"
    }
}

# "\n" (single backslash) emits a real blank line before each heading;
# the previous "\\n" printed the two characters backslash and n.
print("\nPopular Python libraries by category:")
for category, libs in libraries_overview.items():
    print(f"\n{category}:")
    for lib, description in libs.items():
        print(f"  - {lib}: {description}")

# Example setup.py for packaging a project. The script is held in a string
# and printed for reading only; nothing is built or installed here.
setup_py_content = '''
from setuptools import setup, find_packages

setup(
    name="my-package",
    version="0.1.0",
    author="Your Name",
    author_email="your.email@example.com",
    description="A sample Python package",
    long_description=open("README.md").read(),
    long_description_content_type="text/markdown",
    url="https://github.com/yourusername/my-package",
    packages=find_packages(),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
    ],
    python_requires=">=3.8",
    install_requires=[
        "requests>=2.25.0",
        "click>=7.0",
    ],
    extras_require={
        "dev": [
            "pytest>=6.0",
            "black",
            "flake8",
        ],
    },
    entry_points={
        "console_scripts": [
            "my-tool=my_package.cli:main",
        ],
    },
)
'''

# "\n" (single backslash) emits a real blank line before the heading;
# the previous "\\n" printed the two characters backslash and n.
print("\nsetup.py example for creating packages:")
print(setup_py_content)

# Practice Exercises

In [None]:
# Exercise 1: Create a utility library using standard modules
import json
import csv
import logging
from datetime import datetime
from pathlib import Path

class DataManager:
    """Save and reload simple datasets as JSON or CSV files in one directory.

    No method lets an exception escape: failures are logged and reported
    through the return value (``True``/``False`` for saves, the loaded data
    or ``None`` for loads). Files are always read and written as UTF-8 so the
    result does not depend on the platform's default encoding.
    """

    def __init__(self, base_dir="data"):
        """Create the storage directory if needed and set up logging.

        Args:
            base_dir: Directory that holds the data files. It may be nested
                (for example ``"out/data"``); missing parents are created.
        """
        self.base_dir = Path(base_dir)
        # parents=True allows nested paths; without it a missing parent
        # directory raises FileNotFoundError.
        self.base_dir.mkdir(parents=True, exist_ok=True)

        # basicConfig does nothing when the root logger already has handlers,
        # so constructing several managers does not duplicate log output.
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)

    def save_json(self, data, filename):
        """Write ``data`` to ``<base_dir>/<filename>.json``.

        Values that JSON cannot represent natively (such as ``datetime``)
        are stored as their ``str()`` form, so they come back as strings.

        Returns:
            ``True`` on success, ``False`` if writing failed.
        """
        filepath = self.base_dir / f"{filename}.json"
        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, default=str)
        except Exception as e:
            # Best-effort API: report the failure instead of raising.
            self.logger.error("Failed to save JSON: %s", e)
            return False
        self.logger.info("Saved JSON data to %s", filepath)
        return True

    def load_json(self, filename):
        """Read ``<base_dir>/<filename>.json``.

        Returns:
            The decoded data, or ``None`` if the file is missing or invalid.
        """
        filepath = self.base_dir / f"{filename}.json"
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception as e:
            self.logger.error("Failed to load JSON: %s", e)
            return None
        self.logger.info("Loaded JSON data from %s", filepath)
        return data

    def save_csv(self, data, filename, headers=None):
        """Write rows to ``<base_dir>/<filename>.csv``.

        Args:
            data: Either a list of dictionaries (one per row) or a list of
                plain row sequences. An empty list produces an empty file.
            filename: File name without the ``.csv`` extension.
            headers: Column names. For dictionaries it defaults to the keys
                of the first row; for plain rows no header line is written
                unless it is given.

        Returns:
            ``True`` on success, ``False`` if writing failed.
        """
        filepath = self.base_dir / f"{filename}.csv"
        try:
            # newline='' lets the csv module control line endings itself.
            with open(filepath, 'w', newline='', encoding='utf-8') as f:
                if data and isinstance(data[0], dict):
                    fieldnames = headers or list(data[0].keys())
                    writer = csv.DictWriter(f, fieldnames=fieldnames)
                    writer.writeheader()
                    writer.writerows(data)
                else:
                    writer = csv.writer(f)
                    if headers:
                        writer.writerow(headers)
                    writer.writerows(data)
        except Exception as e:
            self.logger.error("Failed to save CSV: %s", e)
            return False
        self.logger.info("Saved CSV data to %s", filepath)
        return True

    def load_csv(self, filename, as_dict=True):
        """Read ``<base_dir>/<filename>.csv``.

        Args:
            filename: File name without the ``.csv`` extension.
            as_dict: When true, return one dictionary per row keyed by the
                header line; otherwise return every line (header included)
                as a list of strings.

        Returns:
            The list of rows, or ``None`` if the file could not be read.
            All values are strings, because CSV carries no type information.
        """
        filepath = self.base_dir / f"{filename}.csv"
        try:
            # newline='' is required when reading too: without it, line
            # breaks embedded in quoted fields are silently rewritten.
            with open(filepath, 'r', newline='', encoding='utf-8') as f:
                reader = csv.DictReader(f) if as_dict else csv.reader(f)
                data = list(reader)
        except Exception as e:
            self.logger.error("Failed to load CSV: %s", e)
            return None
        self.logger.info("Loaded CSV data from %s", filepath)
        return data

# Exercise the DataManager with its default storage directory.
dm = DataManager()

# JSON round trip. The timestamp is a datetime object, which save_json
# stores via default=str, so it is read back as a plain string.
sample_data = dict(
    users=[
        dict(name="Alice", age=30, city="New York"),
        dict(name="Bob", age=25, city="San Francisco"),
    ],
    timestamp=datetime.now(),
    total_users=2,
)

dm.save_json(sample_data, "users")
loaded_data = dm.load_json("users")
print("Loaded data:", loaded_data)

# CSV round trip. The rows are dictionaries, so the keys of the first row
# become the header line.
csv_data = [
    dict(name=student, score=score, subject=subject)
    for student, score, subject in (
        ("Alice", 95, "Math"),
        ("Bob", 87, "Math"),
        ("Charlie", 92, "Science"),
    )
]

dm.save_csv(csv_data, "scores")
loaded_csv = dm.load_csv("scores")
print("Loaded CSV:", loaded_csv)

# Exercise 2: Package dependency analyzer
import subprocess
import sys
from collections import defaultdict

class PackageAnalyzer:
    """Demonstrate package analysis using hard-coded sample data.

    Nothing here inspects the real environment: the package list, sizes and
    "outdated" report are fixed values chosen for the lesson.
    """

    def __init__(self):
        # name -> version, filled in by get_installed_packages()
        self.packages = {}
        # Placeholder for per-package dependency lists; no method in this
        # class populates it.
        self.dependencies = defaultdict(list)

    def get_installed_packages(self):
        """Load the simulated package list into ``self.packages``.

        Returns:
            The ``{name: version}`` dictionary (the same object as
            ``self.packages``).
        """
        # In real scenario: result = subprocess.run([sys.executable, '-m', 'pip', 'list'], ...)
        # For demo, we'll simulate some packages
        simulated_packages = [
            "pip==23.0.1",
            "setuptools==65.5.0",
            "wheel==0.38.4",
            "requests==2.28.2",
            "urllib3==1.26.14",
            "certifi==2022.12.7",
            "charset-normalizer==3.0.1",
            "idna==3.4"
        ]

        for pkg in simulated_packages:
            # Each entry is a "name==version" pin.
            name, version = pkg.split('==')
            self.packages[name] = version

        return self.packages

    def analyze_package_sizes(self):
        """Return simulated size information.

        Returns:
            ``{name: {"size": <text>, "files": <count>}}`` for a fixed subset
            of packages.
        """
        return {
            "pip": {"size": "9.2MB", "files": 215},
            "setuptools": {"size": "2.1MB", "files": 178},
            "requests": {"size": "0.5MB", "files": 32},
            "urllib3": {"size": "0.3MB", "files": 28}
        }

    def check_outdated(self):
        """Return a simulated report of outdated packages.

        Returns:
            ``{name: {"current": <version>, "latest": <version>}}``.
        """
        return {
            "pip": {"current": "23.0.1", "latest": "23.1.2"},
            "setuptools": {"current": "65.5.0", "latest": "67.6.1"}
        }

    def generate_requirements(self, exclude_system=True):
        """Build requirements.txt content from ``self.packages``.

        Args:
            exclude_system: When true, leave out the packaging tools
                ``pip``, ``setuptools`` and ``wheel``.

        Returns:
            One ``name==version`` pin per line, separated by newlines, or an
            empty string when no packages have been loaded.
        """
        system_packages = {"pip", "setuptools", "wheel"}
        requirements = [
            f"{name}=={version}"
            for name, version in self.packages.items()
            if not (exclude_system and name in system_packages)
        ]

        # Join with a real newline. The previous "\\n" inserted the two
        # characters backslash and n, which put every pin on a single line.
        return "\n".join(requirements)

# Exercise the analyzer with its built-in sample data.
analyzer = PackageAnalyzer()
packages = analyzer.get_installed_packages()
print(f"Installed packages: {len(packages)}")

# The headings below use "\n" (single backslash) to emit a real blank line;
# the previous "\\n" printed the two characters backslash and n.
size_info = analyzer.analyze_package_sizes()
print("\nPackage sizes:")
for pkg, info in size_info.items():
    print(f"  {pkg}: {info['size']} ({info['files']} files)")

outdated = analyzer.check_outdated()
print("\nOutdated packages:")
for pkg, versions in outdated.items():
    print(f"  {pkg}: {versions['current']} -> {versions['latest']}")

requirements = analyzer.generate_requirements()
print("\nGenerated requirements.txt:")
print(requirements)

# Exercise 3: Simple web scraper (simulation)
import re
from urllib.parse import urljoin, urlparse

class WebScraper:
    """Toy scraper that works on canned responses instead of the network."""

    def __init__(self):
        # Starts empty; no method of this class reads or writes it.
        self.session_data = {}

    def simulate_web_request(self, url):
        """Return a canned response dictionary for ``url``.

        Any URL containing ``github.com`` gets a 200 response with a small
        HTML page; every other URL gets a 404. The result has the keys
        ``status_code``, ``content`` and ``headers``.
        """
        # Everything that is not GitHub gets the canned 404.
        if "github.com" not in url:
            return {
                "status_code": 404,
                "content": "Not Found",
                "headers": {},
            }

        # The page text keeps its original leading whitespace so the returned
        # content is character-for-character what it has always been.
        github_page = '''
                <html>
                    <head><title>GitHub</title></head>
                    <body>
                        <h1>Welcome to GitHub</h1>
                        <div class="repo">
                            <h3>python/cpython</h3>
                            <p>The Python programming language</p>
                            <span class="stars">45.2k stars</span>
                        </div>
                        <div class="repo">
                            <h3>microsoft/vscode</h3>
                            <p>Visual Studio Code</p>
                            <span class="stars">137k stars</span>
                        </div>
                    </body>
                </html>
                '''
        return {
            "status_code": 200,
            "content": github_page,
            "headers": {"content-type": "text/html"},
        }

    def extract_data(self, html_content):
        """Pull the page title and repository entries out of ``html_content``.

        Returns a dictionary with ``title`` (``"No title"`` when the page has
        no ``<title>`` element) and ``repositories``, a list of
        ``{"name", "description", "stars"}`` dictionaries in page order.
        """
        # Regex is enough for this fixed markup (in real scenario, use BeautifulSoup).
        title_hit = re.search(r'<title>(.*?)</title>', html_content)
        if title_hit is None:
            page_title = "No title"
        else:
            page_title = title_hit.group(1)

        # One match per <div class="repo"> block; DOTALL lets ".*?" span the
        # line breaks between the nested tags.
        repo_pattern = r'<div class="repo">.*?<h3>(.*?)</h3>.*?<p>(.*?)</p>.*?<span class="stars">(.*?)</span>.*?</div>'
        repositories = []
        for repo_hit in re.finditer(repo_pattern, html_content, re.DOTALL):
            repo_name, repo_desc, repo_stars = repo_hit.groups()
            repositories.append({
                "name": repo_name.strip(),
                "description": repo_desc.strip(),
                "stars": repo_stars.strip(),
            })

        return {"title": page_title, "repositories": repositories}

    def scrape_site(self, url):
        """Fetch ``url`` (simulated) and return the extracted data.

        On any status other than 200 the result is ``{"error": <message>}``.
        """
        print(f"Scraping: {url}")
        response = self.simulate_web_request(url)

        if response["status_code"] != 200:
            return {"error": f"Failed to fetch {url}"}
        return self.extract_data(response["content"])

# Exercise the scraper against the simulated GitHub page.
scraper = WebScraper()
github_data = scraper.scrape_site("https://github.com")

print("Scraped data from GitHub:")
# scrape_site returns an {"error": ...} dictionary when the fetch fails, so
# read the title defensively, the same way the repository list is read below.
print(f"Title: {github_data.get('title', 'No title')}")
print("Repositories:")
for repo in github_data.get('repositories', []):
    print(f"  - {repo['name']}: {repo['description']} ({repo['stars']})")

# Save scraped data to the current working directory.
with open("scraped_data.json", "w") as f:
    json.dump(github_data, f, indent=2)

# "\n" (single backslash) emits a real blank line before the message;
# the previous "\\n" printed the two characters backslash and n.
print("\nScraped data saved to scraped_data.json")