# Python Scripting

# 1. Write Python scripts for basic file operations and data processing?

Scenario Used in Example
Suppose we have a text file students.txt that contains student names and scores:

Alice,85
Bob,78
Charlie,92
David,60
Eva,99


We will:

Read the data

Parse and process it (e.g., filter students who scored > 80)

Write the result to a new file top_students.txt

In [2]:
import os

# 1. File path setup
input_file = 'students.txt'
output_file = 'top_students.txt'
append_file = 'log.txt'


In [3]:
# 2. Write some sample data to input file (for demonstration)
with open(input_file, 'w') as f:
    f.write("Alice,85\nBob,78\nCharlie,92\nDavid,60\nEva,99\n")

In [4]:
# 3. Read and process data
top_students = []

with open(input_file, 'r') as file:
    for line in file:
        name, score_str = line.strip().split(',')
        score = int(score_str)
        if score > 80:
            top_students.append((name, score))


In [5]:
# 4. Write processed data to a new file
with open(output_file, 'w') as f:
    for name, score in top_students:
        f.write(f"{name},{score}\n")

In [6]:
# 5. Append log entry to another file
with open(append_file, 'a') as log:
    log.write("Filtered top students written to top_students.txt\n")

In [7]:

# 6. Displaying results (for confirmation)
print("Top students with score > 80:")
for name, score in top_students:
    print(f"{name}: {score}")


Top students with score > 80:
Alice: 85
Charlie: 92
Eva: 99


# File/Directory Manipulation

In [8]:
import os
from pathlib import Path
import shutil
import glob

In [9]:
# Create a new directory
Path("example_dir").mkdir(exist_ok=True)

In [10]:
# Create some sample files
for i in range(3):
    Path(f"example_dir/file_{i}.txt").write_text(f"This is file {i}")

# List all .txt files using glob
txt_files = glob.glob("example_dir/*.txt")
print("Text files:", txt_files)


Text files: ['example_dir\\file_0.txt', 'example_dir\\file_1.txt', 'example_dir\\file_2.txt']


In [11]:
# Move one file to a new folder
Path("archive").mkdir(exist_ok=True)
shutil.move(txt_files[0], "archive/")

'archive/file_0.txt'

In [12]:
# Rename a file
os.rename("example_dir/file_1.txt", "example_dir/renamed_file.txt")

In [13]:

# Remove a file
os.remove("example_dir/file_2.txt")

In [14]:
# Clean up: delete directories
shutil.rmtree("example_dir")
shutil.rmtree("archive")

# Parsing and Transforming Data (String & Regex)

In [15]:
import re

data = [
    "John, Age: 28",
    "Maria, Age: 35",
    "David, Age: 22"
]

# Extract names and ages using regex
parsed_data = []

for entry in data:
    match = re.match(r"(\w+), Age: (\d+)", entry)
    if match:
        name, age = match.groups()
        parsed_data.append((name, int(age)))

# Transform: only people older than 25
older_people = [f"{name} ({age})" for name, age in parsed_data if age > 25]

print("People older than 25:", older_people)


People older than 25: ['John (28)', 'Maria (35)']


# Email Validation with Regex

In [16]:
import re

def is_valid_email(email: str) -> bool:
    """
    Validates an email address using a regular expression.

    Args:
        email (str): The email address string to validate.

    Returns:
        bool: True if the email is valid, False otherwise.
    """
    email_pattern = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}$")

    # re.match() checks for a match only at the beginning of the string.
    # If the entire string should match the pattern, re.fullmatch() is also an option.
    # For simple validation where the pattern should cover the whole string,
    # including ^ and $ in the pattern with re.match() achieves the same.
    if email_pattern.match(email):
        return True
    else:
        return False

# --- Example Usage ---
print("--- Email Validation Examples ---")
print(f"'test@example.com' is valid: {is_valid_email('test@example.com')}")
print(f"'john.doe123@sub.domain.co.uk' is valid: {is_valid_email('john.doe123@sub.domain.co.uk')}")
print(f"'invalid-email' is valid: {is_valid_email('invalid-email')}")
print(f"'user@.com' is valid: {is_valid_email('user@.com')}")
print(f"'user@domain' is valid: {is_valid_email('user@domain')}")
print(f"'user@domain.c' is valid: {is_valid_email('user@domain.c')}") 
print(f"'user@domain.commm' is valid: {is_valid_email('user@domain.commm')}") 
print(f"'firstname.lastname@example.org' is valid: {is_valid_email('firstname.lastname@example.org')}")
print(f"'email@example.test.net' is valid: {is_valid_email('email@example.test.net')}")
print(f"'email@123.456.789.10' is valid: {is_valid_email('email@123.456.789.10')}")

--- Email Validation Examples ---
'test@example.com' is valid: True
'john.doe123@sub.domain.co.uk' is valid: True
'invalid-email' is valid: False
'user@.com' is valid: False
'user@domain' is valid: False
'user@domain.c' is valid: False
'user@domain.commm' is valid: True
'firstname.lastname@example.org' is valid: True
'email@example.test.net' is valid: True
'email@123.456.789.10' is valid: False


# Phone Number Validation with Country codes

In [17]:
import re

def is_valid_phone(phone_number: str) -> bool:
    """
    Validates a phone number using a regular expression, accommodating several common formats.

    Args:
        phone_number (str): The phone number string to validate.

    Returns:
        bool: True if the phone number is valid, False otherwise.
    """
    phone_pattern = re.compile(r"^((\+1|1)\s*[-\.\s]?)?(\(?\d{3}\)?|\d{3})[-\.\s]?\d{3}[-\.\s]?\d{4}$")

    # re.match() checks for a match only at the beginning of the string.
    # For full string validation, including ^ and $ in the pattern with re.match() is effective.
    if phone_pattern.match(phone_number):
        return True
    else:
        return False

# --- Example Usage ---
print("\n--- Phone Number Validation Examples ---")
print(f"'(123) 456-7890' is valid: {is_valid_phone('(123) 456-7890')}")
print(f"'123-456-7890' is valid: {is_valid_phone('123-456-7890')}")
print(f"'123.456.7890' is valid: {is_valid_phone('123.456.7890')}")
print(f"'1234567890' is valid: {is_valid_phone('1234567890')}")
print(f"'+1 123-456-7890' is valid: {is_valid_phone('+1 123-456-7890')}")
print(f"'1-123-456-7890' is valid: {is_valid_phone('1-123-456-7890')}")
print(f"'invalid-phone' is valid: {is_valid_phone('invalid-phone')}")
print(f"'123-4567' is valid: {is_valid_phone('123-4567')}") # Too short
print(f"'(123)X456-7890' is valid: {is_valid_phone('(123)X456-7890')}") # Invalid character 'X'


--- Phone Number Validation Examples ---
'(123) 456-7890' is valid: True
'123-456-7890' is valid: True
'123.456.7890' is valid: True
'1234567890' is valid: True
'+1 123-456-7890' is valid: True
'1-123-456-7890' is valid: True
'invalid-phone' is valid: False
'123-4567' is valid: False
'(123)X456-7890' is valid: False


# Enhanced Phone Number Validation with Country Codes

In [18]:
import re

def validate_phone_number(phone_number: str) -> str:
    """
    Validates a phone number against country-specific patterns and categorizes it.

    Args:
        phone_number (str): The phone number string to validate.

    Returns:
        str: A string indicating the validation status (e.g., "Valid - India",
             "Valid - US/Canada", "Valid - UK", "Valid - Australia",
             "Valid - Other International", "Invalid", "Spam").
    """

    # Clean the phone number by removing common non-digit characters
    # (except for the leading + sign, which is handled by regex).
    cleaned_number = re.sub(r'[()\s.-]', '', phone_number)

    # Define regex patterns for different countries
    # Each pattern includes optional country codes and common separators.
    # The patterns are compiled for efficiency.

    # 1. India (+91)
    # Allows +91, 0, or no prefix, followed by 10 digits.
    # Indian mobile numbers typically start with 6, 7, 8, or 9.
    india_pattern = re.compile(r"^((\+91|0)?)?[6789]\d{9}$")
    if india_pattern.fullmatch(cleaned_number):
        return "Valid - India"

    # 2. US/Canada (+1)
    # Allows +1, 1, or no prefix. Supports (XXX) XXX-XXXX, XXX-XXX-XXXX, XXXXXXXXXX.
    # The original pattern was quite robust for NA numbers.
    us_canada_pattern = re.compile(r"^((\+1|1)?)?(\d{3})(\d{3})(\d{4})$")
    if us_canada_pattern.fullmatch(cleaned_number):
        return "Valid - US/Canada"

    # 3. United Kingdom (+44)
    # Allows +44, 0, or no prefix. UK mobile numbers generally start with 7 and are 10 digits long (after the 0 or +44).
    # This is a simplified pattern for common mobile numbers.
    uk_pattern = re.compile(r"^((\+44|0)?)?7\d{9}$")
    if uk_pattern.fullmatch(cleaned_number):
        return "Valid - UK"

    # 4. Australia (+61)
    # Allows +61, 0, or no prefix. Australian mobile numbers generally start with 4 and are 9 digits long (after the 0 or +61).
    australia_pattern = re.compile(r"^((\+61|0)?)?4\d{8}$")
    if australia_pattern.fullmatch(cleaned_number):
        return "Valid - Australia"

    # 5. Generic International (starts with + and has at least 7 digits, but not too many)
    # This is a very broad pattern for numbers that start with '+' but don't fit specific country patterns above.
    # It assumes international numbers are generally between 7 and 15 digits long after the '+'.
    international_pattern = re.compile(r"^\+\d{7,15}$")
    if international_pattern.fullmatch(cleaned_number):
        return "Valid - Other International"

    # If none of the above patterns match, consider it invalid.
    # You can customize the "Spam" criteria further if needed.
    # For this example, any number that doesn't fit a valid pattern is marked as "Spam".
    return "Spam - Unusual/Invalid Format"

# --- Example Usage ---
print("--- Phone Number Validation Examples ---")

# Indian Numbers
print(f"'+919876543210' status: {validate_phone_number('+919876543210')}")
print(f"'09876543210' status: {validate_phone_number('09876543210')}")
print(f"'9876543210' status: {validate_phone_number('9876543210')}")
print(f"'+91 98765 43210' status: {validate_phone_number('+91 98765 43210')}") # With spaces, will be cleaned

# US/Canada Numbers
print(f"'(123) 456-7890' status: {validate_phone_number('(123) 456-7890')}")
print(f"'123-456-7890' status: {validate_phone_number('123-456-7890')}")
print(f"'123.456.7890' status: {validate_phone_number('123.456.7890')}")
print(f"'1234567890' status: {validate_phone_number('1234567890')}")
print(f"'+1 123-456-7890' status: {validate_phone_number('+1 123-456-7890')}")

# UK Numbers
print(f"'+447911123456' status: {validate_phone_number('+447911123456')}")
print(f"'07911123456' status: {validate_phone_number('07911123456')}")
print(f"'7911123456' status: {validate_phone_number('7911123456')}") # No prefix, but matches pattern

# Australia Numbers
print(f"'+61412345678' status: {validate_phone_number('+61412345678')}")
print(f"'0412345678' status: {validate_phone_number('0412345678')}")
print(f"'412345678' status: {validate_phone_number('412345678')}") # No prefix, but matches pattern

# Other International Numbers
print(f"'+5511987654321' status: {validate_phone_number('+5511987654321')}") # Example Brazil
print(f"'+8613800138000' status: {validate_phone_number('+8613800138000')}") # Example China

# Spam/Invalid Numbers
print(f"'invalid-phone' status: {validate_phone_number('invalid-phone')}")
print(f"'123-4567' status: {validate_phone_number('123-4567')}") # Too short
print(f"'(123)X456-7890' status: {validate_phone_number('(123)X456-7890')}") # Invalid character 'X'
print(f"'+91123' status: {validate_phone_number('+91123')}") # Too short for India


--- Phone Number Validation Examples ---
'+919876543210' status: Valid - India
'09876543210' status: Valid - India
'9876543210' status: Valid - India
'+91 98765 43210' status: Valid - India
'(123) 456-7890' status: Valid - US/Canada
'123-456-7890' status: Valid - US/Canada
'123.456.7890' status: Valid - US/Canada
'1234567890' status: Valid - US/Canada
'+1 123-456-7890' status: Valid - US/Canada
'+447911123456' status: Valid - UK
'07911123456' status: Valid - India
'7911123456' status: Valid - India
'+61412345678' status: Valid - Australia
'0412345678' status: Valid - US/Canada
'412345678' status: Valid - Australia
'+5511987654321' status: Valid - Other International
'+8613800138000' status: Valid - Other International
'invalid-phone' status: Spam - Unusual/Invalid Format
'123-4567' status: Spam - Unusual/Invalid Format
'(123)X456-7890' status: Spam - Unusual/Invalid Format
'+91123' status: Spam - Unusual/Invalid Format


# 2. Develop a simple web scraper to extract data from a website?

I have used w3school url to extract information about varibales in python

In [19]:
import requests
from bs4 import BeautifulSoup

def simple_web_scraper(url: str, css_selector: str) -> list[str]:
    """
    A simple web scraper to extract text content based on a CSS selector from a given URL.

    Args:
        url (str): The URL of the webpage to scrape.
        css_selector (str): The CSS selector to target specific elements.

    Returns:
        list[str]: A list of text content extracted from the matched elements.
                   Returns an empty list if no content is found or an error occurs.
    """
    try:
        # Make an HTTP GET request to the specified URL
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all elements that match the given CSS selector
        elements = soup.select(css_selector)

        extracted_data = []
        if elements:
            for element in elements:
                # Extract text content and remove leading/trailing whitespace
                text = element.get_text(strip=True)
                extracted_data.append(text)
            return extracted_data
        else:
            return []

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return []
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return []



if __name__ == "__main__":
    # Target URL for Python Variables on W3Schools
    target_url = "https://www.w3schools.com/python/python_variables.asp"
    # Selector to get paragraphs, headings, list items, and preformatted text (code examples)
    # within the main content area of the page.
    target_selector = "#main p, #main h2, #main ul, #main pre"

    print(f"Attempting to scrape from: {target_url} with selector: '{target_selector}'")
    data = simple_web_scraper(target_url, target_selector)

    if data:
        print("\nExtracted Data:")
        for item in data:
            print(f"- {item}\n---") # Added separator for readability
    else:
        print("\nNo data extracted.")

Attempting to scrape from: https://www.w3schools.com/python/python_variables.asp with selector: '#main p, #main h2, #main ul, #main pre'

Extracted Data:
- Variables
---
- Variables are containers for storing data values.
---
- Creating Variables
---
- Python has no command for declaring a variable.
---
- A variable is created the moment you first assign a value to it.
---
- Variables do not need to be declared with any particulartype, and can even change type after they have been set.
---
- Casting
---
- If you want to specify the data type of a variable, this can be done with casting.
---
- Get the Type
---
- You can get the data type of a variable with thetype()function.
---
- Single or Double Quotes?
---
- String variables can be declared either by using single or double quotes:
---
- Case-Sensitive
---
- Variable names are case-sensitive.
---
- This will create two variables:
---
- Video: Python Variables
---
