<a href="https://colab.research.google.com/github/GitMishka/2/blob/main/python_prac.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 2025/3/4 — Masking PII (e-mail addresses and phone numbers)

In [None]:
import pandas as pd

# Two sample records holding the PII columns used by the masking demo
data = {
    'Email': ['john.doe@example.com', 'jane.smith@example.com'],
    'Phone': ['123-456-7890', '987-654-3210'],
}
df = pd.DataFrame.from_dict(data)

# Define the Masking Functions
def mask_email(email):
    """Mask the local part of an e-mail address, leaving the domain readable.

    For local parts longer than two characters the first and last characters
    are kept and everything in between becomes ``*``. Shorter local parts are
    masked more aggressively so that they are never returned in full.

    Args:
        email (str): Address to mask. The text after the last ``@`` is treated
            as the domain; a value without ``@`` is masked as a whole.

    Returns:
        str: The masked address.
    """
    # rpartition splits on the LAST '@', so nothing after the local part is lost
    local, sep, domain = email.rpartition('@')
    if not sep:
        # No '@' present: rpartition put everything in `domain`; mask it all instead
        local, domain = email, ''

    size = len(local)
    if size > 2:
        masked_local = local[0] + '*' * (size - 2) + local[-1]
    elif size == 2:
        # Keep only the first character; showing both would reveal the whole value
        masked_local = local[0] + '*'
    else:
        # Zero or one character: reveal nothing
        masked_local = '*'
    return masked_local + sep + domain

def mask_phone(phone, visible=4):
    """Mask a phone number, leaving only its trailing characters readable.

    Every character (digits and separators alike) except the last ``visible``
    ones is replaced with ``*``.

    Args:
        phone (str): Phone number text to mask.
        visible (int): How many trailing characters stay readable. Defaults
            to 4.

    Returns:
        str: Masked string of the same length as ``phone``.
    """
    # If the value is no longer than the visible suffix, showing the suffix
    # would reveal the whole number, so mask everything. The visible <= 0
    # guard also avoids phone[-0:], which would return the entire string.
    if visible <= 0 or len(phone) <= visible:
        return '*' * len(phone)
    return '*' * (len(phone) - visible) + phone[-visible:]

# Overwrite each sensitive column with its masked counterpart
maskers = {'Email': mask_email, 'Phone': mask_phone}
for column, masker in maskers.items():
    df[column] = df[column].map(masker)

df


## Hashing PII with SHA-256

In [None]:
import hashlib

def hash_pii(data, key=None):
    """Hashes PII using SHA-256, optionally keyed (HMAC-SHA256).

    Args:
        data (str): Value to hash; it is encoded as UTF-8 first.
        key (str | bytes | None): Optional secret. When provided, the result
            is an HMAC-SHA256 of ``data`` under this key instead of a bare
            SHA-256 digest. ``None`` (the default) keeps the plain digest.

    Returns:
        str: Lowercase hexadecimal digest (64 characters).
    """
    import hmac  # local import: only needed for the keyed variant

    payload = data.encode('utf-8')
    if key is None:
        # NOTE: a bare hash of a low-entropy identifier (e.g. a 9-digit SSN)
        # can be reversed by hashing every candidate value; pass `key` when
        # the digest must resist that.
        return hashlib.sha256(payload).hexdigest()

    if isinstance(key, str):
        key = key.encode('utf-8')
    return hmac.new(key, payload, hashlib.sha256).hexdigest()

# Demonstrate the helper on a sample SSN-formatted string
pii_data = "123-45-6789"
hashed_pii = hash_pii(pii_data)

print(f"Hashed PII: {hashed_pii}")


Hashed PII: 01a54629efb952287e554eb23ef69c52097a75aecc0e3a93ca0855ab6d7a31a0


## Reversible encryption of PII with Fernet (`cryptography` package)

In [None]:
from cryptography.fernet import Fernet

# Generate a new Fernet key and build the cipher object that encrypt_data /
# decrypt_data use.
# NOTE(review): the key is only held in the `key` variable; nothing in this
# cell saves it, so tokens presumably cannot be decrypted once that variable
# is gone — confirm whether the key should be stored somewhere secure.
key = Fernet.generate_key()
cipher = Fernet(key)

def encrypt_data(data):
    """Encrypt a PII string with the module-level Fernet cipher.

    Args:
        data (str): Plain text to protect.

    Returns:
        str: The token produced by ``cipher.encrypt``, decoded to text.
    """
    plaintext_bytes = data.encode()
    token = cipher.encrypt(plaintext_bytes)
    return token.decode()

def decrypt_data(encrypted_data):
    """Recover the original PII string from a token made by ``encrypt_data``.

    Args:
        encrypted_data (str): Token text to decrypt with the module-level cipher.

    Returns:
        str: The decrypted plain text.
    """
    token = encrypted_data.encode()
    plaintext_bytes = cipher.decrypt(token)
    return plaintext_bytes.decode()

# Round-trip a sample e-mail address through encryption and decryption
pii_data = "johndoe@example.com"
encrypted_pii = encrypt_data(pii_data)
decrypted_pii = decrypt_data(encrypted_pii)

print(f"Encrypted: {encrypted_pii}")
print(f"Decrypted: {decrypted_pii}")


Encrypted: gAAAAABnx4fkPxr8ZYXT8fjq2AGbBwq9p-pyaI3ixnfvn6NAmCEVL7_y8J5EQgR-r594SUutTvZvfYIH9h-ZhTfX6vTXdoA79Rp4Q2eQQbsu8cIksYpuApk=
Decrypted: johndoe@example.com


In [None]:
import pandas as pd
import mimetypes
import os

def validate_file_format(file_path, expected_format):
    """
    Validates that a file name matches the expected format.

    Two checks are made, both based on the file NAME only: the extension must
    equal the expected format, and the MIME type that ``mimetypes`` guesses
    from that name must be one of the types accepted for the format. The file
    is never opened, so a mislabelled file cannot be detected here.

    Args:
        file_path (str): Path to the file
        expected_format (str): Expected file format (e.g., 'csv', 'json', 'parquet');
            case-insensitive, and a leading dot is tolerated

    Returns:
        bool: True if the file is valid, False otherwise
    """
    # Normalise so "CSV" and ".csv" are treated like "csv" (the extension
    # below is lower-cased too, so the comparison stays symmetric)
    expected_format = str(expected_format).strip().lower().lstrip(".")

    # Check file extension
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension != f".{expected_format}":
        print(f"Invalid file extension: {file_extension}. Expected: .{expected_format}")
        return False

    # MIME types accepted per format. `None` means "no type registered":
    # that is what guess_type commonly returns for .parquet, so requiring
    # application/octet-stream made parquet validation fail every time.
    accepted_mime_types = {
        # NOTE(review): some Windows setups are reported to map .csv to the
        # Excel type — confirm this alias is wanted.
        "csv": ("text/csv", "application/vnd.ms-excel"),
        "json": ("application/json",),
        "parquet": (None, "application/octet-stream", "application/vnd.apache.parquet"),
    }

    mime_type, _ = mimetypes.guess_type(file_path)
    accepted = accepted_mime_types.get(expected_format)
    # Formats without an entry are validated by extension alone
    if accepted is not None and mime_type not in accepted:
        expected_names = ", ".join(name for name in accepted if name is not None)
        print(f"Invalid MIME type: {mime_type}. Expected: {expected_names}")
        return False

    print(f"File format validation passed for {file_path}")
    return True

# Example usage: the name "data.csv" carries a .csv extension, matching the requested "csv" format
validate_file_format("data.csv", "csv")


In [None]:
import pandas as pd

# Expected columns mapped to the dtype name each one must have.
# "object" is the dtype pandas has traditionally used for Python strings.
# NOTE(review): newer pandas releases may infer a dedicated string dtype
# instead — confirm against the installed version.
expected_schema = dict(
    id="int64",
    name="object",
    age="int64",
    email="object",
)

def validate_schema(df, expected_schema):
    """
    Checks a DataFrame against a column-name -> dtype mapping.

    Columns are examined in the mapping's order and the check stops at the
    first problem found, printing a message that describes it.

    Args:
        df (pd.DataFrame): Frame whose columns are inspected
        expected_schema (dict): Required column names mapped to their dtypes

    Returns:
        bool: True when every listed column exists with the listed dtype,
        False as soon as one is missing or has a different dtype
    """
    for col_name in expected_schema:
        wanted_dtype = expected_schema[col_name]

        # Guard 1: the column has to exist at all
        if col_name not in df.columns:
            print(f"Missing column: {col_name}")
            return False

        # Guard 2: its dtype has to compare equal to the expected one
        found_dtype = df[col_name].dtype
        if found_dtype != wanted_dtype:
            print(f"Incorrect data type for {col_name}. Expected: {wanted_dtype}, Found: {found_dtype}")
            return False

    print("Schema validation passed")
    return True

# Build a two-row sample frame from records and validate it against the schema
df = pd.DataFrame([
    {"id": 1, "name": "Alice", "age": 30, "email": "alice@example.com"},
    {"id": 2, "name": "Bob", "age": 25, "email": "bob@example.com"},
])

validate_schema(df, expected_schema)


In [None]:
import pandas as pd

def check_mandatory_fields(df, required_columns):
    """
    Checks if mandatory fields are present and non-null.

    Args:
        df (pd.DataFrame): DataFrame to check
        required_columns (list): List of required column names (a single
            column name given as a string is also accepted)

    Returns:
        bool: True if every required column exists and has no missing
        values, False otherwise
    """
    # Treat a bare string as one column name rather than a sequence of characters
    if isinstance(required_columns, str):
        required = [required_columns]
    else:
        required = list(required_columns)

    # Presence check first: selecting an absent column would raise KeyError
    # instead of giving the False result that callers expect
    absent = [column for column in required if column not in df.columns]
    if absent:
        print("Missing columns:", absent)
        return False

    missing_values = df[required].isnull().sum()
    if missing_values.any():
        print("Missing values found:\n", missing_values)
        return False
    print("Mandatory fields check passed")
    return True

# Sample frame with one gap in "id" and one in "name", so the check reports missing values
df = pd.DataFrame(dict(
    id=[1, 2, None],
    name=["Alice", None, "Charlie"],
    age=[30, 25, 29],
))

check_mandatory_fields(df, ["id", "name", "age"])


In [None]:
import psycopg2
from psycopg2 import sql

import os

# PostgreSQL Connection Details
# Read from the standard libpq environment variables so that credentials do
# not have to be typed into (and saved with) the notebook. Unset variables
# fall back to the previous placeholder values.
pg_host = os.environ.get("PGHOST", "")
pg_database = os.environ.get("PGDATABASE", "")
pg_user = os.environ.get("PGUSER", "")
pg_password = os.environ.get("PGPASSWORD", "")
pg_port = os.environ.get("PGPORT", "5432")  # Default PostgreSQL port


def _quote_dsn_value(value):
    """Single-quote a connection-string value, escaping backslashes and quotes."""
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# Azure PostgreSQL requires SSL mode
# Values are quoted/escaped so a password containing a quote, backslash or
# space cannot corrupt the key=value connection string.
conn_string = " ".join(
    f"{keyword}={_quote_dsn_value(value)}"
    for keyword, value in (
        ("dbname", pg_database),
        ("user", pg_user),
        ("password", pg_password),
        ("host", pg_host),
        ("port", pg_port),
        ("sslmode", "require"),
    )
)

def test_db_connection():
    """Connect with ``conn_string``, run ``SELECT 1`` and print the outcome.

    Any exception raised while connecting, querying or closing is caught and
    reported with ``print``; nothing is re-raised and nothing is returned.
    The cursor and connection are closed even when the query fails.
    """
    try:
        # Connect to PostgreSQL
        conn = psycopg2.connect(conn_string)
        try:
            cursor = conn.cursor()
            try:
                # Run a test query
                cursor.execute("SELECT 1;")
                result = cursor.fetchone()

                if result and result[0] == 1:
                    print("✅ PostgreSQL connection to Azure is successful!")
                else:
                    print("❌ Unexpected result from test query.")
            finally:
                # Runs on success and on failure, so the cursor never leaks
                cursor.close()
        finally:
            # Close the connection even if creating or using the cursor failed
            conn.close()

    except Exception as e:
        # Deliberately broad: this is a best-effort connectivity probe
        print(f"❌ Database connection failed: {e}")

# Run the connectivity check when this cell executes; the function prints the outcome itself
test_db_connection()
