This project detects dates, names, and phone numbers, then blurs them.

In [1]:
import cv2
import pytesseract
import spacy
from spacy.matcher import PhraseMatcher

# Load the small English spaCy pipeline; its tokenizer and vocabulary are
# shared with the phrase matcher built below.
nlp = spacy.load("en_core_web_sm")

# Literal phrases whose presence marks a piece of OCR text as sensitive.
sensitive_phrases = ["name", "phone number", "birthday"]

# Match on the lowercase form of every token so that OCR output such as
# "Name" or "PHONE NUMBER" is caught as well as the all-lowercase spelling.
matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

# Patterns only need tokenization, so make_doc avoids running the full
# pipeline (tagger, parser, NER) on each phrase.
patterns = [nlp.make_doc(text) for text in sensitive_phrases]

# Patterns are passed as a single list, which is the documented call form;
# the older `add(key, None, *patterns)` signature is a legacy spelling.
matcher.add("SensitiveInfo", patterns)

def detect_sensitive_info(text):
    """Report whether the phrase matcher fires anywhere in ``text``.

    The text is parsed with the module-level ``nlp`` pipeline and handed to
    the module-level ``matcher``.

    Args:
        text: Raw string to scan (for example, OCR output of a frame).

    Returns:
        True if the matcher yields at least one truthy match, else False.
    """
    return any(matcher(nlp(text)))

def blur_text(image, bbox):
    """Gaussian-blur a rectangular region of ``image`` in place.

    The rectangle is clamped to the image bounds before blurring. If nothing
    of it lies inside the image (zero/negative size, or entirely off-image),
    the image is returned untouched instead of passing an empty region to
    OpenCV.

    Args:
        image: Array-like image indexed as ``image[row, col]`` and exposing
            ``shape`` with height and width as its first two entries.
        bbox: ``(x, y, w, h)`` tuple: left column, top row, width, height.

    Returns:
        The same ``image`` object, with the clamped region blurred.
    """
    x, y, w, h = bbox
    img_h, img_w = image.shape[:2]

    # Clamp to the image so negative coordinates do not wrap around through
    # negative indexing and oversized boxes stay within bounds.
    x0, y0 = max(0, x), max(0, y)
    x1, y1 = min(img_w, x + w), min(img_h, y + h)

    # Nothing to blur: GaussianBlur would fail on an empty region.
    if x1 <= x0 or y1 <= y0:
        return image

    image[y0:y1, x0:x1] = cv2.GaussianBlur(image[y0:y1, x0:x1], (51, 51), 0)
    return image

def process_frame(frame):
    """OCR a single frame and blur it if the text looks sensitive.

    The frame is converted to grayscale, run through Tesseract, and the
    recognized text is checked with ``detect_sensitive_info``. Note that the
    blur is applied to the *entire* frame, not to individual text boxes.

    Args:
        frame: BGR image as delivered by ``cv2.VideoCapture.read``.

    Returns:
        The frame itself: blurred in place when sensitive text was detected,
        unchanged otherwise.
    """
    # Tesseract is fed a single-channel image.
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    recognized = pytesseract.image_to_string(grayscale)

    # Nothing sensitive found: hand the frame back as-is.
    if not detect_sensitive_info(recognized):
        return frame

    # Bounding box covering the whole frame: (x, y, width, height).
    height, width = frame.shape[0], frame.shape[1]
    return blur_text(frame, (0, 0, width, height))

def process_video(input_video_path, output_video_path):
    """Run ``process_frame`` over every frame of a video and save the result.

    The output is written with the XVID codec at the input's frame rate and
    frame size.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the video to write.

    Raises:
        OSError: If the input video cannot be opened for reading or the
            output video cannot be opened for writing.
    """
    cap = cv2.VideoCapture(input_video_path)
    out = None
    try:
        # Fail loudly instead of silently producing an empty output file.
        if not cap.isOpened():
            raise OSError(f"Cannot open input video: {input_video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out = cv2.VideoWriter(
            output_video_path,
            cv2.VideoWriter_fourcc(*'XVID'),
            fps,
            (frame_width, frame_height),
        )
        if not out.isOpened():
            raise OSError(f"Cannot open output video: {output_video_path}")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read failure): stop processing.
                break
            out.write(process_frame(frame))
    finally:
        # Release the capture and writer even if frame processing raised,
        # so file handles are not leaked and the output gets finalized.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

# Example usage. Guarded so the video is only processed when this file is
# executed directly (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    input_video_path = r"C:\Users\Elhamd Pc\Videos\Captures\Photos 2024-05-15 14-24-33.mp4"
    output_video_path = r"C:\Users\Elhamd Pc\Videos\Captures\the_output.mp4"
    process_video(input_video_path, output_video_path)
