<a href="https://colab.research.google.com/github/2003UJAN/SIH_RE-DACT/blob/main/RE_DACT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import spacy
from transformers import pipeline
import cv2
import os

In [None]:
# Load spaCy's "en_core_web_sm" pipeline once at module level; perform_ner()
# calls this object to parse text and read the resulting entities.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Haar cascade classifier built from the 'haarcascade_frontalface_default.xml'
# file in OpenCV's cv2.data.haarcascades directory; redact_image_with_haar()
# uses it to locate faces.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

In [None]:
def perform_ner(text):
    """Run the module-level spaCy pipeline over ``text`` and collect entities.

    Args:
        text: Raw text to analyse.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per item in the
        parsed document's ``ents``, in the order they are reported.
    """
    found = []
    for span in nlp(text).ents:
        found.append((span.text, span.label_))
    return found

In [None]:
def redact_text(text, entities, degree):
    """Replace selected entity mentions in ``text`` with ``[REDACTED]``.

    Args:
        text: The text to redact.
        entities: Iterable of ``(entity_text, label)`` pairs, e.g. the output
            of ``perform_ner``.
        degree: Redaction level.
            1 redacts ``PERSON`` and ``ORG`` entities,
            2 additionally redacts ``DATE`` entities,
            3 redacts every entity regardless of label.
            Any other value leaves the text unchanged.

    Returns:
        The redacted text.
    """
    import re

    labels_by_degree = {
        1: {"PERSON", "ORG"},
        2: {"PERSON", "ORG", "DATE"},
    }

    if degree == 3:
        targets = {entity for entity, _ in entities}
    elif degree in labels_by_degree:
        wanted = labels_by_degree[degree]
        targets = {entity for entity, label in entities if label in wanted}
    else:
        return text

    # An empty pattern would match between every character.
    targets.discard("")
    if not targets:
        return text

    # Redact in a single pass so that a later entity (e.g. "ACT") can never
    # rewrite part of an already-inserted "[REDACTED]" placeholder. Longest
    # alternatives go first so "John Smith" wins over "John" and no fragment
    # of the longer mention is left behind.
    ordered = sorted(targets, key=lambda item: (-len(item), item))
    pattern = "|".join(re.escape(item) for item in ordered)
    return re.sub(pattern, "[REDACTED]", text)

In [None]:
# Lazily-created GPT-2 text-generation pipeline, shared by every call to
# generate_synthetic_data() so the model is only loaded once.
_TEXT_GENERATOR = None


def generate_synthetic_data(prompt, max_length=50):
    """Generate a text continuation of ``prompt`` with a GPT-2 pipeline.

    Args:
        prompt: Seed text handed to the text-generation pipeline.
        max_length: Forwarded to the pipeline as ``max_length``.

    Returns:
        The ``'generated_text'`` field of the single sequence returned by the
        pipeline.
    """
    global _TEXT_GENERATOR
    if _TEXT_GENERATOR is None:
        # Building the pipeline loads the model; do it once instead of on
        # every call.
        _TEXT_GENERATOR = pipeline("text-generation", model="gpt2")
    outputs = _TEXT_GENERATOR(prompt, max_length=max_length, num_return_sequences=1)
    return outputs[0]['generated_text']

In [None]:
def redact_image_with_haar(image_path, output_path, degree=1):
    """Obscure faces found by the module-level Haar cascade and save the image.

    Args:
        image_path: Path of the image to read.
        output_path: Path the processed image is written to.
        degree: How each detected face is obscured.
            1 applies a Gaussian blur,
            2 pixelates the face (downscale then nearest-neighbour upscale),
            3 fills the face rectangle with zeros (black).
            Any other value leaves the faces untouched; the image is still
            written to ``output_path``.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read as an image.
        OSError: If the result cannot be written to ``output_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        # Image arrays are indexed [row, column], i.e. [y-range, x-range].
        # The ranges must be slices (y:y+h, x:x+w) to cover the rectangle.
        region = image[y:y + h, x:x + w]
        if degree == 1:
            image[y:y + h, x:x + w] = cv2.GaussianBlur(region, (99, 99), 30)
        elif degree == 2:
            # Keep the intermediate size at least 1x1 so small faces do not
            # produce an invalid zero-sized resize target.
            small_size = (max(1, int(w) // 10), max(1, int(h) // 10))
            small = cv2.resize(region, small_size, interpolation=cv2.INTER_LINEAR)
            image[y:y + h, x:x + w] = cv2.resize(
                small, (int(w), int(h)), interpolation=cv2.INTER_NEAREST
            )
        elif degree == 3:
            image[y:y + h, x:x + w] = 0

    if not cv2.imwrite(output_path, image):
        # cv2.imwrite returns False when the file could not be written.
        raise OSError(f"Could not write image: {output_path}")

In [None]:
def handle_text_redaction_own_input():
    """Interactively redact text typed by the user and save it to a file.

    Prompts for the text, then the redaction degree, then the output path.
    The text is passed through ``perform_ner`` and ``redact_text`` and the
    result is written to the chosen path.
    """
    source_text = input("Enter your text: ")
    # Arguments are evaluated left to right, so entity extraction still
    # happens before the degree prompt is shown.
    result = redact_text(
        source_text,
        perform_ner(source_text),
        int(input("Enter redaction degree (1-3): ")),
    )

    destination = input("Enter output file path (with .txt extension): ")
    with open(destination, 'w') as out:
        out.write(result)
    print(f"Redacted text saved to {destination}")

In [None]:
def handle_image_redaction_own_input():
    """Interactively redact faces in an image chosen by the user.

    Prompts for the image path and the redaction degree. The output path is
    the input path with ``_redacted`` inserted before the file extension.
    """
    source_path = input("Enter image file path: ")
    level = int(input("Enter redaction degree (1-3): "))

    # Insert the "_redacted" marker between the stem and the extension.
    stem, suffix = os.path.splitext(source_path)
    target_path = stem + "_redacted" + suffix

    redact_image_with_haar(source_path, target_path, level)

    print(f"Redacted image saved to {target_path}")

In [None]:
def handle_text_redaction_from_dataset(dataset_dir):
    """Redact a text file picked from ``dataset_dir`` and save the result.

    The file is chosen through ``select_input_from_dataset``. The function
    then prompts for the redaction degree and the output path, and writes the
    redacted text there.

    Args:
        dataset_dir: Directory whose entries are offered for selection.
    """
    chosen_path = select_input_from_dataset(dataset_dir)
    with open(chosen_path, 'r') as source:
        contents = source.read()

    # Arguments are evaluated left to right, so entity extraction still
    # happens before the degree prompt is shown.
    result = redact_text(
        contents,
        perform_ner(contents),
        int(input("Enter redaction degree (1-3): ")),
    )

    destination = input("Enter output file path (with .txt extension): ")
    with open(destination, 'w') as out:
        out.write(result)
    print(f"Redacted text saved to {destination}")

In [None]:
def handle_image_redaction_from_dataset(dataset_dir):
    """Redact faces in an image picked from ``dataset_dir``.

    The image is chosen through ``select_input_from_dataset``. The function
    then prompts for the redaction degree and the output image path before
    delegating to ``redact_image_with_haar``.

    Args:
        dataset_dir: Directory whose entries are offered for selection.
    """
    chosen_path = select_input_from_dataset(dataset_dir)
    level = int(input("Enter redaction degree (1-3): "))
    destination = input("Enter output image path (with .jpg extension): ")
    redact_image_with_haar(
        image_path=chosen_path,
        output_path=destination,
        degree=level,
    )
    print(f"Redacted image saved to {destination}")

In [None]:
def select_input_from_dataset(dataset_dir):
    """List the entries of ``dataset_dir`` and return the one the user picks.

    The entries are printed as a 1-based numbered menu in sorted order, and
    the user is prompted for a number.

    Args:
        dataset_dir: Directory whose entries are offered for selection.

    Returns:
        ``dataset_dir`` joined with the selected entry name.

    Raises:
        ValueError: If the typed value is not an integer.
        IndexError: If the number is outside ``1..len(entries)``.
    """
    # os.listdir returns entries in arbitrary order; sort for a stable menu.
    files = sorted(os.listdir(dataset_dir))
    for number, name in enumerate(files, start=1):
        print(f"{number}. {name}")

    choice = int(input("Select the file number from the dataset: ")) - 1
    # Reject 0 and negative numbers explicitly: negative list indices would
    # otherwise silently select an entry counted from the end.
    if not 0 <= choice < len(files):
        raise IndexError(f"File number must be between 1 and {len(files)}")
    return os.path.join(dataset_dir, files[choice])

In [None]:
def main():
    """Show the redaction menu and dispatch to the chosen handler.

    Options 3 and 4 additionally prompt for a dataset directory, which is
    passed to the handler. Any other number than 1-4 does nothing.
    """
    menu = (
        "1. Redact Own Input Text",
        "2. Redact Own Input Image",
        "3. Redact Text from Dataset",
        "4. Redact Image from Dataset",
    )
    for entry in menu:
        print(entry)
    selection = int(input("Enter your choice: "))

    if selection == 1:
        handle_text_redaction_own_input()
        return
    if selection == 2:
        handle_image_redaction_own_input()
        return
    if selection in (3, 4):
        # Both dataset modes ask for the directory before delegating.
        dataset_dir = input("Enter the path to the dataset directory: ")
        if selection == 3:
            handle_text_redaction_from_dataset(dataset_dir)
        else:
            handle_image_redaction_from_dataset(dataset_dir)

# Start the interactive menu only when this code is executed as the main
# program, not when it is imported.
if __name__ == "__main__":
    main()

1. Redact Own Input Text
2. Redact Own Input Image
3. Redact Text from Dataset
4. Redact Image from Dataset
Enter your choice: 2
