In [None]:
import difflib
import re
from collections import Counter

# Function to read the privacy policies from files
def read_policy(file_path):
    """Return the full contents of the file at ``file_path`` as one string.

    The file is opened in text mode and decoded as UTF-8. Errors raised by
    ``open`` or ``read`` are not caught here and propagate to the caller.
    """
    with open(file_path, mode='r', encoding='utf-8') as policy_file:
        contents = policy_file.read()
    return contents

# Function to find key GDPR terms in the privacy policy
def find_gdpr_terms(policy_text):
    """Count case-insensitive occurrences of GDPR-related terms in a policy.

    Matching is by substring, so "consent" also counts inside "consents".
    Within a multi-word term, any run of whitespace in the text (including a
    line break) matches the gap between words, so a phrase that was wrapped
    across two lines is still counted.

    Args:
        policy_text: Full text of the privacy policy.

    Returns:
        A dict mapping each keyword, in the order listed below, to the number
        of non-overlapping matches found in ``policy_text``.
    """
    gdpr_keywords = [
        'GDPR', 'personal data', 'data protection', 'data subject',
        'consent', 'data processing', 'right to access', 'right to erasure',
        'data portability', 'privacy', 'security', 'compliance'
    ]

    terms_count = {}
    for term in gdpr_keywords:
        # Escape each word so the keyword is always treated literally, and
        # join the words with \s+ so a line wrap or double space between
        # them does not hide a match.
        pattern = r'\s+'.join(re.escape(word) for word in term.split())
        terms_count[term] = len(re.findall(pattern, policy_text, re.IGNORECASE))
    return terms_count

# Function to compare the privacy policies
def compare_privacy_policies(policy1, policy2):
    """Compare two privacy policy files by line diff and GDPR keyword counts.

    Args:
        policy1: Path to the first policy file.
        policy2: Path to the second policy file.

    Returns:
        A ``(differences, gdpr_compliance)`` tuple. ``differences`` is the
        unified diff of the two files joined into a single string (empty when
        both files contain the same lines). ``gdpr_compliance`` is a dict with
        the keys ``'Policy1 GDPR Terms'`` and ``'Policy2 GDPR Terms'``, each
        holding the ``find_gdpr_terms`` result for that policy.

    Any exception raised while reading either file propagates to the caller.
    """
    policy1_text = read_policy(policy1)
    policy2_text = read_policy(policy2)

    # Label the diff with the two paths so the '---'/'+++' header lines say
    # which file is which. unified_diff requires str labels, hence str().
    diff = difflib.unified_diff(
        policy1_text.splitlines(),
        policy2_text.splitlines(),
        fromfile=str(policy1),
        tofile=str(policy2),
        lineterm='',
    )
    differences = '\n'.join(diff)

    # Keyword counts are computed independently for each policy.
    policy1_keywords = find_gdpr_terms(policy1_text)
    policy2_keywords = find_gdpr_terms(policy2_text)

    gdpr_compliance = {
        'Policy1 GDPR Terms': policy1_keywords,
        'Policy2 GDPR Terms': policy2_keywords
    }

    return differences, gdpr_compliance

# Function to save comparison result to file
def save_comparison_report(differences, gdpr_compliance, output_file="comparison_report.txt"):
    """Write the diff text and per-policy keyword counts to a UTF-8 text file.

    Args:
        differences: Diff text, written verbatim under the
            "Line-by-Line Differences" heading.
        gdpr_compliance: Mapping of a policy label to a mapping of
            term -> occurrence count; one section is written per label.
        output_file: Destination path; an existing file is overwritten.
    """
    with open(output_file, 'w', encoding='utf-8') as report:
        # Assemble the whole report first, then emit it in a single write.
        sections = [
            "Privacy Policy Comparison Report\n\n",
            "Line-by-Line Differences:\n",
            differences,
            "\n\nGDPR Compliance Summary:\n",
        ]
        for policy_name, term_counts in gdpr_compliance.items():
            sections.append(f"\n{policy_name}:\n")
            sections.extend(
                f"  - {keyword}: {occurrences} occurrences\n"
                for keyword, occurrences in term_counts.items()
            )
        report.write(''.join(sections))

# Example usage: compare two policy files from the current working directory
# and write the report to the default output path of save_comparison_report.
policy1_path, policy2_path = "policy1.txt", "policy2.txt"
differences, gdpr_compliance = compare_privacy_policies(
    policy1_path, policy2_path
)
save_comparison_report(differences, gdpr_compliance)

print("Comparison report has been generated.")