In [6]:
from owlready2 import *

onto = get_ontology("http://example.org/email_url.owl")

with onto:
    # Classes
    class Email(Thing): pass
    class URL(Thing): pass
    class SSLStatus(Thing): pass
    class URLStructure(Thing): pass
    class RiskLevel(Thing): pass
    class SuspiciousWord(Thing): pass

    # Object Properties
    class hasURL(Email >> URL): pass
    class hasSSLStatus(URL >> SSLStatus): pass
    class hasStructure(URL >> URLStructure): pass
    class hasRiskLevel(URL >> RiskLevel): pass
    class emailRisk(Email >> RiskLevel): pass
    class containsWord(Email >> SuspiciousWord): pass

    # Data Properties
    class hasLength(URL >> int, DataProperty): pass
    class hasPhishingScore(URL >> float, DataProperty): pass
    class hasSubject(Email >> str, DataProperty): pass
    class hasBody(Email >> str, DataProperty): pass
    class isPhishing(Email >> bool, DataProperty): pass

    # Instances
    ssl_no = SSLStatus("ssl_no")
    struct_ip = URLStructure("struct_ip")
    risk_high = RiskLevel("high")
    suspicious_keywords = ["bank", "urgent", "prize", "password", "click", "verify"]

    for word in suspicious_keywords:
        SuspiciousWord(word)

    # Example email with suspicious words
    email1 = Email("email_phishing")
    
    # Correct way to assign values to data properties
    email1.hasSubject.append("Urgent: Verify your bank account")
    email1.hasBody.append("Click here to verify your password and win a prize!")

    # Add related suspicious words (manual matching for now)
    email1.containsWord.append(onto.bank)
    email1.containsWord.append(onto.urgent)
    email1.containsWord.append(onto.password)
    email1.containsWord.append(onto.prize)

    # Example phishing URL
    url1 = URL("phishy_url")
    
    # Correct way to assign values to data properties
    url1.hasSSLStatus.append(ssl_no)  # Append SSL status
    url1.hasStructure.append(struct_ip)  # Append structure
    url1.hasLength.append(100)  # Append length as list
    url1.hasPhishingScore.append(0.95)  # Append phishing score
    url1.hasRiskLevel.append(risk_high)  # Append risk level

    # Link URL to email
    email1.hasURL.append(url1)
    email1.emailRisk.append(risk_high)  # Append risk level
    email1.isPhishing.append(True)  # Append boolean value

onto.save(file="email_url_combined.owl", format="rdfxml")
print("✅ Enhanced ontology with text + URL saved.")


✅ Enhanced ontology with text + URL saved.


In [8]:
# Query to find emails with high phishing risk
for email in onto.Email.instances():
    # Check if the email is marked as phishing
    if email.isPhishing:
        print(f"Phishing email: {email}")
        # Access the 'hasSubject' property as a list
        for subject in email.hasSubject:
            print(f"Subject: {subject}")


Phishing email: email_url.email_phishing
Subject: Urgent: Verify your bank account
Subject: Urgent: Verify your bank account


In [9]:
class FinancialTerm(SuspiciousWord):
    pass
class UrgentTerm(SuspiciousWord):
    pass

# Create instances
bank_term = FinancialTerm("bank")
urgent_term = UrgentTerm("urgent")


In [10]:
from owlready2 import get_ontology, sync_reasoner

onto = get_ontology("file://email_url_combined.owl").load()

# Apply reasoning to infer additional facts
sync_reasoner()


* Owlready2 * Running HermiT...
    java -Xmx2000M -cp C:\Users\admin\AppData\Roaming\Python\Python311\site-packages\owlready2\hermit;C:\Users\admin\AppData\Roaming\Python\Python311\site-packages\owlready2\hermit\HermiT.jar org.semanticweb.HermiT.cli.CommandLine -c -O -D -I file:///C:/Users/admin/AppData/Local/Temp/tmpn3jc62q4
* Owlready2 * HermiT took 2.3249847888946533 seconds
* Owlready * Reparenting email_url.urgent: {email_url.SuspiciousWord, email_url.UrgentTerm} => {email_url.UrgentTerm}
* Owlready * Reparenting email_url.bank: {email_url.SuspiciousWord, email_url.FinancialTerm} => {email_url.FinancialTerm}
* Owlready * (NB: only changes on entities loaded in Python are shown, other changes are done but not listed)


In [11]:
onto.save(file="email_url_combined.owl", format="rdfxml")  # Save in RDF/XML format


In [14]:
from owlready2 import *

# Load the ontology
onto = get_ontology("email_url_combined.owl").load()

# Test the instances of Email and URL classes

# Query for emails marked as phishing
for email in onto.Email.instances():
    if email.isPhishing:
        print(f"Phishing email: {email}")
        
        # Retrieve subject and body of the email
        for subject in set(email.hasSubject):  # Using set to avoid duplicates
            print(f"Subject: {subject}")
        for body in set(email.hasBody):  # Using set to avoid duplicates
            print(f"Body: {body}")
        
        # List suspicious words without duplicates
        print("Suspicious words in the email:")
        for word in set(email.containsWord):  # Using set to avoid duplicates
            print(f"- {word}")
        
        # Check associated URL(s) and risk levels
        for url in email.hasURL:
            print(f"Phishing URL: {url}")
            for ssl_status in set(url.hasSSLStatus):  # Using set to avoid duplicates
                print(f"SSL Status: {ssl_status}")
            for structure in set(url.hasStructure):  # Using set to avoid duplicates
                print(f"URL Structure: {structure}")
            for length in set(url.hasLength):  # Using set to avoid duplicates
                print(f"URL Length: {length}")
            for score in set(url.hasPhishingScore):  # Using set to avoid duplicates
                print(f"Phishing Score: {score}")
            for risk in set(url.hasRiskLevel):  # Using set to avoid duplicates
                print(f"Risk Level: {risk}")
        
        # Risk level of the email itself
        for risk in set(email.emailRisk):  # Using set to avoid duplicates
            print(f"Email Risk Level: {risk}")
        
        # Check if the email is flagged as phishing
        print(f"Is this email phishing? {email.isPhishing[0]}")

# Testing for URLs
for url in onto.URL.instances():
    print(f"URL: {url}")
    for ssl_status in set(url.hasSSLStatus):  # Using set to avoid duplicates
        print(f"SSL Status: {ssl_status}")
    for structure in set(url.hasStructure):  # Using set to avoid duplicates
        print(f"URL Structure: {structure}")
    for length in set(url.hasLength):  # Using set to avoid duplicates
        print(f"URL Length: {length}")
    for score in set(url.hasPhishingScore):  # Using set to avoid duplicates
        print(f"Phishing Score: {score}")
    for risk in set(url.hasRiskLevel):  # Using set to avoid duplicates
        print(f"Risk Level: {risk}")


Phishing email: email_url.email_phishing
Subject: Urgent: Verify your bank account
Body: Click here to verify your password and win a prize!
Suspicious words in the email:
- email_url.bank
- email_url.prize
- email_url.urgent
- email_url.password
Phishing URL: email_url.phishy_url
SSL Status: email_url.ssl_no
URL Structure: email_url.struct_ip
URL Length: 100
Phishing Score: 0.95
Risk Level: email_url.high
Email Risk Level: email_url.high
Is this email phishing? True
URL: email_url.phishy_url
SSL Status: email_url.ssl_no
URL Structure: email_url.struct_ip
URL Length: 100
Phishing Score: 0.95
Risk Level: email_url.high


In [19]:
import random
from owlready2 import *
import string

# Load the ontology
onto = get_ontology("email_url_combined.owl").load()

# Sample lists for random data
subjects = ["Urgent: Verify your bank account", "Limited offer: Claim your prize now", 
            "Important: Update your payment info", "Action required: Confirm your identity", 
            "Congratulations! You've won a free gift"]
bodies = ["Click here to verify your password and win a prize!", "Claim your free gift by clicking here.",
          "Update your bank account details immediately.", "Your account has been compromised, click here.",
          "Please confirm your identity to secure your account."]
suspicious_words = ["bank", "urgent", "prize", "password", "click", "verify", "gift", "claim", "update", "account"]
ssl_statuses = ["ssl_yes", "ssl_no"]
structures = ["struct_ip", "struct_https", "struct_domain"]
risk_levels = ["high", "medium", "low"]
phishing_scores = [0.95, 0.85, 0.75, 0.65, 0.99]
url_lengths = [50, 60, 80, 100, 120]

# Function to generate a random string (for URL, etc.)
def random_string(length=8):
    return ''.join(random.choices(string.ascii_lowercase + string.digits, k=length))

# Generate random email instance
for i in range(5):  # Create 5 random emails
    email = Email(f"email_{random_string()}")
    
    # Random subject and body
    email.hasSubject.append(random.choice(subjects))
    email.hasBody.append(random.choice(bodies))
    
    # Add some suspicious words randomly (create instances of SuspiciousWord)
    for _ in range(random.randint(1, 3)):  # Random number of suspicious words
        suspicious_word = random.choice(suspicious_words)
        suspicious_word_instance = SuspiciousWord(suspicious_word)  # Create an instance
        email.containsWord.append(suspicious_word_instance)
    
    # Random phishing detection
    email.isPhishing.append(random.choice([True, False]))
    
    # Generate random URL linked to the email
    url = URL(f"url_{random_string()}")
    
    # Assign random values to the URL properties
    ssl_status_instance = SSLStatus(random.choice(ssl_statuses))  # Create instance for SSLStatus
    url.hasSSLStatus.append(ssl_status_instance)
    
    structure_instance = URLStructure(random.choice(structures))  # Create instance for URLStructure
    url.hasStructure.append(structure_instance)
    
    url.hasLength.append(random.choice(url_lengths))  # Length is a data property
    url.hasPhishingScore.append(random.choice(phishing_scores))  # Phishing score is a data property
    
    risk_level_instance = RiskLevel(random.choice(risk_levels))  # Create instance for RiskLevel
    url.hasRiskLevel.append(risk_level_instance)
    
    # Link the URL to the email
    email.hasURL.append(url)
    
    # Assign a random risk level to the email itself (create an instance of RiskLevel)
    email_risk_instance = RiskLevel(random.choice(risk_levels))  # Create instance for email risk
    email.emailRisk.append(email_risk_instance)

# You can now interact with the ontology in memory, and make queries or tests.
# No need to save the file if you're just testing in the current session.
print("✅ Random data added to ontology in memory.")


✅ Random data added to ontology in memory.


In [22]:
import csv

# Prepare the combined data for export
with open('combined_email_url_data.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    
    # Write the header row with both Email and URL fields
    writer.writerow([
        'Email Subject', 'Email Body', 'Is Phishing', 'Email Risk Level', 'URL SSL Status', 
        'URL Structure', 'URL Length', 'URL Phishing Score', 'URL Risk Level'
    ])
    
    # Loop through all emails and associated URLs
    for email in onto.Email.instances():
        # Safely retrieve email properties
        email_subject = email.hasSubject[0] if email.hasSubject else "N/A"
        email_body = email.hasBody[0] if email.hasBody else "N/A"
        is_phishing = email.isPhishing[0] if email.isPhishing else "N/A"
        email_risk_level = email.emailRisk[0] if email.emailRisk else "N/A"
        
        # Loop through all URLs linked to the email
        for url in email.hasURL:
            # Safely retrieve URL properties
            ssl_status = url.hasSSLStatus[0] if url.hasSSLStatus else "N/A"
            url_structure = url.hasStructure[0] if url.hasStructure else "N/A"
            url_length = url.hasLength[0] if url.hasLength else "N/A"
            phishing_score = url.hasPhishingScore[0] if url.hasPhishingScore else "N/A"
            url_risk_level = url.hasRiskLevel[0] if url.hasRiskLevel else "N/A"
            
            # Write the combined data to the CSV file
            writer.writerow([
                email_subject, email_body, is_phishing, email_risk_level,
                ssl_status, url_structure, url_length, phishing_score, url_risk_level
            ])

print("Combined data exported successfully to combined_email_url_data.csv")


Combined data exported successfully to combined_email_url_data.csv
