In [1]:
import os

In [2]:
# Step up one directory (presumably from the notebook's folder to the project root)
# so that the `phishing_domain_detection` package and the relative "saved_models"
# path resolve. Guarded so that re-running this cell does not climb one more
# directory level each time.
if not os.path.isdir("phishing_domain_detection"):
    os.chdir("..")

In [3]:
from phishing_domain_detection.entity.feature_extractor import FeatureExtractor

In [4]:
from phishing_domain_detection.logger import logging
from phishing_domain_detection.exception import Phishing_Exception

In [5]:
from phishing_domain_detection.util.util import load_object

In [10]:
import os,sys

In [7]:
# Stand-alone extractor instance for interactive use. PhishingEstimator (defined in
# this notebook) builds its own FeatureExtractor inside predict/predict_proba and
# does not read this variable.
extractor = FeatureExtractor()

In [12]:
# Directory that holds the saved models, given relative to the current working directory.
model_directory = "saved_models"

In [9]:
# Latest model in production: the timestamp-named folder with the largest numeric value.
# Names are compared as integers (a plain string max is lexicographic and only agrees
# when all names have the same width), and entries that are not purely decimal digits
# are ignored so stray files cannot be picked up.
max((name for name in os.listdir(model_directory) if name.isdecimal()), key=int)

'20221002005943'

In [10]:
from typing import List

In [11]:
class PhishingEstimator:
    """Make final phishing predictions with the most recently saved model.

    ``model_dir`` is expected to contain one sub-directory per saved model whose
    name is an integer timestamp (for example ``20221002005943``). The folder with
    the numerically largest name is treated as the model in production.
    """

    def __init__(self, model_dir: str):
        """Remember where the timestamped model folders live.

        Args:
            model_dir: Path of the directory containing the saved-model folders.
        """
        # A plain attribute assignment cannot fail, so no exception wrapping is needed.
        self.model_dir = model_dir

    def get_latest_model_path(self) -> str:
        """Return the path of the model file inside the newest timestamped folder.

        Only sub-directories whose names consist solely of decimal digits are
        considered, so unrelated entries (e.g. ``.gitkeep``) do not break the lookup.

        Returns:
            ``<model_dir>/<latest timestamp>/<first file name in sorted order>``.

        Raises:
            Phishing_Exception: If ``model_dir`` cannot be listed, holds no
                timestamped folder, or the newest folder contains no file.
        """
        try:
            version_dirs = [
                name
                for name in os.listdir(self.model_dir)
                if name.isdecimal()
                and os.path.isdir(os.path.join(self.model_dir, name))
            ]
            if not version_dirs:
                raise FileNotFoundError(
                    f"No timestamped model folder found in {self.model_dir!r}"
                )

            # Compare numerically, but keep the on-disk name: converting to int and
            # back to str would drop leading zeros and point at a missing folder.
            latest_model_dir = os.path.join(self.model_dir, max(version_dirs, key=int))

            # os.listdir returns entries in arbitrary order; sort so the chosen file
            # is the same on every platform.
            model_files = sorted(os.listdir(latest_model_dir))
            if not model_files:
                raise FileNotFoundError(
                    f"No model file found in {latest_model_dir!r}"
                )

            return os.path.join(latest_model_dir, model_files[0])
        except Exception as e:
            raise Phishing_Exception(e, sys) from e

    def _load_model_and_features(self, url_list: List[str]):
        """Resolve the production model and build the feature table for ``url_list``.

        Shared by ``predict`` and ``predict_proba`` so both follow the same steps.

        Returns:
            A ``(model, features)`` pair: the object loaded from the latest model
            path and whatever ``FeatureExtractor.generate_dataframe_from_urls``
            returns for the given URLs.
        """
        # The latest model is the model in production.
        model_in_production_path = self.get_latest_model_path()
        # NOTE(review): the notebook output recorded for predict() shows socket errors
        # printed during this step, so extraction appears to do network lookups - confirm.
        features_from_url_df = FeatureExtractor().generate_dataframe_from_urls(url_list)
        model = load_object(model_in_production_path)
        return model, features_from_url_df

    def predict(self, url_list: List[str]):
        """Predict a class label for every URL in ``url_list``.

        Args:
            url_list: URLs to classify.

        Returns:
            The result of the production model's ``predict`` on the extracted features.

        Raises:
            Phishing_Exception: If model lookup, feature extraction, model loading
                or prediction fails.
        """
        try:
            model, features_from_url_df = self._load_model_and_features(url_list)
            return model.predict(features_from_url_df)
        except Exception as e:
            raise Phishing_Exception(e, sys) from e

    def predict_proba(self, url_list: List[str]):
        """Predict class probabilities for every URL in ``url_list``.

        Args:
            url_list: URLs to score.

        Returns:
            The result of the production model's ``predict_proba`` on the extracted
            features.

        Raises:
            Phishing_Exception: If model lookup, feature extraction, model loading
                or prediction fails.
        """
        try:
            model, features_from_url_df = self._load_model_and_features(url_list)
            return model.predict_proba(features_from_url_df)
        except Exception as e:
            raise Phishing_Exception(e, sys) from e
        

In [12]:
# Build an estimator that looks for its models under `model_directory`.
pe = PhishingEstimator(model_directory)

In [8]:
# Sample URLs used to exercise the estimator.
urls = [
    "https://bloqueioltau.site/Sublime_Sublime_1084356737/Risos_Mae_560734658/#&token=1810416138",
    "https://www.geeksforgeeks.com/",
    "https://ip-72-182-82-263.ip.gw-designs.de/index2.php",
]

In [14]:
# Predict one class label per URL (later cells display 0 as "Not Phishing" and any
# other value as "Phishing").
pe.predict(urls)

Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed


array([1, 0, 1])

In [15]:
# Class probabilities for the same URLs, as returned by the model's predict_proba.
pe.predict_proba(urls)

Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed


array([[0.02517857, 0.97482143],
       [0.87354167, 0.12645833],
       [0.13959307, 0.86040693]])

# Checking the component

In [75]:
from phishing_domain_detection.entity.phishing_estimator import PhishingEstimator

In [76]:
# Re-create the estimator, this time from the packaged class imported just above.
pe = PhishingEstimator(model_directory)

In [77]:
# Same prediction call, now going through the packaged implementation.
pe.predict(urls)

array([0, 1, 1])

# Building endpoints

True

## Prediction via API endpoint

In [14]:
import json

In [26]:
# Relative model directory, redefined here for the endpoint experiments.
model_directory = "saved_models"

In [18]:
# JSON body to return when no saved model is available.
# The message previously read "prodiction"; corrected to "production".
json.dumps({
    "response": "No model in production"
})

'{"response": "No model in prodiction"}'

In [20]:
# Check whether the model directory exists; the relative path is resolved against
# the current working directory, so the answer depends on where the kernel is running.
os.path.exists(model_directory)

False

In [21]:
# A single URL given as a bare string rather than a list of URLs.
urls = "https://github.com/"

In [25]:
# Detect the "single URL passed as a string" case. isinstance is the idiomatic
# type check and also accepts str subclasses.
isinstance(urls, str)

True

In [78]:
# Estimator used for the endpoint experiments that follow.
pe = PhishingEstimator(model_directory)

In [79]:
# URLs used to try out the endpoint-style response formatting.
urls = [
    "https://www.google.com",
    "https://mining79.co/#/",
    "https://www.lemonhunt.com/",
]

In [80]:
# Keep the raw predictions in `res` for the post-processing steps below.
res = pe.predict(urls)

In [81]:
# Turn the predictions into a plain list so that individual entries can be replaced
# with string labels afterwards.
res = list(res)

In [82]:
# Show the predictions as a list.
res

[0, 1, 1]

In [83]:
# Translate numeric predictions into display labels: 0 -> "Not Phishing", any other
# value -> "Phishing". Entries that are already strings are kept as they are, so
# re-running this cell no longer turns "Not Phishing" into "Phishing".
res = [
    result if isinstance(result, str) else ("Not Phishing" if result == 0 else "Phishing")
    for result in res
]

In [84]:
# Show the predictions after relabelling.
res

['Not Phishing', 'Phishing', 'Phishing']

In [85]:
# Class probabilities for the endpoint URLs, as returned by the model's predict_proba.
pe.predict_proba(urls)

array([[0.98461111, 0.01538889],
       [0.46911389, 0.53088611],
       [0.64977598, 0.35022402]])

In [86]:
# Preview the URL -> label mapping; the result is displayed but not stored.
dict(zip(urls, res))

{'https://www.google.com': 'Not Phishing',
 'https://mining79.co/#/': 'Phishing',
 'https://www.lemonhunt.com/': 'Phishing'}

In [69]:
# Scratch value standing in for a single input string.
a = "sas"

In [70]:
# Wrap a bare string in a one-element list. Guarded so that re-running the cell
# does not nest the list one level deeper each time.
if isinstance(a, str):
    a = [a]

In [71]:
# Show the wrapped value.
a

['sas']

In [72]:
# Scratch string with a trailing space, used to try out strip().
z = "jdahdla "

In [74]:
# strip() returns a copy without leading/trailing whitespace; `z` itself is unchanged.
z.strip()

'jdahdla'

In [87]:
# Pair every URL with its label. Skipped when `res` is already the mapping, because
# zipping `urls` against a dict would pair each URL with a dict key instead of a label.
if not isinstance(res, dict):
    res = dict(zip(urls, res))

In [93]:
# Print only the labels of the mapping, one per line.
for label in res.values():
    print(label)

Not Phishing
Phishing
Phishing


In [90]:
# Show the final URL -> label mapping.
res

{'https://www.google.com': 'Not Phishing',
 'https://mining79.co/#/': 'Phishing',
 'https://www.lemonhunt.com/': 'Phishing'}