# Domain Based Features:
Many features fall under this category. Of these, the following were considered for this project:

1. DNS Record
2. Website Traffic
3. Age of Domain
4. End Period of Domain

In [80]:
import re
import pandas as pd
import time
import whois
import urllib
import urllib.request
from urllib.parse import urlparse,urlencode
from datetime import datetime

In [81]:
# 1.DNS Record availability (DNS_Record)
def check_DNS(url):
    """Report whether a WHOIS lookup for the URL's network location succeeds.

    The check is performed with ``whois.whois`` on ``urlparse(url).netloc``;
    no DNS query is issued by this function itself.

    Parameters
    ----------
    url : str
        Full URL whose ``netloc`` component is looked up.

    Returns
    -------
    int
        1 if the lookup completed without raising, 0 if it raised.
    """
    try:
        whois.whois(urlparse(url).netloc)
    except Exception:
        # Lookup failures (network errors, unknown domains, malformed URLs)
        # mean "no record". KeyboardInterrupt/SystemExit are deliberately not
        # caught so a long extraction run can still be stopped.
        return 0
    return 1
# 0->No DNS record found
# 1->DNS record found

In [82]:
# 2.Web traffic (Web_Traffic)
# Implemented in a separate notebook due to its complexity

In [83]:
# 3.Age of domain in days: creation date up to today, or up to the expiration date once the domain has expired (Domain_Age)
def _whois_date_to_naive(value):
    """Normalise one WHOIS date field to a naive, local-time ``datetime``.

    Accepts a ``datetime``, a ``'%Y-%m-%d %H:%M:%S'``-prefixed string, or a
    list of those (the first entry is used). Returns ``None`` when the value
    is missing, empty or cannot be interpreted.
    """
    if isinstance(value, list):
        # Several dates may be reported; keep the first one.
        value = value[0] if value else None
    if isinstance(value, str):
        try:
            value = datetime.strptime(value[0:19], '%Y-%m-%d %H:%M:%S')
        except ValueError:
            return None
    if not isinstance(value, datetime):
        return None
    if value.tzinfo is not None:
        # datetime.now() is naive local time; aware and naive values cannot
        # be compared or subtracted, so convert to local time and drop tzinfo.
        value = value.astimezone().replace(tzinfo=None)
    return value


def domainAge(domain_name):
    """Return the age of a domain in days, based on its WHOIS record.

    The age is measured from the creation date to now, or to the expiration
    date if that date already lies in the past.

    Parameters
    ----------
    domain_name : str
        Domain (or network location) passed to ``whois.whois``.

    Returns
    -------
    int
        Number of whole days, or 0 when the lookup raised or when the
        creation/expiration date is missing or unusable. Note that 0 is
        therefore also the result for a domain created less than a day ago.
    """
    try:
        w = whois.whois(domain_name)
    except Exception:
        # Best effort: any lookup failure is reported as "no dates" (0).
        return 0
    cd = _whois_date_to_naive(w.creation_date)
    ed = _whois_date_to_naive(w.expiration_date)
    if cd is None or ed is None:
        return 0
    today = datetime.now()
    # An expired domain stops ageing at its expiration date.
    end = ed if today > ed else today
    return (end - cd).days
# 0->could not retrieve dates
# Natural number->age of domain

In [84]:
# 4.End time of domain: Checks if domain is active or inactive 
def domainEnd(domain_name):
    """Classify a domain as expired, active or unknown from its WHOIS record.

    Parameters
    ----------
    domain_name : str
        Domain (or network location) passed to ``whois.whois``.

    Returns
    -------
    int
        0 if the expiration date is in the past, 1 if it is not, and 2 when
        the lookup raised or no usable expiration date was found.
    """
    try:
        w = whois.whois(domain_name)
    except Exception:
        # Best effort: any lookup failure is reported as "unknown" (2).
        return 2
    ed = w.expiration_date
    if isinstance(ed, list):
        # Several dates may be reported; keep the last one.
        ed = ed[-1] if ed else None
    if isinstance(ed, str):
        # Some records carry the date as text, e.g. '2025-01-31 12:00:00...'.
        try:
            ed = datetime.strptime(ed[0:19], '%Y-%m-%d %H:%M:%S')
        except ValueError:
            return 2
    if not isinstance(ed, datetime):
        return 2
    if ed.tzinfo is not None:
        # datetime.now() is naive local time; an aware value cannot be
        # compared with it, so convert to local time and drop tzinfo.
        ed = ed.astimezone().replace(tzinfo=None)
    return 0 if datetime.now() > ed else 1
# 0->deactivated domain
# 1->active domain
# 2->No expiration date found

In [85]:
#Function to extract features
def featureExtraction(url,label):
    """Build the domain-based feature row for one URL.

    Parameters
    ----------
    url : str
        URL to analyse; its ``netloc`` is what the WHOIS helpers receive.
    label : any
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    list
        ``[dns, age, end]`` where ``dns`` is the result of ``check_DNS``.
        When ``dns`` is 0 both remaining entries are fixed at 1 and the
        WHOIS helpers are not called; otherwise they hold the results of
        ``domainAge`` and ``domainEnd``.
    """
    has_record = check_DNS(url)
    host = urlparse(url).netloc
    row = [has_record]
    print("Check 10 of 17 complete.", end="\r")

    if has_record == 0:
        row.append(1)
    else:
        row.append(domainAge(host))
    print("Check 12 of 17 complete.", end="\r")

    if has_record == 0:
        row.append(1)
    else:
        row.append(domainEnd(host))
    print("Check 13 of 17 complete.", end="\r")

    return row

In [86]:
# Accumulates one [DNS_Record, Domain_Age, Domain_Active] row per processed URL.
# NOTE(review): the "legi" prefix suggests legitimate URLs, but the rows are
# built from the phishing-URL file read below.
legi_features = list()
label = 0  # forwarded to featureExtraction, which does not read it
df_phish = pd.read_csv(r'data\selected_phishing_urls.csv')
# Only index labels 3649..4999 are processed by this cell
# (presumably resuming an earlier, partial run -- confirm).
for i in range(3649, 5000):
    url = df_phish.loc[i, 'URLs']
    print("Extracting features for url number %d : " % i + url)
    legi_features.append(featureExtraction(url, label))

Extracting features for url number 3649 : http://www.ziboleq.com/app.onesignal.com/login.html
Extracting features for url number 3650 : http://klanten-registrate.info
Extracting features for url number 3651 : http://cafecitoperez.ivanidzakovic.com/admin/aociufd/goodl.vci/3bd9c5e87c04b0e36a8100de63884c84/view/login.php?cmd=login_submit&amp;id=0875d6431c5e1e9a3366a4d046d05c320875d6431c5e1e9a3366a4d046d05c32&amp;session=0875d6431c5e1e9a3366a4d046d05c320875d6431c5e1e9a3366a4d046d05c32
Extracting features for url number 3652 : https://storage.googleapis.com/makwvdhbjebdckisncacns9.appspot.com/rhevjndc/HKVAJBKSCNASLK.html
Extracting features for url number 3653 : http://lpryu.gov.la/wp-content/plugins/ubh/Home/signin/0009N12/
Extracting features for url number 3654 : https://the100prosystems.com/wp-admin/css/colors/sunrise/Update/signin/
Extracting features for url number 3655 : https://attloginnewupdatingservicemailcommunication.weebly.com/
Extracting features for url number 3656 : https://

Error trying to connect to socket: closing socket - timed out
Extracting features for url number 3708 : http://ijikc.co.in/sites/ijikc20/rating/ken7xx7y0p9/wemail_al.html/error.php?L=
Extracting features for url number 3709 : http://ziboleq.com/app.onesignal.com/login.html
Extracting features for url number 3710 : https://csgomagic-win.com/lrrh32ga29ygf6epe49rsdm3wo5zfc35
Extracting features for url number 3711 : http://p3plvcpnl353719.prod.phx3.secureserver.net/~ss7hbp8n23fy/Montepio/www.montepio.pt/index2.php
Extracting features for url number 3712 : https://www.itunes.losua.cn/mim/5t42p54812pt1w0694314d1q13w658waxclz28x6sw6y53ra41.html
Extracting features for url number 3713 : https://moodle.ncirl.ie/login/index.php
Extracting features for url number 3714 : https://millionsacademy.com/as
Extracting features for url number 3715 : https://wlrembroidery.com/go6/?ID=HdcHctN61Kqm3pxiUIW2DhNCe2by7j5BOCRDpr8s+tq+bl3XOjC+3k4Fd0phB_LJ0a3Vls0VqXdTnWilWoFUfK3Tcm25f
Extracting features for url 

Extracting features for url number 3774 : https://team.cmgcharleston.com/wp-content/upgrade/roujett/Some/customer_center/xBanana-MotherFucker441/myaccount/signin/
Extracting features for url number 3775 : https://www.thelogodept.com/wp-admin/includes/acess2/wells/
Extracting features for url number 3776 : https://www.hitekscaffolding.co.uk/0pen/oneddrive
Extracting features for url number 3777 : http://cheaproomsvalencia.com/Paypal/Support/ID-NUMB293/myaccount/signin/
Extracting features for url number 3778 : https://www.pompanogroup.com/banks/directing/desjardins/identifiantunique/indentification.php
Extracting features for url number 3779 : https://www.whatsapps-invites.zzux.com/
Extracting features for url number 3780 : http://meljackson.net/adobe/pdf.php?l=_jehfuq_vjoxk0qwhtogydw_payment-ttcopy&amp;userid=
Extracting features for url number 3781 : http://c1511482.ferozo.com/LOGIN/mabanquepostale/id/99fd6
Extracting features for url number 3782 : https://acct-piedmontcancerinstitute

Extracting features for url number 3839 : https://theaquaticmall.com/theaquaticmall_new/wp-content/plugins/revslider/Amile-A80050X001/
Extracting features for url number 3840 : https://gabesconstruction-my.sharepoint.com/:o:/g/personal/dmoncho_gabes_com/EmafwYaDEM9Ej2yFo9mmEnIBqVPjWb1EDKqLQhlLV7lEig?e=kFkEFF
Extracting features for url number 3841 : http://lagelijundiai.com.br/pagamento/cadastro.php
Extracting features for url number 3842 : http://cheaproomsvalencia.com/Paypal/Support/ID-NUMB376/myaccount/signin/
Extracting features for url number 3843 : http://staffandfaculty.my-free.website
Extracting features for url number 3844 : http://k99915xt.beget.tech/
Extracting features for url number 3845 : http://oralco.co/wp-admin/images/n/WE-TRANSFER/login.php?l=_JeHFUq_VJOXK0QWHtoGYDw1774256418
Extracting features for url number 3846 : http://fb-recovery-10000076926-it.tk/update_security.htm
Extracting features for url number 3847 : https://bookedandboarding.com/.tin/At&amp;t/index2.htm

Extracting features for url number 3897 : https://services.runescape.com-as.ru
Extracting features for url number 3898 : https://wirereach53.com/WED2020DC/www.file.com/d/view_document/
Extracting features for url number 3899 : https://catholichub.tv/aut/centurylink/3e87a
Extracting features for url number 3900 : https://cyber-punk2077.com/wp-admin/NetBanking/Login.htm
Extracting features for url number 3901 : https://csgofreexskins.fun/
Extracting features for url number 3902 : http://www.amazon.de.signin.verification.openid.5935156.viekofinews.cf/274879/germ/konto/p4kq3CnFTIL8OuhKGvYErfscUxV52NlAj/kunden256222/iTp5mhB9uqDaIn8oEYR2xlrFNXwSQ4AjJ/logmein.php
Extracting features for url number 3903 : http://mail-generali.com
Extracting features for url number 3904 : http://rollarcecilee.com/css/
Extracting features for url number 3905 : http://angelalindvall.com/wp-includes/SimplePie/Net/wp_redirect/c_popup.php/npvpu/wmxbp/?wait=r1g1ghr1u0dm0
Extracting features for url number 3906 : http

Extracting features for url number 3953 : https://livestudentccc-my.sharepoint.com/personal/rparry_student_ccc_edu/_layouts/15/guestaccess.aspx?guestaccesstoken=jaVdU5gwfh1XiTlwN%2fBfbrE8IBuejwo02it2q1uEKQs%3d&docid=1_1f09b2d1b1add4b17a7c945658ff6856a&wdFormId=%7B520F1FB4%2DC5EC%2D4F24%2DB378%2D2FF59E5F9DD7%7D
Extracting features for url number 3954 : http://bbsignage.com/images/brand/page
Extracting features for url number 3955 : http://www.netcabo.co
Extracting features for url number 3956 : https://my-wallet-platinum.su
Extracting features for url number 3957 : http://anthonytalks.com/home.html
Extracting features for url number 3958 : https://workshopstl.com/www.elite-transports.com_Invoice29034_Open_OneDrivePortal/updated_drive_shared_securely_online%20-%20Copy/
Extracting features for url number 3959 : https://netregistry.com.au.login.netregistryprivacylook.com/?gets=true&query0=2116&query1=8276&query2=6306&query3=1270&query4=9339&query5=2876&query6=7925&query7=9260&query8=6345&q

Extracting features for url number 4014 : https://martazancanaro.com/reca/No_cap/FBG/
Extracting features for url number 4015 : https://www.venue-hire.net.au/secure/ok/index.php
Extracting features for url number 4016 : https://forms.gle/uGjm1KN5rRTUdArS6
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Extracting features for url number 4017 : http://epg.unas.edu.pe/sites/all/libraries/nivo-slider/themes/bar/netflix.com.br
Extracting features for url number 4018 : https://storage.googleapis.com/staging.y6t5rfchghui7t86tuku79.appspot.com/f%20ry%206gb-igo/fr5uyfg#EMAIL_REDACTED
Extracting features for url number 4019 : https://fareast.qa/wp/onedrive/b94194dfc4e4db2c2a7f23f56998eb38/
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to co

Extracting features for url number 4076 : https://waytoketo.com/wp-content/themes/New%20Inquiry/login.php?cmd=login_submit&amp;id=12084d797fcdda259a395afffcbe2df512084d797fcdda259a395afffcbe2df5&amp;session=12084d797fcdda259a395afffcbe2df512084d797fcdda259a395afffcbe2df5
Error trying to connect to socket: closing socket - timed out
Extracting features for url number 4077 : http://ramgarhiamatrimonial.ca/phpmailer/inc-113-local/idex.php
Extracting features for url number 4078 : https://tiscali-rimborso.it/rimb/
Extracting features for url number 4079 : http://zbyk.drl.pl/google
Extracting features for url number 4080 : http://loginlogon.com/gmail-login-history/
Extracting features for url number 4081 : http://www.villasakuraubud.com/wp-content/plugins/arsip/admin/2b6d65b9a9445c4271ab9076ead5605a/signin.php
Error trying to connect to socket: closing socket - timed out
Extracting features for url number 4082 : https://lonosoneteams.blogspot.com/?id=info@studiobelleville.com
Extracting fea

Extracting features for url number 4135 : https://thegioisport.com/index/verification.php?sf58gfd1s689sxd2sdf8angf264s9df23sd2f1n495K3L2C151645172991f1477dbd26917ef3822423f62e984a91f1477dbd26917ef3822423f62e984a91f1477dbd
Extracting features for url number 4136 : http://www.envirotechcorp.com/bmline/Bmo/bmo.com/onlinebanking/cgi-bin/netbnx
Extracting features for url number 4137 : http://mdmplus.com.ph/uphty/
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Extracting features for url number 4138 : http://web-whatsappjoin.itsaol.com
Extracting features for url number 4139 : https://cartoes-caixa.com/cartoes/home/
Extracting features for url number 4140 : https://cnsenxiang.com/myBOA/en/challenge.php
Extracting features for url number 4141 : http://oralco.co/wp-admin/images/n/WE-TRANS

Extracting features for url number 4201 : http://eset-store.gr/dropboxbusiness/login.php
Extracting features for url number 4202 : https://wvk12-my.sharepoint.com/personal/rdean_k12_wv_us/_layouts/15/WopiFrame.aspx?guestaccesstoken=TEAE9E6KsiFgHRzFXGArVhO3qtsxKAe9zcFzfpFyb%2fM%3d&docid=1_17d87b45a88e74a27b6a87732f7b227a5&wdFormId=%7BC7D50609%2D17F5%2D441E%2DB328%2D0A974A4D585E%7D&action=formsubmit
Extracting features for url number 4203 : https://www.citigroupsmy.com/en/
Extracting features for url number 4204 : https://onedrivestoragedatayu.dns-cloud.net/Home/login1.php
Extracting features for url number 4205 : http://environaquatech.com/images/slider/boa/login.php?cmd=login_submit&amp;id=911ed3186255dfd81b4c820443012376911ed3186255dfd81b4c820443012376&amp;session=911ed3186255dfd81b4c820443012376911ed3186255dfd81b4c820443012376
Extracting features for url number 4206 : http://storage.googleapis.com/syswells/wells.html
Extracting features for url number 4207 : http://advonationusa.com/

Extracting features for url number 4265 : http://hamt.jp/program/wp-content/themes/twentyfourteen/home/latampass/cadastro/?cli=cliente&amp;/1vo8yxwj8t/wbqygpbmsv.php
Extracting features for url number 4266 : https://4ktvmagazineluiza.tftpd.net/4k-tvlg/LG-60/smart-tv4k-led60-60um7270psa-wifihdrinteligencia-artificial-controle-smart-magic.php?ass=uNsTw-o0GQ2OBs-zKv#$bEKGAS!RWbi$Tb-Y#p*obhZgi3N-TaG43pk%euswSpRgZ
Extracting features for url number 4267 : http://sanjoaquinvalleybrewfest.com/backup/wp-content/plugins/dude/configuration/themes/mak/index.php?email=contact@ironscales.com&amp;id=432526cfdsd6567656dgvdhytdfbhjgff4536365353
Extracting features for url number 4268 : http://livecalgary.com/smbc.co.jp/jp/jp/smbc.co.jp
Extracting features for url number 4269 : http://62.234.180.243/
Extracting features for url number 4270 : http://engeim.com.br/js/acesso/digital/
Extracting features for url number 4271 : http://autorizador5.com.br/landpage/img/icons/verifie/paypal.fr/ca7b2b3c52a7510f9

Extracting features for url number 4334 : https://valleyobgynassociates.com/wp-content/page5?a=settings
Extracting features for url number 4335 : http://interestingfurniture.com/img/create/
Extracting features for url number 4336 : http://trecable.ga/?r=SG-LP8-GT
Extracting features for url number 4337 : http://www.franktechnik.com/13DCAE46FBCDDC29B0A839E91820F07A
Extracting features for url number 4338 : http://vmorefraud.com/neaca/newyear/xb/304ca96ecdb5=
Extracting features for url number 4339 : http://denser-recognition.000webhostapp.com/BOAFULL3/1/update.php
Extracting features for url number 4340 : http://resgatemobilebb.com
Extracting features for url number 4341 : http://gamingakhada.com/a/chase/surf3.php
Extracting features for url number 4342 : http://sparkpassedecom.com/telecome/5d99f64cc2b319a7/
Extracting features for url number 4343 : http://www.decifra.com.br/at&t-mail/ATT/f3dcaa43248bc2428ff08179d016905e/
Extracting features for url number 4344 : http://03418f6.netsolho

Extracting features for url number 4392 : http://denser-recognition.000webhostapp.com/BOAFULL3/1
Extracting features for url number 4393 : https://meganmacylesolutions.com/secure/login.onlinebanking.suntrust.com/online.htm
Extracting features for url number 4394 : https://ubrewco.com/Fuckboy/office365/lixm4e8362szg7b91f5rcpoywj0udhvtankqxi2c87jowqn1uyp05fz3h6k4dltameg9rsvbmdcwib25z0ave73j4n1polk8fy6xsuhq9grt?data=bWVAeW91ci5tb20=
Extracting features for url number 4395 : http://senirolifestyle.com/link/
Extracting features for url number 4396 : http://8leap.com/invoice/doccssiigggnn.scrs/doccssiigggnn.scrs/okdocusign
Extracting features for url number 4397 : https://myadminatt.weebly.com/
Extracting features for url number 4398 : http://itir4.app.link/L0dNQhpxy4
Extracting features for url number 4399 : https://litelink.at/600paratodos
Extracting features for url number 4400 : http://betasus20.blogspot.com
Extracting features for url number 4401 : https://vppartners2-my.sharepoint.com/

Extracting features for url number 4455 : https://app.help-recovery-pages.my.id/?/Confirmationpage
Extracting features for url number 4456 : http://www.tptelecom.it/public/one.stting/inc-style/grece.paypai-id.pro428/myaccount/signin/
Extracting features for url number 4457 : http://forgesmithvr.com/ourtime/
Extracting features for url number 4458 : https://play.google.com/store/apps/details?id=com.itau
Extracting features for url number 4459 : https://sitioprocrediteng.azurewebsites.net/
Extracting features for url number 4460 : https://www.frigerio-corazza.com/.well-known/NEDBRANCH/NedbankMoney.htm
Extracting features for url number 4461 : https://app.box.com/s/bqjrkxs7pfyfcf10sqcrn9gwah0p1d7k
Extracting features for url number 4462 : https://faithpays.sowetoinnovations.co.za//KhRo/NedMoney~PROFILE~PIN~PASSWORD.htm
Extracting features for url number 4463 : http://rajwebtechnology.com/adfs/
Extracting features for url number 4464 : https://dzd.rksmb.org/conf07/--/https:/www2.netflix.co

Extracting features for url number 4523 : http://myauctionclock.com/filelist.xml
Extracting features for url number 4524 : http://3659nnn.cc/
Extracting features for url number 4525 : http://itemboxpubgm.my.id/
Extracting features for url number 4526 : http://fx23032022fx1apps87-1310418193.cos.eu-frankfurt.myqcloud.com/edocs.html
Extracting features for url number 4527 : http://somcoms.com/win
Extracting features for url number 4528 : http://schwab.bynder.com/default/redirecttoken/4a0b42b0-f4f4-4d7a-9f4a81117c456c5f
Extracting features for url number 4529 : http://61.tkservers.com/webmail/
Extracting features for url number 4530 : http://599227.selcdn.ru/ytss-bwsw-wxvd/index6.html?email=
Extracting features for url number 4531 : http://595221.selcdn.ru/crush-super-share/index4.html?email=
Extracting features for url number 4532 : http://595221.selcdn.ru/crush-super-share/index2.html?email=
Extracting features for url number 4533 : http://595221.selcdn.ru/crush-super-share/index9.html?e

Extracting features for url number 4595 : https://www--wellsfargo--com--m449329d48d6c.wsipv6.com/
Extracting features for url number 4596 : https://zabahr.com/Poslovni/Prijava/ZABAprijava_HR.php
Extracting features for url number 4597 : https://www--wellsfargo--com--1r49329d48d6c.wsipv6.com/
Extracting features for url number 4598 : https://www.36596666.com:8989/
Extracting features for url number 4599 : http://three-retail-live.devicetradein.co.uk/
Extracting features for url number 4600 : https://inbuild.pt/dir/
Error trying to connect to socket: closing socket - timed out
Error trying to connect to socket: closing socket - timed out
Error trying to connect to socket: closing socket - timed out
Extracting features for url number 4601 : https://inbuild.pt/dir
Error trying to connect to socket: closing socket - timed out
Error trying to connect to socket: closing socket - timed out
Error trying to connect to socket: closing socket - timed out
Extracting features for url number 4602 : h

Extracting features for url number 4655 : http://marketitem6248379803593582204723.atwebpages.com/
Extracting features for url number 4656 : https://delivery-ashutjem.strangled.net/Find?sslchannel=true&sessionid=0ue4XHkIuw6LdkjMtwGr4uUw6r397zyV0bWT9Xrm3apTHCVCLZc2huMYHi9fUgPvizPCBZ0cT6AU2w0qBkpJWs6rO1orb6fRCFW4BxygcZ2Mxq8mGumVmZXnkji9PEAHOp
Extracting features for url number 4657 : https://service-truswallet.com/
Extracting features for url number 4658 : https://particuliers-professionnels-societegenerale.fr/
Extracting features for url number 4659 : http://servicodevalidacion.revisar.repl.co/
Extracting features for url number 4660 : https://khanshakilahmed.com/impex.htm
Extracting features for url number 4661 : http://35.77.248.33:3000/regform
Extracting features for url number 4662 : http://35.77.248.33:3000/login
Extracting features for url number 4663 : https://supportteam8863122.co.vu/
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error tryin

Error trying to connect to socket: closing socket - timed out
Error trying to connect to socket: closing socket - timed out
Extracting features for url number 4730 : http://steamconmmuity.com/profles/76561197960279929988
Extracting features for url number 4731 : http://hsdsds.godaddysites.com/
Extracting features for url number 4732 : http://userblocked.co.vu/
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Extracting features for url number 4733 : http://userblocked.co.vu/crec.php
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Extracting features for url number 4734 : http://te

Extracting features for url number 4795 : https://selfsamebaboon.build067.wpsandbox.app/dew/A32Q0S/post/internet/Group/16d26/SSLAuthUI.html
Extracting features for url number 4796 : https://app.pancaekeswap.org/
Extracting features for url number 4797 : https://societe-generale-client.fr/
Extracting features for url number 4798 : https://delivery.buyvenoms.com/public/xhO70r49ucC90lUIBjoOyKGEqfINSlRx
Extracting features for url number 4799 : https://delivery.buyvenoms.com/public/jH0PsoA7m2K5Ze0q6riyYZs1mZE6qE2h
Extracting features for url number 4800 : https://instagram-produkimage.b0tnet.com/
Extracting features for url number 4801 : https://login.info.dokument.52-50-135-125.cprapid.com/id/dklogin.php
Extracting features for url number 4802 : http://ionos-webmail-login.com/
Extracting features for url number 4803 : https://s3.amazonaws.com/appforest_uf/f1673564595892x188635259025309320/view.html
Extracting features for url number 4804 : http://uk-dpd.com/billing.php
Extracting features

Extracting features for url number 4857 : http://www.iplayukulele.com/POST/BIbAZTGkMv4876/
Extracting features for url number 4858 : http://www.adolfopez.com/login/login.php
Extracting features for url number 4859 : http://www.wealthcreationforum.com/Boamn09/Bofa/verify-otp.php
Extracting features for url number 4860 : http://www.wealthcreationforum.com/Boamn09/Bofa/info.php
Extracting features for url number 4861 : http://www.wealthcreationforum.com/Boamn09/Bofa/otp.php
Extracting features for url number 4862 : http://www.wealthcreationforum.com/Boamn09/Process/userlog.php
Extracting features for url number 4863 : http://www.punirgaurav.in/punirg/landing-page/socder/yzjlmmjq=/
Extracting features for url number 4864 : http://www.xolares.com.ar/wp-includes/50/GlobalSources/
Extracting features for url number 4865 : http://www.xolares.com.ar/wp-includes/50/GlobalSources/index2.php?email=
Extracting features for url number 4866 : http://www.lastlocation-iphone-us.mipaginaweb.us/
Extracti

Extracting features for url number 4918 : http://www.55k365.com/mobile-client/index/
Extracting features for url number 4919 : https://nfcuif.link/login
Extracting features for url number 4920 : https://delivery.buyvenoms.com/public/kPQtb1keuuU5oJEDm9Kcs8ahf9o3pM9b
Extracting features for url number 4921 : https://delivery.buyvenoms.com/public/6WMhQfAGbRaNtzfhm8rkiuU9AOLAwbbg
Extracting features for url number 4922 : https://delivery.buyvenoms.com/public/wNjT6oiAJJ2P3PxIZWzhxTcphlfL2e3f
Extracting features for url number 4923 : https://www--wellsfargo--com--l349329d48d6c.wsipv6.com/es/biz/
Extracting features for url number 4924 : https://www.etc-co-buranden.xxchuban.com.cn/jp
Extracting features for url number 4925 : https://home.edu.my/lbp/ilandbank.ph/
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001

Extracting features for url number 4989 : https://boran.live/35/9.html
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
Extracting features for url number 4990 : http://checkered-tortoiseshell-quarter.glitch.me/onlineaccess298482nfcu.HTM
Extracting features for url number 4991 : https://kymtolley.com.au/wp-admin/Odrivex/
Extracting features for url number 4992 : https://beff163.com/assets/dad2fd42bgfuy.ejs
Extracting features for url number 4993 : http://ipfs.fleek.co/ipfs/bafybeibfefexvcdku5ak7niep3fq2ehqoiypcswkdvg6vgrckrewivc7ii?&
Extracting features for url number 4994 : https://fleek.ipfs.io/ipfs/bafybeibfefexvcdku5ak7niep3fq2ehqoiypcswkdvg6vgrckrewivc7ii?&
Extracting features for url number 4995 : https://my-metamessk.com/imports/
Extracting features for url number 4996 : https

In [87]:
# Column labels, in the same order as the values in each collected feature row.
feature_names = ['DNS_Record', 'Domain_Age', 'Domain_Active']

# One DataFrame row per entry of legi_features; preview the first ten.
legitimate = pd.DataFrame(data=legi_features, columns=feature_names)
legitimate.head(10)

Unnamed: 0,DNS_Record,Domain_Age,Domain_Active
0,1,2062,1
1,1,0,2
2,0,1,1
3,0,1,1
4,1,0,2
5,0,1,1
6,1,6148,1
7,1,0,2
8,1,3654,1
9,1,3657,1


In [88]:
# legitimate.to_csv("feature_extracted_data\phishing_urls_DBF.csv", header=True, index=False)

In [89]:
# Append the rows held in `legitimate` to the feature file already on disk.
# NOTE: this cell is not idempotent -- each run re-reads the CSV, appends
# `legitimate` again and overwrites the file, so running it twice duplicates rows.
df = pd.read_csv(r'feature_extracted_data\phishing_urls_DBF.csv')
df1 = pd.concat([df, legitimate])
# Raw string: in a normal literal "\p" is an invalid escape sequence
# (DeprecationWarning, SyntaxWarning on newer Python versions).
df1.to_csv(r'feature_extracted_data\phishing_urls_DBF.csv', header=True, index=False)