In [1]:
from OpenSSL import crypto
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
import os
import time

In [2]:
def normalise_counter(ctr):
    """Rescale the values of ``ctr`` in place so that they sum to 1.

    ``ctr`` is any mapping of key -> number (e.g. a ``collections.Counter``).
    Every value is divided by the sum of all values. The same object that was
    passed in is mutated and returned; no copy is made.

    An empty mapping is returned unchanged. A non-empty mapping whose values
    sum to zero raises ``ZeroDivisionError``.
    """
    denominator = sum(ctr.values())
    # Snapshot the keys first; only existing entries are reassigned.
    for label in list(ctr):
        ctr[label] = ctr[label] / denominator
    return ctr

In [3]:
def get_existing_certs(distinct_urls,folder):
    """Return the entries of ``distinct_urls`` that have a non-empty file in ``folder``.

    For each entry, the file ``folder + '/' + entry`` is opened and its first
    line is read. The entry is kept when that read returns anything other than
    the empty string (i.e. the file is not empty).

    Parameters
    ----------
    distinct_urls : iterable of str
        File names to look for inside ``folder``.
    folder : str
        Directory holding one file per entry.

    Returns
    -------
    list of str
        The entries with a readable, non-empty file, in input order.

    Entries whose file is missing, unreadable, is a directory, has an invalid
    path, or cannot be decoded are skipped silently (best-effort lookup).
    """
    successes = []
    data_path = folder+'/'
    for url in distinct_urls:
        try:
            # The context manager closes the handle even if the read fails;
            # previously every probed file was left open.
            with open(data_path+url) as f:
                has_content = f.readline() != ''
        except (OSError, ValueError):
            # OSError: missing/unreadable file or a directory.
            # ValueError: invalid path (e.g. embedded NUL) or a decoding error.
            continue
        if has_content:
            successes.append(url)
    return successes

In [4]:
# Load the two certificate datasets from disk and record their row counts.
phish_df = pd.read_csv('final_certificate_datasets/phish_dataset.csv')
phish_total = phish_df.shape[0]

benign_df = pd.read_csv('final_certificate_datasets/benign_certs.csv')
benign_total = benign_df.shape[0]

In [5]:
# Prefix of the per-day URL list; the loop below reads base_path + day + '.csv'.
base_path = "url_datasets/phishtank-"

# Prefix of the per-day folder of saved sessions; the folder is data_path + day.
data_path = 'phish_data-'

# Collection days to process, written as 'DD-MM'. The list is not contiguous.
days = [
    '16-04', '18-04', '19-04', '20-04', '21-04',
    '22-04', '23-04', '24-04', '30-04', '01-05',
    '03-05', '04-05', '05-05', '07-05', '08-05',
]

In [6]:
def get_domain_mapping(raw_urls):
    """Return, for every URL, the text between the second and third ``/``.

    For a URL of the form ``scheme://host/path`` this is the ``host`` part
    (including any port that is written there). The result is a list aligned
    position-by-position with ``raw_urls``.

    Raises ``IndexError`` for a string with fewer than two ``/`` characters.
    """
    domains = []
    for raw_url in raw_urls:
        # Only the third piece is needed, so splitting can stop after it.
        pieces = raw_url.split("/", 3)
        domains.append(pieces[2])
    return domains
def get_existing_raw_url_certs(raw_urls,successes):
    """Group raw URLs by domain, keeping only the domains listed in ``successes``.

    Parameters
    ----------
    raw_urls : iterable of str
        Full URLs; each one's domain is obtained via ``get_domain_mapping``.
    successes : iterable of str
        Domains to keep (must be hashable).

    Returns
    -------
    dict
        Maps each kept domain to the list of raw URLs that share it, in the
        order they appear in ``raw_urls``. Domains not in ``successes`` are
        absent from the result.
    """
    new_urls = get_domain_mapping(raw_urls)

    # A set makes each membership test O(1); testing against the original
    # list made the whole loop O(len(raw_urls) * len(successes)).
    wanted = set(successes)

    url_dict = dict()
    for raw_url, domain in zip(raw_urls, new_urls):
        if domain in wanted:
            url_dict.setdefault(domain, []).append(raw_url)

    return url_dict

In [7]:
# Parse the saved session files of every collection day.
#
# Results, one entry per day unless noted:
#   cert_lists    - parsed certificates
#   time_lists    - os.path.getctime() of each parsed file
#   vercode_lists - verify return code token for each parsed certificate
#   missing_lists - domains whose file is non-empty but holds no certificate
#   url_lists     - FLAT across all days: one list of raw URLs per parsed
#                   certificate, in the same order as the certificates

_PEM_BEGIN = "-----BEGIN CERTIFICATE-----"
_VERIFY_PREFIX = "Verify return code:"
# Stand-in used when a file has no verification line; its code parses to 'None'.
_NO_VERIFY_LINE = 'Verify return code: None (none)'


def _read_session(session_path):
    """Read one saved session file in a single pass.

    Returns a ``(text, verify_line)`` pair. ``text`` is the whole file content
    when any line (including the first) contains the PEM begin marker, and
    ``None`` otherwise, in which case ``verify_line`` is ``None`` as well.
    ``verify_line`` is the first line containing ``"Verify return code:"``,
    or a placeholder line when the file has none.
    """
    # The context manager guarantees the handle is closed; the file is read
    # once instead of once per check.
    with open(session_path) as session_file:
        lines = session_file.readlines()
    if not any(_PEM_BEGIN in line for line in lines):
        return None, None
    verify_line = next(
        (line for line in lines if _VERIFY_PREFIX in line), _NO_VERIFY_LINE
    )
    return ''.join(lines), verify_line


def _verify_code(verify_line):
    """Return the token that follows ``"Verify return code:"`` (e.g. ``'0'``).

    Works regardless of how far the line is indented; yields ``'None'`` when
    nothing follows the prefix.
    """
    tokens = verify_line.split(_VERIFY_PREFIX, 1)[1].split()
    return tokens[0] if tokens else 'None'


cert_lists = []
missing_lists = []
vercode_lists = []
time_lists = []
url_lists = []
for day in days:
    print(day)

    df = pd.read_csv(base_path+day+'.csv')
    urls = df['url']
    new_urls = get_domain_mapping(urls)
    distinct_urls = list(set(new_urls))

    # Domains that have a non-empty session file, and the raw URLs behind each.
    successes = get_existing_certs(distinct_urls,data_path+day)
    url_dict = get_existing_raw_url_certs(urls,successes)

    missing_cert_urls = []
    found_url_indices = np.zeros(len(successes),dtype=bool)
    certs = []
    creation_times = []
    verifications = []
    for i,url in enumerate(successes):
        cert_file = data_path+day+'/'+url
        pem_text, verify_line = _read_session(cert_file)
        if pem_text is None:
            missing_cert_urls.append(url)
            continue

        found_url_indices[i] = True
        cert = crypto.load_certificate(crypto.FILETYPE_PEM, pem_text)
        certs.append(cert)
        url_lists.append(url_dict[url])
        # NOTE: getctime is the metadata-change time on Unix and the creation
        # time on Windows; it is used here as the collection timestamp.
        creation_times.append(os.path.getctime(cert_file))
        verifications.append(verify_line)

    new_verifications = [_verify_code(v) for v in verifications]
    print("certs found: {} | empty cert sessions: {}".format(len(certs),len(missing_cert_urls)))
    cert_lists.append(certs)
    time_lists.append(creation_times)
    missing_lists.append(missing_cert_urls)
    vercode_lists.append(new_verifications)

16-04
certs found: 3189 | empty cert sessions: 1287
18-04
certs found: 3901 | empty cert sessions: 444
19-04
certs found: 4304 | empty cert sessions: 475
20-04
certs found: 4466 | empty cert sessions: 480
21-04
certs found: 4323 | empty cert sessions: 384
22-04
certs found: 3323 | empty cert sessions: 306
23-04
certs found: 1684 | empty cert sessions: 153
24-04
certs found: 4295 | empty cert sessions: 389
30-04
certs found: 4428 | empty cert sessions: 214
01-05
certs found: 3345 | empty cert sessions: 155
03-05
certs found: 0 | empty cert sessions: 0
04-05
certs found: 4490 | empty cert sessions: 243
05-05
certs found: 4290 | empty cert sessions: 245
07-05
certs found: 5088 | empty cert sessions: 486
08-05
certs found: 386 | empty cert sessions: 44


In [8]:
# Number of entries in url_lists; the loop above appends one per parsed certificate.
len(url_lists)

51512

In [9]:
# Flatten url_lists (one list of raw URLs per certificate) into a single list
# and show how many raw URLs it holds in total.
full_list = []
for s in url_lists:
    full_list.extend(s)
len(full_list)

93194

In [10]:
# Number of distinct raw URLs. Lower than len(full_list) because URLs repeat,
# presumably when the same URL is listed on several days - TODO confirm.
len(set(full_list))

13977

In [11]:
# Build one DataFrame per collection day with one row per raw phishing URL.
# A certificate shared by several raw URLs is repeated once for each of them.
#
# url_lists is flat across days while cert_lists is nested per day, so a
# running index walks url_lists in step with the certificates.
dfs = []
url_list_idx = 0
for n,day in enumerate(days):
    # Columns are assigned one by one onto an empty frame (rather than built
    # from a dict) so that days without certificates keep the same dtypes.
    final_df = pd.DataFrame()
    certs = cert_lists[n]

    phish_urls = []
    new_certs = []
    new_vers = []
    new_times = []
    for z,cert in enumerate(certs):
        curr_urls = url_lists[url_list_idx]
        for u in curr_urls:
            new_certs.append(cert)
            new_vers.append(vercode_lists[n][z])
            new_times.append(time_lists[n][z])
            phish_urls.append(u)
        url_list_idx += 1
    certs = new_certs
    subjects = [cert.get_subject() for cert in certs]
    issuers = [cert.get_issuer() for cert in certs]

    # NOTE: has_expired() is called when this cell runs, so 'expired' reflects
    # the run time rather than the collection time.
    expiries = [cert.has_expired() for cert in certs]
    final_df['url'] = phish_urls
    final_df['CN'] = [subject.CN for subject in subjects]
    final_df['O'] = [subject.O for subject in subjects]
    final_df['C'] = [subject.C for subject in subjects]
    final_df['businessCategory'] = [subject.businessCategory for subject in subjects]
    final_df['serialNumber'] = [subject.serialNumber for subject in subjects]
    final_df['juristiction'] = [subject.jurisdictionLocalityName for subject in subjects]
    final_df['issuer_CN'] = [issuer.CN for issuer in issuers]
    final_df['issuer_O'] = [issuer.O for issuer in issuers]
    final_df['expired'] = expiries
    # notBefore/notAfter are stored as returned by pyOpenSSL (bytes such as
    # b'20160715000000Z').
    final_df['notBefore'] = [c.get_notBefore() for c in certs]
    final_df['notAfter'] = [c.get_notAfter() for c in certs]
    final_df['verCode'] = new_vers
    final_df['time_collected'] = new_times

    # Append only once every column is in place.
    dfs.append(final_df)

# Every url_lists entry must have been paired with exactly one certificate;
# otherwise URLs were silently attached to the wrong certificates.
assert url_list_idx == len(url_lists), (
    "url_lists ({}) is out of step with cert_lists ({})".format(
        len(url_lists), url_list_idx
    )
)

1
1
2
1
1
2
1
1
1
2
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
2
1
4
1
2
1
2
1
2
3
1
1
1
2
2
2
1
1
2
2
5
1
2
1
2
1
1
1
2
1
2
1
2
2
2
1
1
1
1
1
1
1
1
1
2
1
2
1
2
1
2
2
3
2
2
2
2
1
2
7
1
1
10
1
1
1
2
2
1
1
2
1
1
2
2
1
1
1
2
1
2
1
2
1
2
2
1
1
1
1
2
1
2
2
1
2
2
2
2
1
1
1
4
1
1
1
1
2
2
1
1
1
1
1
1
1
1
2
1
1
2
1
1
1
10
1
1
1
1
1
2
1
1
1
2
2
1
1
2
1
1
1
2
2
2
1
4
1
1
2
1
1
1
1
3
1
6
3
1
1
1
1
1
1
1
1
1
1
1
5
1
2
1
3
2
1
1
2
1
1
1
2
1
1
1
1
2
2
2
1
1
1
1
1
1
2
2
2
1
1
1
1
2
2
2
2
1
2
1
1
1
1
1
1
2
2
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
2
1
1
4
1
1
2
1
2
1
1
4
1
1
1
2
2
4
2
2
1
2
2
1
1
1
1
16
1
1
3
1
1
2
1
1
1
1
2
71
1
2
1
3
1
1
1
1
1
1
2
1
1
2
2
1
1
1
1
3
1
1
1
2
2
1
1
2
1
1
1
1
2
1
1
1
2
1
2
1
2
2
2
1
1
1
2
1
1
1
1
1
2
1
1
1
2
1
3
1
1
4
1
1
1
1
1
1
1
1
1
4
1
3
1
1
1
2
1
1
2
1
1
2
2
3
2
2
1
1
1
2
2
2
2
1
1
2
1
2
1
1
1
1
1
2
1
1
2
2
1
1
2
1
1
2
3
1
1
2
1
2
1
1
1
1
1
1
1
2
2
1
2
2
1
11
1
9
1
1
2
1
1
1
1
1
2
1
2
1
1
1
1
1
2
1
1
1
2
2
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
2
1
1
2
2
1
1
1
1
1
1
1
3
2
2
1
1
1
1
4
1
2
1
2
1

2
1
2
2
1
1
1
2
1
5
3
1
1
2
1
2
1
6
1
2
1
37
1
2
1
2
5
1
1
2
1
1
2
2
1
2
1
1
2
1
1
1
1
2
2
3
1
1
1
2
1
1
1
4
2
1
1
1
1
2
1
1
1
1
1
1
2
1
1
2
1
1
2
1
1
1
3
3
1
1
2
2
1
1
1
2
1
3
1
1
2
2
2
1
1
1
2
1
3
1
2
2
1
2
1
1
1
1
1
1
1
1
1
1
1
1
2
2
1
1
2
1
5
1
1
1
1
1
2
1
2
1
1
1
2
1
1
1
1
2
2
1
1
2
1
1
1
1
1
1
1
1
1
22
1
1
2
1
1
1
1
3
1
1
1
1
1
3
2
1
1
2
1
1
1
2
1
1
2
1
1
1
2
3
1
1
2
1
2
21
1
1
4
3
2
1
2
4
2
1
1
1
1
1
1
1
1
2
2
1
1
1
1
1
1
2
1
1
1
1
2
2
16
1
1
1
1
1
2
1
2
2
1
2
1
1
1
2
1
1
1
1
1
6
1
2
1
8
2
1
2
5
1
2
1
1
1
2
1
1
2
1
1
1
2
1
1
1
1
2
1
1
1
4
2
1
2
2
2
1
2
1
9
1
6
1
2
6
2
1
2
1
2
1
2
1
2
1
1
2
1
1
2
1
1
10
2
1
1
3
1
1
1
1
1
1
4
2
1
4
1
1
1
14
3
1
1
2
1
3
2
1
1
1
2
2
1
3
2
1
1
1
1
5
2
1
1
4
1
1
2
2
35
2
1
1
1
1
1
1
2
1
1
2
5
2
2
1
1
1
4
1
1
1
1
2
1
1
1
1
1
1
2
1
2
1
2
1
1
1
1
2
1
1
1
2
1
1
1
1
1
1
1
1
5
1
1
2
2
2
3
1
1
2
1
1
2
2
1
1
1
1
1
1
2
1
1
2
1
3
1
1
1
1
1
1
2
2
1
12
1
2
1
2
1
2
1
2
11
2
2
1
2
1
1
1
1
1
2
3
1
1
2
3
2
2
1
1
1
1
1
2
9
4
1
1
1
1
1
2
2
1
2
1
1
1
1
1
2
2
2
1
2
4
1
1

1
2
2
1
2
2
1
2
1
1
2
6
1
1
1
2
1
2
1
2
2
2
3
3
2
2
1
2
1
1
1
2
1
1
1
2
1
2
2
1
3
1
1
1
1
1
2
1
2
3
1
2
1
1
1
1
1
1
2
1
10
2
1
4
1
4
2
1
1
1
2
1
97
2
1
1
1
2
2
1
1
2
1
1
2
1
1
10
1
1
1
1
1
3
4
3
2
2
1
1
2
6
2
2
1
4
2
14
1
1
2
1
1
1
2
1
2
1
1
1
1
1
1
1
1
1
2
1
2
1
2
1
1
1
1
1
1
2
1
2
2
1
1
1
2
2
1
2
2
1
1
2
1
2
2
1
2
1
1
2
1
2
1
1
1
1
2
1
1
2
1
3
1
2
1
4
1
1
1
1
4
1
2
2
2
1
2
1
2
1
2
2
1
1
2
1
1
2
1
1
1
1
1
1
1
1
1
4
1
1
2
1
3
1
2
1
1
2
1
2
2
2
1
4
1
1
1
1
1
1
1
1
2
1
1
1
4
1
2
1
1
2
13
1
1
1
1
1
2
62
1
2
1
1
2
1
1
2
3
2
1
1
1
1
5
3
1
1
2
1
1
1
1
1
2
2
1
1
1
1
1
1
2
2
1
1
2
5
2
1
3
3
1
2
1
1
1
1
1
4
1
1
20
1
1
1
2
1
2
1
1
1
1
1
1
1
6
1
1
5
1
2
1
1
1
2
1
1
1
1
1
3
2
1
3
1
1
2
1
1
1
1
1
2
1
1
2
2
1
2
9
1
9
1
1
1
1
1
1
1
2
1
1
1
2
1
1
1
2
1
2
1
1
2
1
1
1
1
1
1
1
1
3
1
1
1
1
2
1
1
1
3
1
1
1
5
2
1
1
1
1
1
2
1
2
1
2
2
1
1
2
8
1
4
1
1
1
1
1
3
1
2
1
1
2
2
2
1
3
2
2
4
1
2
1
5
1
1
1
1
2
2
1
1
1
1
2
2
2
2
1
25
1
1
2
1
1
1
1
2
1
1
2
1
1
1
2
2
2
2
2
2
2
2
3
2
1
2
1
1
1
4
1
1
2
1
1
2
1
1
1
2
1
1
2
2


1
2
1
10
2
1
4
1
1
4
2
2
1
1
2
1
97
2
1
1
1
2
2
1
1
1
1
1
2
1
1
11
1
2
4
1
1
1
1
3
4
3
2
1
2
2
1
1
2
1
1
6
2
2
1
1
4
2
14
1
1
2
1
1
1
2
1
1
2
2
2
1
1
1
1
1
1
1
1
1
2
1
1
2
1
1
1
1
1
2
1
2
1
1
1
1
2
1
2
2
1
1
2
1
2
2
1
2
1
1
2
2
1
1
1
2
1
2
1
3
1
1
1
4
1
1
1
1
4
1
2
2
2
1
2
1
2
1
2
2
1
1
2
1
1
2
1
1
1
1
1
1
1
2
1
1
4
1
1
2
1
3
1
2
1
1
1
2
1
2
2
2
1
1
4
1
1
1
1
1
1
1
1
2
1
1
1
4
1
2
1
1
2
13
1
1
1
1
1
1
2
63
1
2
1
1
2
1
2
3
2
1
1
1
1
5
3
1
1
2
1
1
1
2
1
2
2
1
1
1
1
1
1
2
2
1
1
2
2
1
3
3
1
2
1
1
1
1
1
1
1
1
20
1
1
1
2
1
2
1
1
1
1
1
1
1
6
1
1
5
1
2
1
1
1
2
2
1
1
1
1
1
1
3
2
1
3
1
1
1
1
1
1
1
2
1
1
2
2
6
1
2
5
1
9
1
1
1
1
1
1
1
2
1
1
1
1
1
2
1
2
1
1
2
2
1
1
1
1
1
1
1
3
1
1
1
1
2
1
1
3
1
1
5
1
1
1
1
1
2
1
2
1
2
2
1
1
2
8
1
4
1
1
1
3
1
1
2
1
1
2
2
2
1
3
2
2
2
4
1
2
1
5
1
1
1
1
2
2
2
1
1
1
1
1
2
2
2
2
25
1
1
1
2
1
1
1
1
2
1
1
2
1
1
1
2
2
2
2
2
1
2
1
2
2
2
2
2
1
2
1
1
1
1
1
4
1
1
2
1
1
2
1
1
1
2
1
1
2
1
2
2
2
1
1
2
2
1
1
3
3
1
1
1
1
2
1
2
1
1
2
1
2
1
2
2
1
1
97
1
1
1
2
1
1
1
1
2
2
1
1
1
2
11
1


7740
4323
1
2
1
1
1
1
1
1
2
2
1
1
1
1
2
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
2
1
1
4
1
1
2
1
2
1
1
1
1
2
2
1
1
1
1
2
3
1
1
2
1
1
1
1
2
1
2
3
1
1
1
1
1
1
1
1
7
1
1
1
2
1
2
1
1
1
1
2
3
1
2
1
2
1
2
1
1
1
1
1
2
7
1
1
8
1
2
1
2
1
2
1
1
2
1
1
1
2
1
1
2
1
1
1
3
2
1
1
4
1
1
2
3
1
1
2
1
4
2
2
2
2
2
1
1
1
1
1
2
1
1
1
2
1
1
1
1
1
1
1
2
1
2
1
1
1
1
1
1
1
1
1
1
1
2
1
2
1
1
1
1
1
1
1
1
2
2
1
1
2
2
2
1
1
1
2
1
1
1
1
1
1
1
6
1
1
1
1
1
1
1
1
1
1
1
1
5
1
1
1
1
1
1
2
1
2
1
1
1
1
1
1
1
2
2
2
1
1
2
1
2
2
1
1
1
2
1
2
2
2
2
1
1
2
1
1
2
1
5
1
1
1
1
2
2
1
1
1
1
2
3
1
1
1
1
1
1
1
1
1
1
2
1
2
1
1
4
1
1
1
1
1
1
1
1
1
10
1
1
1
2
4
5
1
2
1
1
1
3
1
15
1
1
1
2
1
1
1
2
77
1
1
1
2
1
1
1
1
1
1
1
1
2
1
1
1
2
1
6
1
1
1
2
1
2
1
1
1
1
1
1
3
1
1
2
2
1
1
2
1
1
1
1
2
1
1
1
1
1
2
1
1
2
5
1
2
1
1
2
1
1
1
1
1
1
1
4
1
2
1
1
1
1
3
1
2
1
1
1
3
1
4
1
1
1
1
1
4
2
1
1
1
2
2
1
1
1
1
3
1
1
1
2
2
7
2
1
2
1
1
3
2
1
2
1
1
1
1
1
2
1
1
2
1
1
2
3
1
1
1
2
2
2
1
1
1
2
2
2
1
1
10
1
2
4
1
1
1
2
4
1
2
1
1
1
1
2
1
1
1
1
2
1
2
1
1
1
1
1
3
1
1
1
1
1
2
1
1


2821
1684
1
1
2
1
1
1
1
1
1
1
1
1
2
1
2
1
1
1
1
1
1
2
1
1
1
1
1
1
2
1
2
1
1
1
1
1
1
1
2
1
1
1
4
1
2
1
1
1
1
2
1
1
1
1
1
1
1
1
2
1
2
2
1
2
1
1
1
1
2
1
3
1
1
1
1
1
2
3
1
1
1
1
2
1
1
2
3
1
2
1
1
1
1
1
1
1
1
1
1
2
1
1
1
2
2
1
2
1
1
1
1
2
3
1
2
1
2
2
1
2
1
1
2
1
1
3
1
1
1
1
7
1
1
1
7
1
1
2
1
1
2
1
1
1
2
1
1
2
2
1
1
1
1
1
2
1
1
1
1
2
1
2
2
1
1
1
3
2
1
1
4
1
1
2
3
1
1
1
1
1
1
2
1
4
2
2
2
1
2
2
1
1
2
2
1
1
4
1
1
1
1
1
2
1
1
1
2
1
1
1
1
1
1
1
1
1
1
2
1
2
2
1
1
1
1
1
1
1
1
1
1
2
1
2
1
1
1
1
1
1
1
2
1
1
1
1
2
1
2
2
1
1
1
1
2
1
1
1
1
1
1
1
3
1
1
4
1
1
6
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
2
1
1
1
1
2
2
2
1
2
1
2
1
1
1
1
1
1
2
2
1
1
1
1
1
1
1
2
1
1
2
2
1
1
1
2
1
2
1
3
2
2
2
2
1
1
2
1
1
1
2
2
1
5
1
1
1
1
1
2
2
3
1
1
1
1
1
2
3
1
1
1
1
1
1
1
1
1
1
2
1
3
1
1
2
1
1
4
1
1
1
1
1
1
1
1
9
1
1
1
2
4
5
2
1
2
2
2
1
1
1
3
1
14
1
4
1
2
1
1
1
2
1
2
1
1
1
1
1
76
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
2
1
6
1
1
1
1
1
2
2
1
1
1
1
3
1
1
1
2
2
2
1
1
2
2
1
1
1
1
1
2
2
1
1
1
1
1
2
1
2
1
2
5
1
1
1
1
2
2
1
2
1
1
1
1
1
2
1
1
1
1
4


1
1
1
2
1
1
1
1
1
1
1
1
2
5
2
1
1
1
1
3
1
1
1
4
1
3
2
1
23
1
1
1
1
2
2
1
1
1
1
1
2
1
1
3
1
1
2
2
1
1
2
1
1
1
1
1
1
1
2
1
2
1
1
3
1
1
1
1
1
14
1
1
1
1
1
1
1
1
1
2
1
2
1
1
1
1
2
1
1
1
1
2
2
2
1
1
2
1
5
3
1
1
1
1
1
1
1
1
2
3
2
1
1
1
3
1
3
1
2
2
1
1
2
1
3
2
1
1
2
1
2
1
1
1
2
3
7708
4295
7708
4295
1
1
2
1
1
1
1
1
1
1
1
2
2
1
1
1
1
1
2
2
1
1
1
1
1
2
1
1
1
2
1
1
1
1
1
2
1
1
1
1
4
1
1
1
1
2
1
2
1
2
1
2
1
1
1
1
1
1
2
1
2
2
1
1
2
2
2
1
1
1
1
2
1
3
1
1
1
1
2
1
1
1
1
2
1
1
1
2
3
2
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
2
2
1
1
2
2
2
1
1
1
1
1
2
3
1
2
1
1
2
2
1
2
1
1
2
2
1
1
1
1
1
1
1
1
1
2
1
7
2
1
1
7
2
2
2
1
1
2
1
1
1
1
2
1
1
2
2
2
1
1
1
1
1
2
1
2
1
1
2
2
1
1
1
1
2
1
2
1
4
1
1
2
3
1
1
1
1
2
1
4
2
2
1
1
2
2
2
2
1
1
2
4
1
1
1
1
1
2
1
1
1
2
1
1
1
1
1
1
2
1
1
1
1
1
2
1
2
1
2
2
1
1
1
1
10
1
1
1
1
1
1
1
2
1
2
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
2
1
2
1
1
1
1
1
1
1
1
1
3
1
1
1
1
6
1
1
1
1
1
2
1
1
3
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
1
1
2
2
1
2
1
2
1
1
1
1
1
2
2
1
2
1
1
1
1
1
1
2
1
2
2
1
1
1
2
1
1
2
1
3
2
2
2
2
1
1
1
1

7930
4428
1
1
1
1
1
2
1
1
1
1
2
2
1
1
1
1
2
1
1
2
1
1
1
1
2
1
1
1
1
1
2
1
2
1
2
1
1
1
1
1
2
2
2
2
1
2
1
2
1
1
2
1
2
1
1
1
1
2
1
1
3
2
1
1
1
2
1
1
1
1
1
1
1
1
2
1
1
2
1
2
1
1
1
1
2
3
1
2
1
1
1
4
2
2
1
2
1
2
1
1
1
1
1
1
2
1
2
1
1
7
2
1
2
1
1
2
1
1
1
1
2
1
2
1
1
1
2
1
1
2
2
2
1
2
4
1
4
1
2
3
1
1
1
1
1
4
2
2
2
2
2
1
1
2
2
1
1
1
1
2
1
1
4
1
1
1
1
1
1
2
1
1
1
1
2
1
1
1
2
1
2
2
1
1
1
10
1
1
1
1
1
2
2
1
1
1
1
1
1
2
1
1
1
2
1
2
1
1
1
2
1
1
1
1
1
1
3
1
1
1
6
1
1
1
2
1
1
1
1
1
1
1
1
12
1
1
1
1
1
2
2
2
1
2
1
2
1
1
1
1
1
2
1
2
1
1
1
1
1
1
2
1
2
2
1
1
1
1
2
1
2
2
1
3
2
1
1
1
2
1
2
1
5
1
1
1
2
2
1
1
2
1
2
1
3
1
1
1
2
1
1
1
1
2
1
4
1
1
1
1
1
1
10
1
2
2
4
5
2
1
2
2
2
1
1
3
1
14
1
1
1
1
1
8
1
1
73
1
3
2
1
1
1
1
1
1
1
2
1
6
1
1
1
1
1
2
2
1
1
1
1
3
2
1
1
1
1
2
2
1
2
1
1
2
1
1
1
1
2
1
2
1
2
5
1
1
1
2
1
2
1
1
2
1
2
1
2
1
2
1
2
4
1
2
1
1
1
1
3
1
2
1
1
2
1
1
1
4
3
1
1
2
4
1
4
2
1
1
2
1
1
1
2
1
2
1
2
2
1
1
2
2
7
1
2
1
2
1
2
1
1
2
1
2
2
1
1
1
1
1
2
2
1
3
2
1
1
2
3
1
1
2
2
2
1
1
1
1
1
2
1
10
1
1
1
2
4
1
1
1
1
2


1
1
3
1
1
1
2
1
1
2
2
1
4
2
1
1
2
1
2
1
2
1
1
1
2
12
5
1
2
1
2
1
1
1
2
2
1
1
2
2
1
1
1
1
1
1
2
1
1
1
1
2
3
1
1
1
1
2
4
2
1
1
1
1
1
2
1
1
1
1
3
1
2
1
1
1
2
1
2
2
2
1
1
1
3
1
2
1
1
1
1
2
2
2
1
1
3
1
3
1
2
1
1
4
1
2
1
2
1
2
5
2
5
1
3
10
1
2
4
1
2
2
2
1
1
1
1
1
1
1
2
2
1
1
1
1
1
1
1
1
1
1
1
10
1
1
2
1
3
1
1
1
1
1
1
1
1
1
1
3
1
1
1
2
1
1
1
1
2
2
1
2
1
1
1
1
1
2
2
1
2
2
1
1
5
2
1
1
1
2
1
1
2
2
1
2
1
1
1
2
1
1
1
2
1
1
2
1
2
4
2
2
2
2
1
1
1
2
1
2
1
1
5
1
1
1
1
2
6
2
1
4
1
2
1
1
1
1
1
1
2
2
1
2
1
1
1
2
1
2
1
2
2
1
1
1
1
2
1
2
1
1
3
3
7
1
2
1
1
2
1
1
5
1
1
1
1
1
2
2
1
1
1
1
1
2
1
1
1
2
1
1
2
1
5
3
1
2
1
1
1
1
1
1
1
1
1
1
2
1
1
2
2
2
2
2
2
1
1
1
2
1
2
1
1
2
1
1
1
2
1
1
2
1
1
1
1
2
1
1
1
1
1
1
2
1
2
1
3
1
2
1
2
2
2
1
1
1
2
2
1
1
1
2
1
1
2
1
6
5
3
1
2
2
1
3
2
1
1
37
1
2
2
1
2
2
1
2
1
1
2
2
1
1
1
1
2
1
1
2
1
1
1
6
2
2
1
1
1
2
1
1
1
4
1
1
1
1
1
2
1
1
3
1
1
1
1
2
1
1
1
1
1
1
1
2
1
1
3
1
2
2
1
1
2
1
1
2
2
1
1
1
1
1
1
2
2
2
2
2
1
1
1
1
1
2
1
3
1
2
1
2
1
5
2
2
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
2
2
6
1
1
1


2
5
3
2
1
1
2
1
3
8
1
2
1
2
2
2
1
1
1
2
1
2
1
3
3
2
1
1
1
1
2
1
3
2
3
1
1
13
2
1
1
1
1
2
3
1
1
2
1
1
8
1
2
1
2
1
2
1
2
22
1
1
1
2
1
1
1
1
2
1
2
9
1
1
5
1
1
1
1
1
2
1
1
1
1
2
2
2
1
1
1
1
1
2
1
1
10
2
1
2
1
2
1
3
2
1
1
1
3
1
1
1
1
1
2
3
2
1
1
1
1
2
1
1
11
1
2
1
1
1
2
1
1
1
2
2
1
2
1
2
1
1
2
1
2
1
2
3
1
5
1
1
1
2
1
2
1
2
1
1
2
1
1
1
2
2
1
1
1
2
1
1
1
2
2
1
1
1
1
1
1
2
1
1
2
1
2
1
1
2
1
1
4
2
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
2
2
2
2
2
1
1
1
1
1
1
1
1
2
1
2
2
1
1
3
1
1
1
2
2
1
1
1
1
1
5
2
1
5
2
1
1
1
1
1
1
1
1
1
1
1
1
1
4
1
6
1
1
2
2
1
1
1
2
2
1
1
4
1
1
2
2
1
1
1
1
2
1
1
1
1
1
1
1
2
1
1
1
1
1
2
1
1
1
3
2
2
2
1
2
2
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
2
1
3
1
1
1
2
1
2
1
2
8
2
2
1
2
1
1
1
2
1
1
1
1
1
1
1
2
1
1
1
1
1
2
4
1
1
2
2
8
1
2
1
2
2
2
2
8
7
1
1
1
1
1
2
1
2
1
2
1
2
1
2
1
1
1
2
1
1
1
2
2
1
1
1
1
1
2
4
1
1
1
2
1
1
1
1
1
1
1
1
2
2
1
1
2
1
2
2
1
1
1
1
1
2
5
1
2
2
1
1
1
1
1
1
2
1
1
1
2
1
1
2
1
1
2
1
2
1
1
4
1
1
8
1
1
2
1
1
4
1
1
1
1
1
2
1
2
2
1
1
1
2
1
2
1
1
1
1
2
1
1
2
2
1
1
2
5
1
4


3
2
1
2
2
2
1
1
2
2
1
3
2
8
1
2
1
2
1
2
1
1
1
2
2
1
1
1
3
3
2
1
1
1
1
2
2
1
1
1
3
2
3
1
1
13
2
1
1
1
1
1
1
1
1
1
2
1
1
1
8
1
2
2
1
2
1
1
2
4
1
22
1
1
1
1
2
1
1
4
1
1
1
2
2
1
1
2
9
1
1
5
1
1
1
1
1
1
1
2
4
1
1
1
2
2
1
2
2
2
1
1
1
1
1
1
1
2
1
1
9
2
1
1
2
1
2
1
3
2
1
1
3
1
1
1
1
1
1
1
1
2
2
3
2
1
1
1
1
1
2
1
1
11
1
2
1
1
1
2
1
1
1
2
1
1
2
1
2
1
1
2
2
1
2
1
2
3
1
5
1
1
1
1
2
1
1
2
4
1
1
1
2
1
2
2
1
1
2
1
2
1
1
1
2
2
1
1
1
2
6
1
2
1
1
1
2
1
2
1
1
1
1
1
2
1
1
1
1
2
2
1
2
1
1
2
1
1
2
2
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
2
2
1
2
2
2
1
4
1
1
1
1
1
1
2
1
2
2
2
1
1
3
1
1
1
1
2
2
1
1
1
1
1
1
1
5
2
1
5
2
2
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
4
1
6
1
1
1
1
1
2
2
1
1
1
2
2
1
1
1
1
4
1
1
2
2
1
1
1
2
2
1
2
1
1
2
1
1
1
1
1
2
2
1
1
1
1
1
2
1
1
1
3
1
1
2
2
2
1
2
1
2
1
1
1
1
1
1
2
1
1
2
2
1
1
2
1
2
2
1
3
1
1
1
1
1
2
1
2
1
1
2
7
2
1
2
1
1
1
1
2
1
1
1
1
1
1
1
2
1
1
1
1
1
2
1
2
3
1
1
1
1
2
1
1
8
2
2
1
1
1
2
2
2
2
8
7
1
1
1
1
2
1
2
1
2
1
1
2
1
2
1
1
1
1
2
1
1
1
1
1
2
1
1
1
1
2
2
1
1
1
1
1
2
4
1
1
1

In [12]:
# Re-check the number of url_lists entries; it is compared with the total
# number of certificates computed in the next cell.
len(url_lists)

51512

In [13]:
# Flatten the per-day certificate lists into one list and show its length,
# which should equal len(url_lists).
full_cert_list = []
for s in cert_lists:
    full_cert_list.extend(s)
len(full_cert_list)

51512

In [14]:
# Stack the per-day frames into a single frame. This rebinds final_df, which
# previously named the last per-day frame.
# NOTE: pd.concat keeps each frame's own index here, so index labels restart
# for every day and are not unique in the result.
final_df = pd.concat(dfs)
final_df

Unnamed: 0,url,CN,O,C,businessCategory,serialNumber,juristiction,issuer_CN,issuer_O,expired,notBefore,notAfter,verCode,time_collected
0,http://ph.zanqap.com,*.arvixeshared.com,,,,,,COMODO RSA Domain Validation Secure Server CA,COMODO CA Limited,1.0,b'20160715000000Z',b'20190802235959Z',10,1.620480e+09
1,https://customerrs-sercive.com/packge/pak/bun/...,*.web-hosting.com,,,,,,Sectigo RSA Domain Validation Secure Server CA,Sectigo Limited,0.0,b'20200507000000Z',b'20220405235959Z',0,1.620480e+09
2,http://qesyvvvqcppmwlbamhmbzvfmoc-dot-gl099898...,*.google.com,Google LLC,US,,,,GTS CA 1O1,Google Trust Services,0.0,b'20210316192807Z',b'20210608192806Z',0,1.620480e+09
3,https://qesyvvvqcppmwlbamhmbzvfmoc-dot-gl09989...,*.google.com,Google LLC,US,,,,GTS CA 1O1,Google Trust Services,0.0,b'20210316192807Z',b'20210608192806Z',0,1.620480e+09
4,https://communicourt-my.sharepoint.com/persona...,*.sharepoint.com,Microsoft Corporation,US,,,,DigiCert Cloud Services CA-1,DigiCert Inc,0.0,b'20210205000000Z',b'20220204235959Z',0,1.620480e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
568,https://myhermes.redeliver-auth02.com/,myhermes.redeliver-auth02.com,,,,,,R3,Let's Encrypt,0.0,b'20210506140021Z',b'20210804140021Z',0,1.620495e+09
569,https://renew-myee-billing.com/,renew-myee-billing.com,,,,,,Sectigo RSA Domain Validation Secure Server CA,Sectigo Limited,0.0,b'20210508000000Z',b'20220508235959Z',0,1.620494e+09
570,https://renew-myee-billing.com/account/index?a...,renew-myee-billing.com,,,,,,Sectigo RSA Domain Validation Secure Server CA,Sectigo Limited,0.0,b'20210508000000Z',b'20220508235959Z',0,1.620494e+09
571,http://ghislain.dartois.pagesperso-orange.fr/i...,monsite-orange.fr,Orange,FR,,,,DigiCert TLS RSA SHA256 2020 CA1,DigiCert Inc,0.0,b'20210322000000Z',b'20220422235959Z',0,1.620495e+09


In [26]:
# Keep a single row per raw URL: the first occurrence in final_df wins and
# later repeats of the same URL are dropped. final_df itself is left untouched.
final_distinct_df = final_df.drop_duplicates(subset='url', keep='first')
final_distinct_df

Unnamed: 0,url,CN,O,C,businessCategory,serialNumber,juristiction,issuer_CN,issuer_O,expired,notBefore,notAfter,verCode,time_collected
0,http://ph.zanqap.com,*.arvixeshared.com,,,,,,COMODO RSA Domain Validation Secure Server CA,COMODO CA Limited,1.0,b'20160715000000Z',b'20190802235959Z',10,1.620480e+09
1,https://customerrs-sercive.com/packge/pak/bun/...,*.web-hosting.com,,,,,,Sectigo RSA Domain Validation Secure Server CA,Sectigo Limited,0.0,b'20200507000000Z',b'20220405235959Z',0,1.620480e+09
2,http://qesyvvvqcppmwlbamhmbzvfmoc-dot-gl099898...,*.google.com,Google LLC,US,,,,GTS CA 1O1,Google Trust Services,0.0,b'20210316192807Z',b'20210608192806Z',0,1.620480e+09
3,https://qesyvvvqcppmwlbamhmbzvfmoc-dot-gl09989...,*.google.com,Google LLC,US,,,,GTS CA 1O1,Google Trust Services,0.0,b'20210316192807Z',b'20210608192806Z',0,1.620480e+09
4,https://communicourt-my.sharepoint.com/persona...,*.sharepoint.com,Microsoft Corporation,US,,,,DigiCert Cloud Services CA-1,DigiCert Inc,0.0,b'20210205000000Z',b'20220204235959Z',0,1.620480e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,http://dhidsuport-855c81.ingress-earth.easywp....,*.ingress-earth.easywp.com,,,,,,Sectigo RSA Domain Validation Secure Server CA,Sectigo Limited,0.0,b'20210407000000Z',b'20220407235959Z',0,1.620495e+09
566,https://dhidsuport-855c81.ingress-earth.easywp...,*.ingress-earth.easywp.com,,,,,,Sectigo RSA Domain Validation Secure Server CA,Sectigo Limited,0.0,b'20210407000000Z',b'20220407235959Z',0,1.620495e+09
567,https://dhidsuport-855c81.ingress-earth.easywp...,*.ingress-earth.easywp.com,,,,,,Sectigo RSA Domain Validation Secure Server CA,Sectigo Limited,0.0,b'20210407000000Z',b'20220407235959Z',0,1.620495e+09
569,https://renew-myee-billing.com/,renew-myee-billing.com,,,,,,Sectigo RSA Domain Validation Secure Server CA,Sectigo Limited,0.0,b'20210508000000Z',b'20220508235959Z',0,1.620494e+09


In [27]:
# Write the de-duplicated rows to disk. The index is written as well (to_csv
# default), so the file gets an unnamed first column of non-unique labels.
final_distinct_df.to_csv('final_certificate_datasets/phish_dataset_urls.csv')