In [31]:
import boto3
import os,sys
import datetime as dt
import glob
import pandas as pd
pd.set_option('display.max_rows', 1000)
import hvplot.pandas
import holoviews as hv
import subprocess as sp

In [32]:
os.makedirs('/dev/shm/logs',exist_ok=True)
os.chdir('/dev/shm/logs')

In [33]:
!aws s3 sync s3://earthbigdata/logs/ /dev/shm/logs --quiet

In [34]:
def make_entry(ip_dict,date_dict,logfile):  
    with open(logfile,"r") as f:
        lines=f.readlines()
    date = lines[0].split('[')[1].split('+')[0].strip()
    date = dt.datetime.strptime(date,'%d/%b/%Y:%H:%M:%S')
    ipaddr=lines[0].split(']')[1].split('-')[0].strip()
    if not ipaddr in ip_dict:
        ip_dict[ipaddr]=[]
    if not date.date() in date_dict:
        date_dict[date.date()]=0
    ip_dict[ipaddr].append(date)
    date_dict[date.date()]+=1
    

In [35]:
ip_dict={}
date_dict={}

for log in glob.glob('ebd-covid19*'):
    make_entry(ip_dict,date_dict,log)

In [36]:
access=pd.DataFrame.from_dict(date_dict,orient='index')

In [37]:
access.sort_index(inplace=True,ascending=False)

In [38]:
access.hvplot.barh(padding=0.1,grid=True,ylabel='Access Times per day',xlabel='',title='EBD COVID 19 Access')

In [60]:
access.head()

Unnamed: 0,0
2020-04-04,102
2020-04-03,437
2020-04-02,603
2020-04-01,582
2020-03-31,663


In [39]:
ip_count={}
for i in ip_dict:
    ip_count[i]=len(ip_dict[i])

In [40]:
access_count=pd.DataFrame.from_dict(ip_count,orient='index')

access_count=access_count[access_count>=5].dropna()

access_count = access_count.sort_values(0,ascending=False)

access_count['nslookup']=''
for i in access_count.index:
    cmd='nslookup {}'.format(i)
    try:
        ret= sp.check_output(cmd.split())
        nslookup = ret.decode().split('name = ')[1].split('\n')[0].strip('.')
        # print(nslookup)
        access_count.loc[i,'nslookup']=nslookup
    except:
        pass

In [41]:
access_count.head(40)

Unnamed: 0,0,nslookup
24.218.227.219,618.0,c-24-218-227-219.hsd1.ma.comcast.net
52.44.93.197,161.0,ec2-52-44-93-197.compute-1.amazonaws.com
34.232.127.140,116.0,ec2-34-232-127-140.compute-1.amazonaws.com
73.23.120.180,112.0,c-73-23-120-180.hsd1.fl.comcast.net
34.231.157.157,111.0,ec2-34-231-157-157.compute-1.amazonaws.com
72.205.80.22,83.0,ip72-205-80-22.sb.sd.cox.net
93.218.246.5,73.0,p5DDAF605.dip0.t-ipconnect.de
107.193.48.19,72.0,107-193-48-19.lightspeed.sndgca.sbcglobal.net
71.232.190.220,64.0,c-71-232-190-220.hsd1.ma.comcast.net
68.48.240.2,61.0,c-68-48-240-2.hsd1.mi.comcast.net


In [42]:
len(list(ip_dict.keys()))

2369

In [43]:
list(ip_dict.keys())[:10]

['73.16.195.238',
 '75.21.242.151',
 '3.86.107.205 arn:aws:iam::811071659227:user/josefk 0E99A4812F0A875C REST.PUT.OBJECT index.html "PUT /ebd',
 '24.218.227.219',
 '173.63.26.215',
 '40.77.167.216',
 '3.86.107.205 arn:aws:iam::811071659227:user/josefk 29EDAEE93F4BB232 REST.PUT.OBJECT index.html "PUT /ebd',
 '70.95.79.104',
 '173.252.95.18',
 '98.250.114.207']

In [44]:
for i in list(ip_dict.keys()):
    if ip_dict[i][-1].date() == dt.datetime.now().date():
        if i in list(access_count.index):
            print(ip_dict[i][-1].date(),access_count.loc[i][0],'\t',access_count.loc[i].nslookup)

2020-04-04 5.0 	 cpe-70-95-79-104.san.res.rr.com


In [59]:
access_list=[]
for i in ip_dict:
    last_access=ip_dict[i][-1].date()
    if i in list(access_count.index):
        a = int(access_count.loc[i][0])
        who = access_count.loc[i].nslookup
        access_list.append((last_access,a,i,who))
access_list.sort(reverse=True)
for i in access_list:
    print('{} {:5d}  {:17s} {}'.format(*i))

2020-04-04     5  70.95.79.104      cpe-70-95-79-104.san.res.rr.com
2020-04-03    30  173.69.48.213     pool-173-69-48-213.prvdri.fios.verizon.net
2020-04-03     6  23.115.160.199    23-115-160-199.lightspeed.livnmi.sbcglobal.net
2020-04-03     5  194.59.251.43     
2020-04-03     5  129.168.79.105    
2020-04-02    33  98.110.160.63     pool-98-110-160-63.bstnma.fios.verizon.net
2020-04-02    26  108.7.205.117     pool-108-7-205-117.bstnma.fios.verizon.net
2020-04-02    17  50.105.116.149    
2020-04-02    15  170.223.207.5     bwhmaincampuspat5.partners.org
2020-04-02    10  166.137.12.117    mobile-166-137-012-117.mycingular.net
2020-04-02     9  177.124.96.25     
2020-04-02     9  108.20.173.216    pool-108-20-173-216.bstnma.fios.verizon.net
2020-04-02     8  68.36.48.51       c-68-36-48-51.hsd1.mi.comcast.net
2020-04-02     7  72.209.21.124     ip72-209-21-124.ri.ri.cox.net
2020-04-02     7  24.147.19.142     c-24-147-19-142.hsd1.ma.comcast.net
2020-04-02     6  71.29.227.222    