In [1]:
pip install OTXv2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting OTXv2
  Downloading OTXv2-1.5.12-py3-none-any.whl (16 kB)
Installing collected packages: OTXv2
Successfully installed OTXv2-1.5.12


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from OTXv2 import OTXv2,NotFound
import IndicatorTypes
import datetime
import re

class AlienVault:

    API_KEY = 'Your API KEy'  

    SERVER = 'https://otx.alienvault.com/'

    def __init__(self, ioc: str, ioc_type: str,source_date: str=None) -> None:

        self.ioc = ioc

        self.otx = OTXv2(AlienVault.API_KEY, AlienVault.SERVER)

        self.status = None
        self.pulse_count = None
        self.score = None
        self.alert_length = None
        self.alerts = None
        self.cve = None
        self.first_submission_date = None
        self.first_submission = None
        self.first_submission_diff = None
        self.first_submission_diff_sec = None

        if ioc_type.lower() in ['hash']:
            self.hash(ioc)

        elif ioc_type == 'url':
            self.url(ioc)

        elif ioc_type == 'ip':
            self.ip(ioc)

        elif ioc_type == 'domain':
            self.domain(ioc)

        self.getDateDifference(source_date)

        del self.otx

    def get_response(self,ioc_type,ioc):

        response = 'RetryError'

        for _ in range(2):
            try:
                response = self.otx.get_indicator_details_full(ioc_type, ioc)
                break

            except NotFound:
                response = 'NotFound'
                break
            
            except Exception as e:
                print(e)  
        
        return response

    def getValue(self,results, keys):
        if type(keys) is list and len(keys) > 0:

            if type(results) is dict:
                key = keys.pop(0)
                if key in results:
                    return self.getValue(results[key], keys)
                else:
                    return None
            else:
                if type(results) is list and len(results) > 0:
                    return self.getValue(results[0], keys)
                else:
                    return results
        else:
            return results


    def hash(self, hash):

        ioc_type = IndicatorTypes.FILE_HASH_MD5
        if len(hash) == 64:
            ioc_type = IndicatorTypes.FILE_HASH_SHA256

        response = self.get_response(ioc_type, hash)

        if type(response) == str:
            self.status = response
            return

        
        self.pulse_count = self.getValue(response, ['general','pulse_info','count'])
        
        self.score = self.getValue(response,['analysis','analysis','plugins','cuckoo','result','info','combined_score'])

        alerts = []

        avg = self.getValue(response, ['analysis', 'analysis',
                   'plugins', 'avg', 'results', 'detection'])
        if avg:
            alerts.append(avg)

        clamav = self.getValue(response, ['analysis', 'analysis',
                        'plugins', 'clamav', 'results', 'detection'])
        if clamav:
            alerts.append(clamav)

        avast = self.getValue(response, ['analysis', 'analysis',
                        'plugins', 'avast', 'results', 'detection'])
        if avast:
            alerts.append(avast)

        microsoft = self.getValue(response, ['analysis', 'analysis', 'plugins',
                            'cuckoo', 'result', 'virustotal', 'scans', 'Microsoft', 'result'])
        if microsoft:
            alerts.append(microsoft)

        symantec = self.getValue(response, ['analysis', 'analysis', 'plugins',
                            'cuckoo', 'result', 'virustotal', 'scans', 'Symantec', 'result'])
        if symantec:
            alerts.append(symantec)

        kaspersky = self.getValue(response, ['analysis', 'analysis', 'plugins',
                            'cuckoo', 'result', 'virustotal', 'scans', 'Kaspersky', 'result'])
        if kaspersky:
            alerts.append(kaspersky)

        suricata = self.getValue(response, ['analysis', 'analysis', 'plugins',
                            'cuckoo', 'result', 'suricata', 'rules', 'name'])
        if suricata and 'trojan' in str(suricata).lower():
            alerts.append(suricata)

        self.alert_length = len(alerts)

        self.alerts = ', '.join(alerts)

        if (self.pulse_count == 0 or self.pulse_count is None ) and (self.score == 0 or self.score is None) and self.alert_length == 0:
            self.status = 'Clean'
            return

        date = self.getValue(response,['analysis','analysis','datetime_int'])
        
        if date:
            try:
                self.first_submission_date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%S')
            except Exception as e:
                print(f'Date Error -> {self.ioc} : {e}')
        else:
            created_dates = []
            for pulse in self.getValue(response,['general', 'pulse_info', 'pulses']):
                if pulse:
                    try:
                        created_date = datetime.datetime.strptime(pulse['created'][:19],'%Y-%m-%dT%H:%M:%S')
                        created_dates.append(created_date)
                    except Exception as e:
                        print(f'Pulse date error -> {self.ioc} : {e}')
            
            self.first_submission_date = min(created_dates)

        self.status = 'Malicious'

        self.find_cve(response)

        return


    def url(self,url_address):

        ioc_type = IndicatorTypes.URL

        response = self.get_response(ioc_type, url_address)

        if type(response) == str:
            self.status = response
            return

        alerts = []

        google = self.getValue(response, ['url_list', 'url_list', 'result', 'safebrowsing'])

        if google and 'response_code' in str(google):
            alerts.append('malicious')

        clamav = self.getValue(response, ['url_list', 'url_list', 'result', 'multiav', 'matches', 'clamav'])

        if clamav:
            alerts.append(clamav)

        avast = self.getValue(response, ['url_list', 'url_list','result', 'multiav', 'matches', 'avast'])

        if avast:
            alerts.append(avast)

        self.alert_length = len(alerts)

        self.alerts = ', '.join(alerts)

        #self.pulse_count = self.getValue(response, ['general','pulse_info','count'])

        if self.alert_length == 0:
            self.status = 'Clean'
            return

        date = self.getValue(response,['analysis','analysis','datetime_int'])

        if date:
            try:
                self.first_submission_date = datetime.datetime.strptime(
                    date, '%Y-%m-%dT%H:%M:%S')
            except Exception as e:
                print(f'Date Error -> {self.ioc} : {e}')

        self.status = 'Malicious'

        self.find_cve(response)

        return


    def ip(self, ip_address):
        ioc_type = IndicatorTypes.IPv4

        response = self.get_response(ioc_type, ip_address)
        if type(response) == str:
        	self.status = response
        	return
        alerts = []
        created_dates=[]
        google = self.getValue(response, ['url_list', 'url_list', 'result', 'safebrowsing'])

        if google and 'response_code' in str(google):
            alerts.append('malicious')
        malware=self.getValue(response,['malware','data'])
        length=len(malware)
        if length>0:
        	for ind in range(0,length):
        		date_key=response['malware']['data'][ind]
        		avg = response['malware']['data'][ind]['detections']['avg']
        		if avg:
        			alerts.append(avg)
        			try:
        				date = datetime.datetime.strptime(date_key['date'][:19], '%Y-%m-%dT%H:%M:%S')
        				created_dates.append(date)
        			except Exception as e:
        				print(f'Pulse date error -> {self.ioc} : {e}')
        		clamav = response['malware']['data'][ind]['detections']['clamav']
        		if clamav:
        			alerts.append(clamav)
        			try:
        				date = datetime.datetime.strptime(date_key['date'][:19], '%Y-%m-%dT%H:%M:%S')
        				created_dates.append(date)
        			except Exception as e:
        				print(f'Pulse date error -> {self.ioc} : {e}')
        				
        		avast = response['malware']['data'][ind]['detections']['avast']
        		if avast:
        			alerts.append(avast)
        			try:
        				date = datetime.datetime.strptime(date_key['date'][:19], '%Y-%m-%dT%H:%M:%S')
        				created_dates.append(date)
        			except Exception as e:
        				print(f'Pulse date error -> {self.ioc} : {e}')
        		msdefender = response['malware']['data'][ind]['detections']['msdefender']
        		if msdefender:
        			alerts.append(msdefender)
        			try:
        				date = datetime.datetime.strptime(date_key['date'][:19], '%Y-%m-%dT%H:%M:%S')
        				created_dates.append(date)
        			except Exception as e:
        				print(f'Pulse date error -> {self.ioc} : {e}')
        self.alert_length = len(alerts)
        self.alerts = ', '.join(alerts)
        if self.alert_length == 0:
        	self.status = 'Clean'
        	return
        date = self.getValue(response,['analysis','analysis','datetime_int'])
        if date:
        	try:
        		self.first_submission_date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%S')
        	except Exception as e:
        		print(f'Date Error -> {self.ioc} : {e}')
        self.status = 'Malicious'
        self.find_cve(response)
        return

    def domain(self,domain_name):

        response = self.get_response(IndicatorTypes.HOSTNAME, domain_name) or self.get_response(IndicatorTypes.DOMAIN, domain_name)
        print(type(response))
        if type(response) == str:
            self.status = response
            return
        alerts = []
        created_dates=[]
        malware=self.getValue(response,['malware','data'])
        length=len(malware)
        print(length)
        if length>0:
        	for ind in range(0,length):
        		date_key=response['malware']['data'][ind]
        		avg = response['malware']['data'][ind]['detections']['avg']
        		if avg:
        			alerts.append(avg)
        		clamav = response['malware']['data'][ind]['detections']['clamav']
        		if clamav:
        			alerts.append(clamav)
        		avast = response['malware']['data'][ind]['detections']['avast']
        		if avast:
        			alerts.append(avast)
        		msdefender = response['malware']['data'][ind]['detections']['msdefender']
        		if msdefender:
        			alerts.append(msdefender)
        print(alerts)
        print(len(alerts))
        self.alert_length = len(alerts)
        print(len(alerts))
        print(self.alert_length)
        self.alerts = ', '.join(alerts)
        if self.alert_length == 0:
        	self.status = 'Clean'
        	return
        date = self.getValue(response,['analysis','analysis','datetime_int'])
        print(date)
        if date:
        	try:
        		self.first_submission_date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%S')
        	except Exception as e:
        		print(f'Date Error -> {self.ioc} : {e}')
        self.status = 'Malicious'
        self.find_cve(response)
        return
        

    def find_cve(self,response):

        cve_expression=r"\bCVE-\d{4}-\d{4,7}\b"
        content_text = str(response)
        
        result = re.findall(cve_expression, content_text, re.IGNORECASE)

        self.cve = ', '.join(list(set(elements.upper() for elements in result)))
        return

    
    def getDateDifference(self, source_date, date_format=r'%Y-%m-%d %H:%M:%S'):

        if self.first_submission_date is None:
            return

        try:
            source_date = datetime.datetime.strptime(source_date, date_format)
        except Exception as e:
            print(e)
            return
        print(source_date)
        print(self.first_submission_date)
        if source_date < self.first_submission_date:
            print("Twitter")
            duration = self.first_submission_date - source_date
            print(duration)
            self.first_submission = 'Twitter'
            self.first_submission_diff = duration
            self.first_submission_diff_sec = duration.total_seconds()
            return

        elif source_date > self.first_submission_date:
            print("Alienvault")
            duration = source_date - self.first_submission_date

            self.first_submission = 'AlienVault'
            self.first_submission_diff = duration
            self.first_submission_diff_sec = duration.total_seconds()
            return
        print(self.first_submission)
        print(self.first_submission_diff)
        print(self.first_submission_diff_sec)
        print(self.pulse_count)
        print(self.score)

In [4]:
import time
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

start_time = time.time()

# Field names for the dataset
fields_name = ['tweet_date', 'account', 'ioc_type', 'ioc_value', 'type_of_attack', 'tweet_url', 'text','status', 'pulse_count','score','alert_length','alerts','cve','first_submission_date',
               'first_submission', 'first_submission_diff', 'first_submission_diff_sec']


# To read saved local copy of database
df = pd.read_csv('/content/drive/MyDrive/Twitter_Work_Code/TIP/AV/AV_Output_Non_CVE.csv',names=fields_name, encoding='utf-8',engine='python')

In [None]:
df

In [5]:
def report(index):

    if df.loc[index, 'status'] in ['Malicious', 'Clean']:
        return

    ioc = df.loc[index, 'ioc_value']
    ioc_type = df.loc[index, 'ioc_type']
    source_date = df.loc[index,'tweet_date']
    source_date=source_date[:19]
    av = AlienVault(ioc, ioc_type, source_date)

    print(index, ioc, av.status)

    df.loc[index, 'status'] = av.status
    df.loc[index, 'pulse_count'] = av.pulse_count
    df.loc[index, 'score'] = av.score
    df.loc[index, 'alert_length'] = av.alert_length
    df.loc[index, 'alerts'] = av.alerts
    df.loc[index, 'cve'] = av.cve
    df.loc[index, 'first_submission_date'] = av.first_submission_date
    df.loc[index, 'first_submission'] = av.first_submission
    df.loc[index, 'first_submission_diff'] = av.first_submission_diff
    df.loc[index, 'first_submission_diff_sec'] = av.first_submission_diff_sec
    print(df.loc[index, 'status'])
    print(df.loc[index, 'first_submission_date'])
    print(df.loc[index, 'first_submission'] )
    return
start = 300000
end = 350000

AlienVault.API_KEY = '21f702b35f39631cbf89168e5cf20ebb26c61a9c36366118cf23584c11b8ad60'

indices = list(range(30000,50000))

try:
    with ThreadPoolExecutor(max_workers=10) as executor:

        executor.map(report, indices)

        executor.shutdown(wait=True)

except Exception as e:
    print(e)

print(df)

df.to_csv('/content/drive/MyDrive/Twitter_Work_Code/TIP/AV/AV_Output_Non_CVE.csv', index=False)

end_time = time.time()

print(f'\nTime : {end_time - start_time : .2f}')


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
None
37386 https://cve.mitre.org/news/archives/2021/news.html#March222021_DeepSurface_Added_as_CVE_Numbering_Authority_CNA NotFound
NotFound
None
None
37397 http://ong33download.com NotFound
NotFound
None
None37407 37412 https://t.co/f46EiaxmJS NotFound
https://otx.alienvault.com/pulse/6058ed45d99db72212fb68d5 NotFound
NotFound
NotFound
None

None
None
None
37448 http://joingrup-wa.xyz/Curlnya.zip NotFound
NotFound
None
None
37479 http://alltokyos.org NotFound
NotFound
None
None
37442 http://www.myetherwallete.vip/#/access-my-wallet NotFound
NotFound
None
None
37499 http://dcvseutefrvcsdgd.club/ NotFound
NotFound
None
None
37509 http://02customer-help832.com/ NotFound
37493 http://23.251.62.2 NotFound
NotFound
None
None
NotFound
None
None
37458 http://hhhuwsn.club/sharepoint%20/share.zip NotFound
NotFound
None
37510 http://wallets-paxful.com/trade/paxful/l.php NotFound
NotFound
None
None
None
37515 http://monespacelcl.com

In [None]:
# To read saved local copy of database
df = pd.read_csv('/content/drive/MyDrive/TIP/AV/Final_AV_Output_20_7_2022.csv',names=fields_name, encoding='utf-8',engine='python')

In [None]:
df=df[2:]

In [None]:
df['status'].value_counts()

Clean         73392
NotFound      10382
Malicious      6055
RetryError     1809
Name: status, dtype: int64

In [None]:
df.to_csv('/content/drive/MyDrive/TIP/AV/Final_AV_Output_20_7_2022.csv', index=False)

In [None]:

def report(index):

    ioc = df.loc[index, 'ioc']

    ioc_type = df.loc[index, 'ioc_type']
    if (ioc_type=='url'):
     if(df.loc[index, 'status']=='Malicious'):
       if((df.loc[index, 'alert_length'] == 0) or (df.loc[index, 'alert_length'] is None)):
          df.loc[index, 'status'] = 'Clean'
          df.loc[index, 'pulse_count'] = None
          df.loc[index, 'score'] = None
          df.loc[index, 'alert_length'] = None
          df.loc[index, 'alerts'] = None
          df.loc[index, 'cve'] = None
          df.loc[index, 'first_submission_date'] = None
          df.loc[index, 'first_submission'] = None
          df.loc[index, 'first_submission_diff'] = None
          df.loc[index, 'first_submission_diff_sec'] = None
    print(index, ioc, df.loc[index, 'status'])
    return

start =1
end = 65000

#AlienVault.API_KEY = '4a7ae91cad86f294815870767e89739ec1428fdc516d51bca148468bdbfe05e4'

indices = list(range(start,end))

try:
    with ThreadPoolExecutor(max_workers=50) as executor:

        executor.map(report, indices)

        executor.shutdown(wait=True)

except Exception as e:
    print(e)

#print(df)

df.to_csv('/content/drive/MyDrive/av/AlienVault_Output_Final_Jan_March_2022.csv', na_rep='', index=None)

end_time = time.time()

print(f'\nTime : {end_time - start_time : .2f}')


In [None]:
df = pd.read_csv('/content/drive/MyDrive/av/AlienVault_Output_Final_Jan_March_2022.csv', names=['t_time', 'account', 'ioc_type', 'ioc', 'attack_type', 't_url', 'status', 'pulse_count', 'score',
                 'alert_length', 'alerts', 'cve', 'first_submission_date', 'first_submission', 'first_submission_diff', 'first_submission_diff_sec'])
N = 15
new_df = df.iloc[N: , :]
new_df.to_csv('/content/drive/MyDrive/av/AlienVault_Output_Final_Jan_March_2022.csv', na_rep='', index=None)


  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
first_submission_date='2019-09-05 08:59:29'	
source_date='2021-07-29 13:35:02+00:00'
date_format=r'%Y-%m-%d %H:%M:%S'
source_date= source_date[:19]

In [None]:
source_date

datetime.datetime(2021, 7, 29, 13, 35, 2)

In [None]:
if source_date < first_submission_date:
            print("Twitter")
            duration = first_submission_date - source_date
            print(duration)
else:
  print("TIP")
  duration = first_submission_date - source_date
  print(duration)