# Malware Analysis & Triage Kit
This notebook performs the initial stages of immediate malware triage.

## How To
Take your malware specimen and drop it into the `dropbox` directory. The notebook will walk you through the stages of initial analysis.

At the end of this process, you will have a triage report in the `saved-specimens` directory. This report includes the findings from initial triage, the defanged specimen in a password-protected Zip file, and the static analysis artifacts.

# Imports and Setup

In [None]:
# Imports
from hashlib import *
import sys
import os
from getpass import getpass
from virus_total_apis import PublicApi as VirusTotalPublicApi
import json
from MalwareSample import *
from pprint import pprint
import os.path
from time import sleep
import subprocess
import json
#import yara
!pip3 install pandas
from pandas import pandas as pandas
from pandas import DataFrame as df
import numpy
import requests
import pprint
import io
import math
import csv
from datetime import date, timedelta
from functools import reduce
import time
import urllib
import bz2
import datetime
import lxml
import re
import asyncio

!pip3 install itables
import itables.options as opt
# Widen pandas' display limits so large result tables are shown in full
# rather than truncated in the notebook output.
pandas.set_option('display.max_rows', 100000)
pandas.set_option('display.max_columns', 500)
pandas.set_option('display.width', 10000)

# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# Let the notebook container use the full width of the browser window.
display(HTML("<style>.container { width:100% !important; }</style>"))
pandas.set_option('display.max_colwidth', None)
# Raise the itables `maxBytes` option to a very large value.
opt.maxBytes = 90000**6
!pip install yara-python
#import os
#import requests
from bs4 import BeautifulSoup
#import time
#import re
#import io
from zipfile import ZipFile
print("Importing Complete")


## Download Samples from Malware Traffic Analysis

This code downloads all samples for the given years (set in the script). The files are first downloaded into folders by date; they are then moved into the `dropbox` folder and the date folders are removed.

In [None]:


# File-name suffixes used as the default `extensions` filter of
# MaliciousSamples.malware_traffic_analysis: archive members whose names end
# with one of these entries are extracted.
EXTENSION_LIST = [
    'doc', 'xls', 'ppt',
    'cab', 'exe', 'pe',
    'apk',
    'macos', 'elf',
    'dylib', 'bundle',  # Mach-O/Unibin
    'dmg', 'jar', 'pdf',
    'eicar', 'swf',
]


class MaliciousSamples:
    """Collect malware samples from public sources for triage.

    One source is implemented: the daily posts published on
    malware-traffic-analysis.net (see ``malware_traffic_analysis``).
    """

    # Link text that identifies a daily post on a year index page (YYYY-MM-DD).
    DATE_RE = re.compile(r'^\d{4}-\d{2}-\d{2}$')
    MALWARE_TRAFFIC_ANALYSIS_URL = 'https://malware-traffic-analysis.net'
    # Seconds to wait on a single HTTP request before treating it as failed.
    REQUEST_TIMEOUT = 60
    # Password handed to the zip extractor for the downloaded archives.
    ARCHIVE_PASSWORD = b'infected'

    def malware_traffic_analysis(self, 
                                 years=None, 
                                 extensions=None,
                                 save_path='dropbox/'
        ):
        """Download and extract the malware archives posted in the given years.

        For every year, the year index page is scanned for links whose text is
        a date; each linked post is then scanned for links whose text contains
        both "MALWARE" and ".ZIP". Those archives are downloaded and extracted
        below ``save_path`` in a folder mirroring the post's URL path.

        Args:
            years: Iterable of year strings; ``None`` means ``['2022']``.
            extensions: Suffixes an archive member name must end with to be
                extracted, or a collection containing ``'all'`` to extract
                everything; ``None`` means ``EXTENSION_LIST``.
            save_path: Directory under which the per-post folders are created.

        Returns:
            A list holding the local folder path once for every archive that
            was downloaded and extracted.
        """
        # Defaults are resolved at call time so no list object is shared
        # between calls through the signature.
        self.extensions = EXTENSION_LIST if extensions is None else extensions
        years = ['2022'] if years is None else years

        date_links = []
        for year in years:
            year_url = '{}/{}/index.html'.format(self.MALWARE_TRAFFIC_ANALYSIS_URL, year)
            year_response = self._fetch(year_url)
            if year_response is None:
                continue
            soup = BeautifulSoup(year_response.text, 'lxml')
            for year_link in soup.find_all('a'):
                href = year_link.get('href')
                # Anchors without an href cannot be followed.
                if href and self.DATE_RE.match(year_link.get_text()):
                    date_links.append(
                        '{}/{}/{}'.format(self.MALWARE_TRAFFIC_ANALYSIS_URL, year, href)
                    )

        malware_zip_links = []
        for date_url in date_links:
            # Pause between posts to avoid hammering the site.
            time.sleep(1)
            # URL of the folder that contains the post page.
            date_folder = date_url.rsplit('/', 1)[0]
            relative_folder = date_folder.split(self.MALWARE_TRAFFIC_ANALYSIS_URL)[-1].strip()
            # Strip the leading slash so the join stays below save_path.
            folder_path = os.path.join(save_path, relative_folder.lstrip('/'))
            os.makedirs(folder_path, exist_ok=True)

            date_response = self._fetch(date_url)
            if date_response is None:
                continue
            soup = BeautifulSoup(date_response.text, 'lxml')
            for link in soup.find_all('a'):
                text = link.get_text().upper()
                href = link.get('href')
                if not href or 'MALWARE' not in text or '.ZIP' not in text:
                    continue
                malware_request = self._fetch('{}/{}'.format(date_folder, href))
                if malware_request is None:
                    continue
                if self._extract_archive(malware_request.content, folder_path):
                    malware_zip_links.append(folder_path)
        return malware_zip_links

    def _fetch(self, url):
        """Return the HTTP response for ``url``, or None if it is unavailable.

        A request error or a status other than 200 yields None so that one
        unreachable page does not abort the whole download run.
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print('Skipping {}: {}'.format(url, err))
            return None
        if response.status_code != 200:
            return None
        return response

    @staticmethod
    def _wanted_members(names, extensions):
        """Return the archive member names selected by ``extensions``."""
        if 'all' in extensions:
            return list(names)
        suffixes = tuple(extensions)
        return [name for name in names if name.endswith(suffixes)]

    def _extract_archive(self, contents, folder_path):
        """Extract the selected members of a zip held in memory.

        Args:
            contents: Raw bytes of the zip archive.
            folder_path: Directory to extract into.

        Returns:
            True when extraction succeeded, False when the archive could not
            be read or decrypted.
        """
        from zipfile import BadZipFile

        try:
            # The context manager closes the archive even if extraction fails.
            with ZipFile(io.BytesIO(contents)) as archive:
                members = self._wanted_members(archive.namelist(), self.extensions)
                archive.extractall(path=folder_path, members=members,
                                   pwd=self.ARCHIVE_PASSWORD)
        except (BadZipFile, RuntimeError, NotImplementedError) as err:
            # RuntimeError: wrong password; NotImplementedError: unsupported
            # compression or encryption method.
            print('Could not extract archive into {}: {}'.format(folder_path, err))
            return False
        return True

    def test_files(self):
        '''
            Placeholder: no code yet, only the WildFire test-file URLs below.

            If you have SSL decryption enabled on the firewall, use one of the following URLs:
                PE—https://wildfire.paloaltonetworks.com/publicapi/test/pe
                APK—https://wildfire.paloaltonetworks.com/publicapi/test/apk
                MacOSX—https://wildfire.paloaltonetworks.com/publicapi/test/macos
                ELF—https://wildfire.paloaltonetworks.com/publicapi/test/elf
            If you do not have SSL decryption enabled on the firewall, use one of the following URLs instead:
                PE—http://wildfire.paloaltonetworks.com/publicapi/test/pe
                APK—http://wildfire.paloaltonetworks.com/publicapi/test/apk
                MacOSX—http://wildfire.paloaltonetworks.com/publicapi/test/macos
                ELF—http://wildfire.paloaltonetworks.com/publicapi/test/elf
        '''



import shutil

# Download the samples; the call returns the folders that received files.
print(MaliciousSamples().malware_traffic_analysis())

# The downloader saves into per-date folders under dropbox/2022/. Move every
# downloaded file directly into dropbox/ and remove the date folders, so the
# later cells (which list dropbox/*) see plain sample files.
DROPBOX_DIR = 'dropbox'
year_dir = os.path.join(DROPBOX_DIR, '2022')
for folder, _subdirs, filenames in os.walk(year_dir):
    for filename in filenames:
        target = os.path.join(DROPBOX_DIR, filename)
        if os.path.exists(target):
            # Same file name on different days: keep both by prefixing the
            # date folders (e.g. 2022-01-03-sample.exe).
            prefix = os.path.relpath(folder, DROPBOX_DIR).replace(os.sep, '-')
            target = os.path.join(DROPBOX_DIR, prefix + '-' + filename)
        shutil.move(os.path.join(folder, filename), target)
shutil.rmtree(year_dir, ignore_errors=True)
print('Samples have been downloaded.')

### Check Dropbox and Saved-Specimens

In [None]:
# Run the directory check for both working folders, then tell the analyst
# when the dropbox has nothing in it to analyse.
for working_dir in ("dropbox", "saved-specimens"):
    MalwareSample.check_dir(working_dir)

empty = MalwareSample.is_dir_empty("dropbox")
if empty:
    print(r"  \\--> " + recc + "Put some samples in the dropbox!")

### Enumerate Samples in the Dropbox

In [None]:
import glob

# List the sample files with glob instead of parsing `ls` output: when the
# dropbox contains a sub-directory, `ls dropbox/*` also prints "dir:" headers
# and blank lines, which would then be treated as samples. The paths keep the
# same "dropbox/<name>" form that `ls dropbox/*` prints for files.
samples = sorted(path for path in glob.glob("dropbox/*") if os.path.isfile(path))
for s in samples:
    print(info + "Sample: " + s)

In [None]:
# Wrap every enumerated sample path in a MalwareSample object.
sample_obj = list(map(MalwareSample, samples))

### Create a Saved Specimen directory for the specimen(s)

In [None]:
# Call create_specimen_dirs for each sample and remember the name it returns
# on the sample object for the later stages.
for specimen in sample_obj:
    specimen.saved_sample_name = MalwareSample.create_specimen_dirs(specimen.sample_name)

### Defang Sample

In [None]:
# Call move_and_defang for each sample and keep the path it returns on the
# sample object; the hashing, strings and zip stages read `sample_path`.
for specimen in sample_obj:
    specimen.sample_path = MalwareSample.move_and_defang(
        specimen.sample_name, specimen.saved_sample_name
    )

---

## File Hashes

### SHA256 Sum

In [None]:
# Compute each sample's SHA256 sum and store it on the sample object. The
# result is assigned directly rather than through a variable named `hash`,
# which would shadow the builtin `hash()` for the rest of the session.
for obj in sample_obj:
    obj.sha256sum = MalwareSample.get_sha256sum(obj.sample_path, obj.saved_sample_name)
    print(info + obj.sample_name + ": " + obj.sha256sum)

---

## String Analysis

### StringSifter
StringSifter is a FLARE developed tool that uses an ML model to rank a binary's strings by relevance to malware analysis.

In [None]:
# Minimum string length handed to the strings extraction below. The prompt
# advertises a default of 4, so a blank answer selects 4 instead of raising
# ValueError from int(''). Non-numeric input still raises ValueError.
_length_answer = input(recc + "Input your desired minimum string length [default is 4, 6-8 is recommended] > ").strip()
length = int(_length_answer) if _length_answer else 4

In [None]:
# Run the strings extraction for every sample using the minimum length
# chosen in the previous cell.
for specimen in sample_obj:
    MalwareSample.pull_strings(
        length,
        specimen.saved_sample_name,
        specimen.sample_path,
    )

## VT Analysis
Submit samples to Virus Total and generate a malicious confidence level.

In [None]:
# Prompt for the VirusTotal API key without echoing it. Pressing Enter leaves
# VT_API_KEY empty, which the following cells treat as "no key" and skip.
VT_API_KEY = getpass("Enter VirusTotal API Key (blank if none): ")

In [None]:
# Build the VirusTotal client only when a key was supplied.
if not VT_API_KEY:
    print(info + "No VT API Key. Skipping...")
else:
    vt = VirusTotalPublicApi(VT_API_KEY)

Note: If there are more than 4 samples in the dropbox, hashes are submitted with a sleep of 16 seconds to remain under the public API rate limit. So hit go, grab a beverage of choice, stretch out and relax. This could be a while depending on how many samples you're submitting.

In [None]:
if VT_API_KEY:
    # With five or more samples, lookups are spaced 16 seconds apart to stay
    # under the public API rate limit. This is decided once, and no pause
    # follows the final lookup because there is nothing left to throttle.
    throttle = len(sample_obj) >= 5
    last_index = len(sample_obj) - 1

    for index, obj in enumerate(sample_obj):
        print(info + obj.sample_name + ":")
        print(r"  \\--> " + info + "SHA256sum: " + obj.sha256sum)
        # Look the sample up by hash and derive a criticality from the report.
        res = vt.get_file_report(obj.sha256sum)
        conf = malicious_confidence(res)
        print(r"  \\--> " + info + "Confidence level: " + str(conf))
        crit_level = determine_criticality(conf)
        obj.criticality = crit_level

        if throttle and index < last_index:
            sleep(16)
else:
    print(info + "No VT API Key. Skipping...")

## Binary Analysis

### Run PeFrame

Run PEFrame tool and output to json file for json parsing

In [None]:
# Run peframe on every sample, save its JSON report next to the specimen, and
# collect the sha256, URLs and IPs of each report into one summary table.
PEFRAME_COLUMNS = ['sha256', 'url', 'ip']
peframe_rows = []
for s in sample_obj:
    report_path = "saved-specimens/" + s.saved_sample_name + "/peframe.txt"
    print("analyzing " + s.sample_name + " with peframe")
    # Arguments are passed as a list (no shell), so characters in a sample's
    # file name cannot be interpreted as shell syntax.
    with open(report_path, "w") as filep:
        subprocess.run(["peframe", "-j", "dropbox/" + s.sample_name], stdout=filep, text=True)
    # The report is read back only after the write handle has been closed.
    with open(report_path, 'r') as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError as err:
            # peframe printed something other than JSON (for example an error
            # message); skip this sample instead of aborting the whole loop.
            print("peframe produced no usable JSON for " + s.sample_name + ": " + str(err))
            continue
    hashes = pandas.json_normalize(data['hashes'])
    strings = pandas.json_normalize(data['strings'])
    peframe_rows.append([hashes['sha256'][0], strings['url'][0], strings['ip'][0]])

# Build the table in one go; DataFrame.append was removed in pandas 2.0.
allsamples_df = pandas.DataFrame(peframe_rows, columns=PEFRAME_COLUMNS)
print("peframe analysis completed")
allsamples_df

In [None]:
# Create a urls.txt file for each sample from its peframe report.
import shutil

for s in sample_obj:
    specimen_dir = "saved-specimens/" + s.saved_sample_name
    # Keep a .json copy of the report; copied in Python so file names are
    # never passed through a shell.
    shutil.copyfile(specimen_dir + "/peframe.txt", specimen_dir + "/peframe.json")
    with open(specimen_dir + "/peframe.json", 'r') as peframe_results:
        try:
            peframedata = json.load(peframe_results)
        except json.JSONDecodeError as err:
            print("No usable peframe report for " + s.sample_name + ": " + str(err))
            continue
    normalize_urls = pandas.json_normalize(peframedata['strings'])
    get_urls = normalize_urls['url'][0]
    # One URL per line: an empty separator would run all the URLs together
    # into a single unusable line.
    print(*get_urls, sep='\n')
    with open(specimen_dir + '/urls.txt', 'w') as f:
        print(*get_urls, sep='\n', file=f)
print('Url extraction is complete.')

## Submit Urls to VT (not working yet)

In [None]:
# Prompt for the VirusTotal API key without echoing it. Pressing Enter leaves
# VT_API_KEY empty, and the next cell then skips creating the client.
VT_API_KEY = getpass("Enter VirusTotal API Key (blank if none): ")

In [None]:
# Build the VirusTotal client only when a key was supplied.
if not VT_API_KEY:
    print(info + "No VT API Key. Skipping...")
else:
    vt = VirusTotalPublicApi(VT_API_KEY)

In [None]:
# Ask which URL to submit to VirusTotal; input() already returns a str.
url_to_scan_VT = input(recc + "Input your the url you would like to scan with VirusTotal > ")

In [None]:
#!pip install vt-py
# Imported under an alias: a plain `import vt` would replace the `vt`
# VirusTotalPublicApi object created earlier in this notebook.
import vt as vt_py

if VT_API_KEY:
    # The context manager closes the client's connection when the block ends.
    with vt_py.Client(VT_API_KEY) as client:
        # To fetch an existing report instead of submitting a scan:
        #   url_id = vt_py.url_id("http://www.virustotal.com")
        #   url = client.get_object("/urls/{}", url_id)

        # Submit the URL for scanning.
        analysis = client.scan_url(url_to_scan_VT)
else:
    print(info + "No VT API Key. Skipping...")

## Scan URL with URLSCAN PyPi tool API for urlscan.io

In [None]:
# Prompt for the urlscan.io API key without echoing it.
# NOTE(review): nothing else visible in this notebook reads URLSCAN_API_KEY —
# confirm how the `urlscan` command-line tool is given the key.
URLSCAN_API_KEY = getpass("Enter URLSCAN API Key (blank if none): ")

In [None]:
# Ask which URL to submit to urlscan.io; input() already returns a str.
url_to_scan = input(recc + "Input your the url you would like to scan with URLSCAN.IO > ")

#### The following command uses the url input in the variable 'url_to_scan' above and uses the urlscan 'scan' mode to get results from urlscan.io for that url. By default, scans will be saved to the directory '.urlsaved_scans'. Change this by using the --dir flag and specifying a different directory in the command below.  Scan multiple domains stored in file 'example-domains.txt'  `urlscan scan --file 'example-domains.txt'`

In [None]:
# Submit the URL with the urlscan CLI and save its JSON answer to scan.json.
# The URL is passed as a separate argument (no shell), so characters such as
# '&' or ';' inside it cannot split or extend the command.
with open('scan.json', 'w') as scan_output:
    subprocess.run(["urlscan", "scan", "--url", url_to_scan], stdout=scan_output)

with open('scan.json', 'r') as urlscan_results:
    urlscan_data = json.load(urlscan_results)
uuid = urlscan_data['uuid']
print("The uuid for the completed scan is " + uuid)

#sleep timer to avoid 404 results from the api web request
time.sleep(10)

#Retrieves the summary of the scan results.  Multiple uuid's can be entered. Remove '--summary' to get the full scan results.
# The output is captured and printed so that it appears in the notebook.
retrieved = subprocess.run(["urlscan", "retrieve", "--uuid", uuid, "--summary"],
                           capture_output=True, text=True)
print(retrieved.stdout)
if retrieved.returncode != 0:
    print(retrieved.stderr)

In [None]:
#test of using urls.txt files to submit
for s in sample_obj:
    os.system("urlscan scan --file " + "saved-specimens/" + s.saved_sample_name + '/urls.txt' + ' > scan.json')
    with open('scan.json', 'r') as urlscan_results_all:
        urlscan_data_all = json.load(urlscan_results_all)
        uuids = (urlscan_data_all['uuid'])
        print("The uuid for the completed scan is " + uuids)
    #sleep timer to avoid 404 results from the api web request
        time.sleep(10)
    #Retrieves the summary of the scan results.  Multiple uuid's can be entered. Remove '--summary' to get the full scan results. 
        os.system("urlscan retrieve --uuid " + uuid + " --summary")

## YARA Scanning

In [None]:
import glob
import yara

# Compile the rule files with yara-python.
rules = yara.compile(filepaths={
    'rule1':'yara_rules/process_manipulating.yar',
    'rule2':'yara_rules/network_evasion.yar',
    'rule3':'yara_rules/gen_cmd_script_obfuscated.yar',
    'rule4':'yara_rules/gen_cobaltstrike_by_avast.yar',
    'rule5':'yara_rules/gen_javascript_powershell.yar',
    'rule6':'yara_rules/gen_mal_scripts.yar',
    'rule7':'yara_rules/gen_rats_malwareconfig.yar',
    'rule8':'yara_rules/gen_susp_js_obfuscatorio.yar',
})
# The yara-python matcher (rules.match('dropbox/' + s.sample_name)) does not
# print the matched strings, so the scan itself uses the command-line tool.

# Every file in yara_rules/ is passed as a rules file, as `yara_rules/*` did.
rule_files = sorted(glob.glob('yara_rules/*'))
for s in sample_obj:
    # The output is captured so the real matches can be printed under the
    # header; os.system() only returns the exit status. Arguments are passed
    # as a list (no shell), so sample names are never parsed as shell syntax.
    scan = subprocess.run(
        ['yara', '-ms', *rule_files, 'dropbox/' + s.sample_name],
        capture_output=True, text=True, errors='replace',
    )
    print('YARA matches for the sample \'' + s.sample_name + '\' are: ')
    print(scan.stdout)
    if scan.returncode != 0:
        print(scan.stderr)
    print('')

## Zip and Password Protect

In [None]:
# For each specimen, call zip_and_password_protect and pass the archive it
# returns to delete_unzipped_sample together with the specimen's path.
for specimen in sample_obj:
    archive = MalwareSample.zip_and_password_protect(
        specimen.sample_path, specimen.saved_sample_name
    )
    MalwareSample.delete_unzipped_sample(specimen.sample_path, archive)

---

### Debug Object Vars

In [None]:
# The notebook's setup cell runs `import pprint` after `from pprint import
# pprint`, which leaves the name `pprint` bound to the module; calling it
# raises TypeError. Import the function under an unambiguous local name.
from pprint import pprint as pretty_print

# Dump every attribute collected on each sample object.
for obj in sample_obj:
    pretty_print(vars(obj))