# ELK indexing malware

### This project bulk-indexes malware records into Elasticsearch (ELK). The records come from the VxVault, Malc0de and Malshare projects. For each source we check whether its index already exists; if it does, we delete it and recreate it before indexing. Afterwards, in Kibana (localhost:5601), go to Management → Index Patterns and create an index pattern that covers the three indices created here. Dashboards and visualizations can then be built on any field(s).

##### Libraries

In [None]:
#import requests                                       #other libs when you get from a url
#import pandas as pd
#from bs4 import BeautifulSoup
#import urllib3
import csv
from elasticsearch import Elasticsearch                #for elastic search
import sys

##### Vx Vault

In [None]:
#FILE_URL = "http://apps.sloanahrens.com/qbox-blog-resources/kaggle-titanic-data/test.csv"
#http = urllib3.PoolManager()
#response = http.request('GET', FILE_URL)

# Index the VxVault CSV export into a local Elasticsearch index:
#   1. read the CSV into bulk-API action/document pairs,
#   2. drop and recreate the index,
#   3. bulk-index the rows,
#   4. run a small match_all query as a sanity check.

ES_HOST = {"host": "localhost", "port": 9200}
INDEX_NAME = 'malware-vxvault'      # so that we can create index pattern as malware*
# NOTE(review): mapping types ("_type") were deprecated in Elasticsearch 7 and
# removed in 8 -- confirm the target cluster version still accepts this.
TYPE_NAME = 'VxVault-Malwares'
ID_FIELD = 'md5'                    # unique field that is used as the document _id

bulk_data = []
# Raw string so the backslashes of the Windows path are never read as escape
# sequences; newline='' is what the csv module expects for its input files.
# The with-block guarantees the file handle is closed once the rows are read.
with open(r'D:\EDUCATIONAL\CERT-In\VxVault Project\VXVault.csv', 'r',
          encoding="utf-8", newline='') as f:
    fileob = csv.reader(f)
    header = [item.lower() for item in next(fileob)]    # lower-cased field names
    print("Header=", header)
    for row in fileob:
        if not row:
            # Blank CSV lines carry no md5 and would raise KeyError below.
            continue
        data_dict = {header[i]: value for i, value in enumerate(row)}
        # Bulk API format: one action line followed by the document itself.
        op_dict = {
            "index": {
                "_index": INDEX_NAME,
                "_type": TYPE_NAME,
                "_id": data_dict[ID_FIELD],
            }
        }
        bulk_data.append(op_dict)
        bulk_data.append(data_dict)

# create ES client, create index
try:
    es = Elasticsearch(hosts=[ES_HOST])

    # Keyword argument: newer elasticsearch-py clients reject positional API args.
    if es.indices.exists(index=INDEX_NAME):
        print("\nIndex ", INDEX_NAME, " already exists deleting index...")
        res = es.indices.delete(index=INDEX_NAME)
        print("Response: ", res)

    # since we are running locally, use one shard and no replicas
    request_body = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }
    }
    print("\nCreating ", INDEX_NAME, " index...")
    res = es.indices.create(index=INDEX_NAME, body=request_body)
    print("Response: ", res)

    print("\nBulk indexing...")
    if bulk_data:
        # refresh=True so the search below already sees the new documents.
        res = es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
        if res["errors"]:
            # The bulk call succeeds as a whole even when single items fail.
            print("WARNING: some documents were rejected during bulk indexing")
    else:
        # The client refuses an empty bulk body, so skip the call entirely.
        print("No rows found in the CSV file, nothing to index")

    # match query
    print("\n***Putting query->match_all***")
    res = es.search(index=INDEX_NAME, size=2, body={"query": {"match_all": {}}})
    print("Response: ", res)

    # for output
    print("\nResults:")
    for hit in res['hits']['hits']:
        print(hit["_source"])

    print("\nOperations completed successfully")
except Exception as exc:
    # "except Exception" (not a bare except) lets Ctrl-C / SystemExit through;
    # the details line shows the real cause when it is not a connection issue.
    print("\n\nERROR:  Can't connect to Localhost:9200.\n\tMake sure you have established connection to ElasticSearch and Kibana")
    print("Details: ", repr(exc))
    sys.exit(1)     # non-zero status: the run failed

##### Malshare

In [None]:
# Index the Malshare CSV export into a local Elasticsearch index:
#   1. read the CSV into bulk-API action/document pairs,
#   2. drop and recreate the index,
#   3. bulk-index the rows,
#   4. run a small match_all query as a sanity check.

ES_HOST = {"host": "localhost", "port": 9200}
INDEX_NAME = 'malware-malshare'
# NOTE(review): mapping types ("_type") were deprecated in Elasticsearch 7 and
# removed in 8 -- confirm the target cluster version still accepts this.
TYPE_NAME = 'Malshare-Malwares'
ID_FIELD = 'md5'                    # field that is used as the document _id

bulk_data = []
# Raw string so the backslashes of the Windows path are never read as escape
# sequences; newline='' is what the csv module expects for its input files.
# The with-block guarantees the file handle is closed once the rows are read.
with open(r'D:\EDUCATIONAL\CERT-In\Malshare Project\Malshare.csv', 'r',
          encoding="utf-8", newline='') as f:
    fileob = csv.reader(f)
    header = [item.lower() for item in next(fileob)]    # lower-cased field names
    print("Header=", header)
    for row in fileob:
        if not row:
            # Blank CSV lines are skipped: they have no md5 to use as _id.
            continue
        data_dict = {header[i]: value for i, value in enumerate(row)}
        # Bulk API format: one action line followed by the document itself.
        op_dict = {
            "index": {
                "_index": INDEX_NAME,
                "_type": TYPE_NAME,
                "_id": data_dict[ID_FIELD],
            }
        }
        bulk_data.append(op_dict)
        bulk_data.append(data_dict)

# create ES client, create index
try:
    es = Elasticsearch(hosts=[ES_HOST])

    # Keyword argument: newer elasticsearch-py clients reject positional API args.
    if es.indices.exists(index=INDEX_NAME):
        print("\nIndex ", INDEX_NAME, " already exists deleting index...")
        res = es.indices.delete(index=INDEX_NAME)
        print("Response: ", res)

    # since we are running locally, use one shard and no replicas
    request_body = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }
    }
    print("\nCreating ", INDEX_NAME, " index...")
    res = es.indices.create(index=INDEX_NAME, body=request_body)
    print("Response: ", res)

    print("\nBulk indexing...")
    if bulk_data:
        # refresh=True so the search below already sees the new documents.
        res = es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
        if res["errors"]:
            # The bulk call succeeds as a whole even when single items fail.
            print("WARNING: some documents were rejected during bulk indexing")
    else:
        # The client refuses an empty bulk body, so skip the call entirely.
        print("No rows found in the CSV file, nothing to index")

    # match query
    print("\n***Putting query->match_all***")
    res = es.search(index=INDEX_NAME, size=2, body={"query": {"match_all": {}}})
    print("Response: ", res)

    # for output
    print("\nResults:")
    for hit in res['hits']['hits']:
        print(hit["_source"])

    print("\nOperations completed successfully")
except Exception as exc:
    # "except Exception" (not a bare except) lets Ctrl-C / SystemExit through;
    # the details line shows the real cause when it is not a connection issue.
    print("\n\nERROR:  Can't connect to Localhost:9200.\n\tMake sure you have established connection to ElasticSearch and Kibana")
    print("Details: ", repr(exc))
    sys.exit(1)     # non-zero status: the run failed

##### Malc0de

In [None]:
# Index the Malc0de CSV export into a local Elasticsearch index:
#   1. read the CSV into bulk-API action/document pairs,
#   2. drop and recreate the index,
#   3. bulk-index the rows,
#   4. run a small match_all query as a sanity check.

ES_HOST = {"host": "localhost", "port": 9200}
INDEX_NAME = 'malware-malc0de'
# NOTE(review): mapping types ("_type") were deprecated in Elasticsearch 7 and
# removed in 8 -- confirm the target cluster version still accepts this.
TYPE_NAME = 'Malc0de-Malwares'
ID_FIELD = 'md5'                    # field that is used as the document _id

bulk_data = []
# Raw string so the backslashes of the Windows path are never read as escape
# sequences; newline='' is what the csv module expects for its input files.
# The with-block guarantees the file handle is closed once the rows are read.
with open(r'D:\EDUCATIONAL\CERT-In\Malc0de Project\Malc0de.csv', 'r',
          encoding="utf-8", newline='') as f:
    fileob = csv.reader(f)
    header = [item.lower() for item in next(fileob)]    # lower-cased field names
    print("Header=", header)
    for row in fileob:
        if not row:
            # Blank CSV lines are skipped: they have no md5 to use as _id.
            continue
        data_dict = {header[i]: value for i, value in enumerate(row)}
        # Bulk API format: one action line followed by the document itself.
        op_dict = {
            "index": {
                "_index": INDEX_NAME,
                "_type": TYPE_NAME,
                "_id": data_dict[ID_FIELD],
            }
        }
        bulk_data.append(op_dict)
        bulk_data.append(data_dict)

# create ES client, create index
try:
    es = Elasticsearch(hosts=[ES_HOST])

    # Keyword argument: newer elasticsearch-py clients reject positional API args.
    if es.indices.exists(index=INDEX_NAME):
        print("\nIndex ", INDEX_NAME, " already exists deleting index...")
        res = es.indices.delete(index=INDEX_NAME)
        print("Response: ", res)

    # since we are running locally, use one shard and no replicas
    request_body = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }
    }
    print("\nCreating ", INDEX_NAME, " index...")
    res = es.indices.create(index=INDEX_NAME, body=request_body)
    print("Response: ", res)

    print("\nBulk indexing...")
    if bulk_data:
        # refresh=True so the search below already sees the new documents.
        res = es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
        if res["errors"]:
            # The bulk call succeeds as a whole even when single items fail.
            print("WARNING: some documents were rejected during bulk indexing")
    else:
        # The client refuses an empty bulk body, so skip the call entirely.
        print("No rows found in the CSV file, nothing to index")

    # match query
    print("\n***Putting query->match_all***")
    res = es.search(index=INDEX_NAME, size=2, body={"query": {"match_all": {}}})
    print("Response: ", res)

    # for output
    print("\nResults:")
    for hit in res['hits']['hits']:
        print(hit["_source"])

    print("\nOperations completed successfully")
except Exception as exc:
    # "except Exception" (not a bare except) lets Ctrl-C / SystemExit through;
    # the details line shows the real cause when it is not a connection issue.
    print("\n\nERROR:  Can't connect to Localhost:9200.\n\tMake sure you have established connection to ElasticSearch and Kibana")
    print("Details: ", repr(exc))
    sys.exit(1)     # non-zero status: the run failed