In [2]:
import boto3
import botocore
import os 
import multiprocessing
import numpy as np
import pandas as pd
import json
import elasticsearch as elastic
import datetime

import requests
from requests_aws4auth import AWS4Auth

# Step 0: set up ES API connection

This is mainly for AWS, you would need to set up an alternative connection to the ES server if AWS is not used. If AWS, make sure that the AWS package has been properly set up with keys.

In [3]:
cred = boto3.Session().get_credentials()
access = cred.access_key
secret = cred.secret_key
region = 'eu-west-1'
service = 'es'
token = cred.token
awsauth = AWS4Auth(access, secret, region, service,session_token = token)

In [4]:
es = elastic.Elasticsearch(
    hosts = [{'host': 'search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', 'port': 443}],
    http_auth = awsauth,
    use_ssl = True,
    verify_certs = True,
    connection_class = elastic.RequestsHttpConnection,
    request_timeout = 6000
)

# Step 0.5: Create Functions

In [5]:
# list of columns to exclude

ex = ['data.lastSeen','data.notificationType','data.ipAddress','data.floorId','data.floorRefId','data.moveDistanceInFt','data.apMacAddress','data.subscriptionName','data.confidenceFactor','data.band', 'data.entity','data.ssid','data.username','data.locationCoordinate.unit','data.locationCoordinate','data.geoCoordinate.unit','data.eventId']

In [56]:
# make dict to rename columns later 

df = pd.io.json.json_normalize(pd.DataFrame(es.search(index='cmx*',size=1,_source_exclude=ex)['hits']['hits'])['_source'].tolist())

lis = df.keys().tolist()
liss = [s[5:] for s in lis]
dic = dict(zip(lis, liss))

#convert timestamp to epoch time
def to_epoch(stamp):    
    epoch = datetime.datetime.utcfromtimestamp(0)
    timestamp = (pd.to_datetime(stamp) - epoch).total_seconds() * 1000
    return round(timestamp)

# function which downloads all data for a device and then turns it into a dataframe
def download_for_id(key):
    r = es.search(index='cmx*',body={
        "query": {
            "bool": {
                "must": {'match_phrase': {'data.deviceId': key}}
                            }}},
              scroll='1m', size = 10000, _source_exclude=ex,request_timeout = 6000)
    
    
    data =  r['hits']['hits']
    
    while len(data) < r['hits']['total']:
        r = es.scroll(scroll_id = r['_scroll_id'], scroll='1m',request_timeout = 6000)
        data += r['hits']['hits']

    return pd.io.json.json_normalize((pd.DataFrame(data)['_source'].tolist()))

# Function which splits lists of date time to smaller lists
# if time in between values is larger than 1 day
# if it is more than one day I am assuming that they have left
# returns a list of date segments

def date_segments(dates):
    output = []
    cur_list = [dates[0]]
    for dt_pair in zip(dates[1:], dates):
        if (dt_pair[0] - dt_pair[1]).days > 1:
            output.append(cur_list)
            cur_list = [dt_pair[0]]
        else:
            cur_list.append(dt_pair[0])
    output.append(cur_list)
    return output

# Works out seperate visits, and also determining starting/ending conditions
def seperate_visits(df):
    
    df.sort_values('timestamp',inplace=True)
    times = df.timestamp.tolist()
    
    all_visits = date_segments(pd.to_datetime(pd.Series(times), unit='ms'))
    
    num_visits = 0
    
    time_start_col = []
    time_end_col = []
    zone_start_col = []
    zone_end_col =[]
    type_col = []
    
    for visits in all_visits:
        
        length_visit = len(visits)
        
    #there used to be a block here deleting visits longer than 4 hours, however, there is no reason to not use Kibana filter instead :P
        
        
        num_visits += 1
        
        
        time_start = visits[0]
        time_end = visits[-1]
        zone_start = df[df.timestamp == to_epoch(time_start)].locationMapHierarchy
        zone_start = zone_start.tolist()[0]
        zone_end = df[df.timestamp == to_epoch(time_end)].locationMapHierarchy
        zone_end = zone_end.tolist()[0]
        start = zone_start
        end = zone_end
        
        # This is the catergorizing section
        # Basiclly identifies pax type depending on staring and ending locations
        # also has to be a complete path, with reasonable exits and entries
        # transit pax cant move to landside
        
        if ("Pier" in start):
            if ("Pier" in end):
                
                weird = 0
                
                for time in visits:
                    loc = df[df.timestamp == to_epoch(time)].locationMapHierarchy.tolist()[0]
                    if ("Land" in loc) | ("CSC" in loc):
                        weird = 1
                
                if weird == 1:
                    typ = 'broken'
                else:
                    typ = 'transit'
            
            
            
            elif ("Pier" not in end):
                typ = 'arrival'
        else:
            if ("Pier" in end):
                typ = 'departure'
            elif ("Pier" not in end):
                
                weird = 0
                
                for time in visits:
                    loc = df[df.timestamp == to_epoch(time)].locationMapHierarchy.tolist()[0]
                    if ("Land" not in loc) & ("CSC" not in loc):
                        weird = 1
                            
                if weird == 1:        
                    typ = 'greeter'
                else:
                    typ = 'broken'
        
        if not (("Pier" in start) | ("Land" in start) | ("CSC" in start)) & (("Pier" in end) | ("Land" in end) | ("CSC" in end)):
            typ = "broken"

        
        time_start_col += [time_start]*length_visit
        time_end_col += [time_end]*length_visit
        zone_start_col += [zone_start]*length_visit
        zone_end_col += [zone_end]*length_visit
        type_col += [typ]*length_visit
    
    df['start'] = zone_start_col
    df['fin'] = zone_end_col
    df['arrive'] = time_start_col
    df['left'] = time_end_col
    df['type'] = type_col
    
    
    return df

# find dwell times for each session
def find_dwell(df):
    
    
    df['comb'] = df.deviceId.str.cat(df.locationMapHierarchy)
    df['match'] = df.comb.eq(df.comb.shift())
    
    sesh = []
    diff = []
    dwell = []
    
    # detecting sessions
    count = 0
    for i in range(len(df)):
    
        if df.match.iloc[i] == True:
            sesh.append(count)
        else:
            count += 1
            sesh.append(count)
            count += 1
            
    df['sesh'] = sesh  
    
    #deteching time diff
    diff += ((df.timestamp - df.timestamp.shift()).tolist())
    df['diff'] = diff
    
    #detecting dwell
    
    for ses in df.sesh.unique():
        session = df[df.sesh == ses]
        
        dwell_msec = session.timestamp.max() - session.timestamp.min()
    
        if len(session) != 1:
            dwell += len(session)*[dwell_msec]
        else:
            dwell += [session['diff'].iloc[0]]
    
    df['dwell'] = dwell
    
    return df

print('functions saved')

functions saved


# Step 1: get list of all IDs that exists in ES, Filter out already processed IDs

In an actual deployment, we might also have to consider returning devices, but this should be enough for now, as most devices are non returning

In [7]:
r = es.search(index='cmx*',size=10000,scroll='1m',_source_include=['data.deviceId','data.associated'])
sid = r['_scroll_id']
data = [d['_source']['data'] for d in r['hits']['hits']]

filenum = 0

while len(data) < r['hits']['total']:
    
    r = es.scroll(scroll_id = sid, scroll='1m')
    sid = r['_scroll_id']
    data += [d['_source']['data'] for d in r['hits']['hits']]
    
    print(str(len(data)) + ' out of ' + str(r['hits']['total']), end="\r")

16730236 out of 16730236

In [8]:
iddf = pd.io.json.json_normalize(data)
iddf.head()

Unnamed: 0,associated,deviceId
0,True,00:00:ff:c8:11:cb
1,True,00:00:d0:63:34:99
2,True,00:00:c4:90:1d:38
3,False,00:00:cc:d9:01:1c
4,False,00:00:1e:4e:c0:d6


In [9]:
# print('We have this many rows: ' + str(len(iddf)))
# print('This many devices: ' + str(len(iddf['data.deviceId'].unique())))
# print('This many associated: ' + str(len(iddf[iddf['data.associated']]['data.deviceId'].unique())))
# display(iddf.head())

ids = iddf[iddf['associated']]['deviceId'].unique().copy()
new = ids

Find existing Id to filter out, only run this if there is already data

In [None]:
r = es.search(index='geo*',size=10000,scroll='1m',_source_include=['data.deviceId','data.associated'])
sid = r['_scroll_id']
data = r['hits']['hits']

while len(data) < r['hits']['total']:
    
    r = es.scroll(scroll_id = sid, scroll='1m')
    sid = r['_scroll_id']
    data += r['hits']['hits']
    
    print(str(len(data)) + ' out of ' + str(r['hits']['total']), end="\r")

In [None]:
existing = pd.io.json.json_normalize((pd.DataFrame(data)['_source'].tolist()))[exist_iddf['data.deviceId']].unique()

This new var is literally for new data, so change the varibles at the later download stage, if you want to use the filtered list instead

In [None]:
new = set(exisiting.tolist()) ^ set(ids.tolist())

# Step 2: create Index for geoloc, and initialize its fields

This is to ensure that ES understands the different formats of the data we are adding to it. It is also creating the index

In [57]:
url = ('https://search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com/geoloc/')

r = requests.put(url, auth=awsauth,json ={})
url = ('https://search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com/geoloc/_mapping/_doc/')

r = requests.post(url, auth=awsauth,
                  json = {"_doc": {
                      'properties': {
                          'location': {
                              'type': 'geo_point'
                          }, 'timestamp': {
                              'type' : 'date'
                          }, 'left': {
                              'type' : 'date'
                          }, 'arrive': {
                              'type' : 'date'
                          }
                      }
                  }
                         }
                 )
r.json()

{'acknowledged': True}

# Step 3: Download - Transform - Upload

In [58]:
# process raw download from ES
def process_id(id):

    df = download_for_id(id)
    df['data.locationMapHierarchy'] = df['data.locationMapHierarchy'].str.rsplit(pat= '>', expand=True, n=1)[1]
    df.rename(dic, axis='columns',inplace=True)
    
    seperate_visits(df)

    df['location'] = df['geoCoordinate.latitude'].map(str) + ',' + df['geoCoordinate.longitude'].map(str)

    df.arrive = df.arrive.map(to_epoch)
    df.left = df.left.map(to_epoch)

    df = find_dwell(df)

    df['kibana_id'] = df.deviceId.str.cat(df.timestamp.map(str), '-')
    del df['comb'], df['match'], df['sesh'], df['diff'], df['geoCoordinate.latitude'], df['geoCoordinate.longitude']
    
    return df

def upload_df(df):

    if len(df) < 10:
        return
    
    up_list = df.to_dict(orient='records')

    bulk_file = ''
    
    count = 0
    for rec in up_list:
        count += 1
        bulk_file += '{ "index" : { "_index" : "geoloc", "_type" : "_doc", "_id" : "' + rec['kibana_id'] + '" } }\n'
        bulk_file += json.dumps(rec) + '\n'

        if (count%10000) == 0:

            es.bulk(bulk_file)
            bulk_file = ''

        elif count == len(up_list):
            es.bulk(bulk_file)

            
def down_up_id(lis):
    tot = str(len(lis))
    count = 0
    for id in lis:
        if (count%100) == 0:
            print(str(count) + ' out of ' + tot)
        
        count += 1
        upload_df(process_id(id))
    
    print("DONE!")
    
# get list of list of inputs
def chunks(lis, num):
        for i in range(0, len(lis), num):
            yield lis[i:i + num]

### Applies m

In [59]:
import multiprocessing as mp

In [60]:
# set key list and how value of each thread in multi threading
inpu = new
split_to = 2000

processes = [mp.Process(target=down_up_id, args=([keys])) for keys in list(chunks(list(inpu),split_to))]

for p in processes:
    p.start()
    
for p in processes:
    p.join()

0 out of 2000
0 out of 2000
0 out of 2000


Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/connection/http_requests.py", line 76, in perform_request
    response = self.session.send(prepared_request, **send_kwargs)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/requests/sessions.py", line 618, in send
    r = adapter.send(request, **kwargs)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/requests/adapters.py", line 440, in send
    timeout=timeout
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 601, in urlopen
    chunked=chunked)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/ana

OpenSSL.SSL.Error: [('SSL routines', 'SSL3_GET_RECORD', 'decryption failed or bad record mac')]


0 out of 2000






0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 2000
0 out of 859


Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 389, in _make_request
    self._raise_timeout(err=e, url=url, timeout_value=read_timeout)

  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 309, in _raise_timeout
    raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
Process Process-644:
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/conn

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-635:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-650:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-654:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-648:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-653:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/client/__init__.py", line 1150, in bulk
    headers={'content-type': 'application/x-ndjson'})
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/transport.py", line 318, in perform_request
    status, headers_response, data = connection.perform_request(method, url, params, body, headers=headers, ignore=ignore, timeout=timeout)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/connection/http_requests.py", line 84, in perform_request
    raise ConnectionTimeout('TIMEOUT', str(e), e)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 294, in recv_

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-657:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-658:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

100 out of 859
100 out of 2000
100 out of 2000
100 out of 2000
200 out of 859
200 out of 2000
200 out of 2000
100 out of 2000
100 out of 2000
100 out of 2000
100 out of 2000
300 out of 859


Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/requests/adapters.py", line 440, in send
    timeout=timeout
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 639, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/util/retry.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda/envs/Python3/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/connection/http_requests.py", line 84, in perform_request
    raise ConnectionTimeout('TIMEOUT', str(e), e)

elasticsearch.exceptions.ConnectionTimeout: ConnectionTimeout caused by - ReadTimeout(HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10))
Process Process-684:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()


requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-678:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-686:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_fro


During handling of the above exception, another exception occurred:

  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 309, in _raise_timeout
    raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Traceback (most recent call last):

During handling of the above exception, another exception occurred:

  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/connection/http_requests.py", line 76, in perform_request
    response = self.session.send(prepared_request, **send_kwargs)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/requests/sessions.py", line 618, in send
    r = adapter.send(request, **kwargs)
  File "/opt/anaconda/envs/Python3/lib/python3.6/sit

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-687:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-634:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-689:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

  File "<ipython-input-58-1300c6df1844>", line 43, in upload_df
    es.bulk(bulk_file)
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py", line 294, in recv_into
    raise timeout('The read operation timed out')
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/client/utils.py", line 76, in _wrapped
    return func(*args, params=params, **kwargs)
socket.timeout: The read operation timed out
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/client/__init__.py", line 1150, in bulk
    headers={'content-type': 'application/x-ndjson'})

During handling of the above exception, another exception occurred:

  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/elasticsearch/transport.py", line 318, in perform_request
    status, headers_response, data = connection.perform_request(method, url, params, body, headers=headers, ignore=ignore, timeout=timeout)
Traceback (most recent call last):


requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-662:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-660:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='search-uclciscocmx-lmmraiyhzdhublvkgvjynznrby.eu-west-1.es.amazonaws.com', port=443): Read timed out. (read timeout=10)
Process Process-639:
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/opt/anaconda/envs/Python3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda/envs/Python3/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anac

300 out of 2000
400 out of 859
500 out of 859
400 out of 2000
200 out of 2000
100 out of 2000
100 out of 2000
600 out of 859
100 out of 2000
500 out of 2000
100 out of 2000
100 out of 2000
100 out of 2000
700 out of 859
600 out of 2000
100 out of 2000
100 out of 2000
800 out of 859
100 out of 2000
100 out of 2000
300 out of 2000
100 out of 2000
100 out of 2000
100 out of 2000
100 out of 2000
DONE!
700 out of 2000
100 out of 2000
400 out of 2000
800 out of 2000
900 out of 2000
200 out of 2000
500 out of 2000
200 out of 2000
1000 out of 2000
200 out of 2000
200 out of 2000
200 out of 2000
200 out of 2000
1100 out of 2000
200 out of 2000
600 out of 2000
200 out of 2000
200 out of 2000
200 out of 2000
200 out of 2000
200 out of 2000
200 out of 2000
1200 out of 2000
200 out of 2000
1300 out of 2000
700 out of 2000
1400 out of 2000
300 out of 2000
300 out of 2000
300 out of 2000
1500 out of 2000
300 out of 2000
800 out of 2000
300 out of 2000
1600 out of 2000
300 out of 2000
300 out of 2000




900 out of 2000
800 out of 2000
800 out of 2000
800 out of 2000
800 out of 2000
900 out of 2000
800 out of 2000
800 out of 2000
800 out of 2000
800 out of 2000
900 out of 2000
900 out of 2000
900 out of 2000
900 out of 2000
900 out of 2000
900 out of 2000
900 out of 2000
1000 out of 2000
900 out of 2000
600 out of 2000
1000 out of 2000
1000 out of 2000
900 out of 2000
900 out of 2000
1000 out of 2000
1000 out of 2000
1000 out of 2000
900 out of 2000
900 out of 2000
1000 out of 2000
1000 out of 2000
1000 out of 2000
1100 out of 2000
1000 out of 2000
1100 out of 2000
1100 out of 2000
1000 out of 2000
700 out of 2000
1100 out of 2000
1000 out of 2000
1100 out of 2000
1100 out of 2000
1100 out of 2000
1000 out of 2000
1000 out of 2000
1100 out of 2000
1100 out of 2000
1200 out of 2000
1100 out of 2000
1200 out of 2000
1200 out of 2000
1200 out of 2000
1100 out of 2000
1200 out of 2000
1100 out of 2000
1200 out of 2000
1200 out of 2000
1100 out of 2000
1200 out of 2000
1300 out of 2000
1100



1600 out of 2000
1600 out of 2000
1700 out of 2000
1500 out of 2000
1600 out of 2000
1700 out of 2000
1700 out of 2000
1600 out of 2000
1600 out of 2000
1800 out of 2000
1500 out of 2000
1600 out of 2000
1100 out of 2000
1700 out of 2000
1700 out of 2000
1600 out of 2000
1800 out of 2000
1700 out of 2000
1700 out of 2000
1700 out of 2000
1700 out of 2000
1800 out of 2000
1800 out of 2000
1700 out of 2000
1900 out of 2000
1800 out of 2000
1600 out of 2000
1800 out of 2000
1700 out of 2000
1700 out of 2000
1900 out of 2000
1800 out of 2000
1800 out of 2000
1200 out of 2000
1800 out of 2000
1900 out of 2000
1900 out of 2000
1800 out of 2000
DONE!
1900 out of 2000
1800 out of 2000
1800 out of 2000
1700 out of 2000
1800 out of 2000
1900 out of 2000
DONE!
1900 out of 2000
1900 out of 2000
DONE!
DONE!
1900 out of 2000
DONE!
1900 out of 2000
1900 out of 2000
1300 out of 2000
1900 out of 2000
1800 out of 2000
1900 out of 2000
DONE!
DONE!
DONE!
DONE!
DONE!
DONE!
DONE!
1900 out of 2000
DONE!
1400

In [33]:
len(list(chunks(ids,16000)))

8

In [48]:
test = list(range(15))
print(test)
print(list(chunks(test, 5)))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]


In [61]:
def print_num(lis):
    print(lis)

pool = ThreadPool(15)

pool.map(print_num, test)

pool.close()
pool.join()

0
1
2
3
45
6
7
8
9
10
11
12
13
14

