In [0]:
import urllib.request
import urllib.parse

import json
import math
import time

from bs4 import BeautifulSoup

from google.colab import drive

import pandas as pd

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
%cd "/content/drive/My Drive/Vilnius University/Vilnius City Crime Prediction"

/content/drive/My Drive/Vilnius University/Vilnius City Crime Prediction


In [0]:
from importlib.machinery import SourceFileLoader
common = SourceFileLoader('common', "Modules/common.py").load_module()

In [0]:
PARAMS = json.load(open('Params.json', 'r'))

CRIME_TYPES = PARAMS["CrimeCodes"]["HealthInjury"]

# Requests

In [0]:
def MakeRequest(url, params):
  query = urllib.parse.urlencode(params).encode("ascii")
  request = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
  with urllib.request.urlopen(request, query) as response:
    return response.read()

# Coordinate system convertions

In [0]:
def LksToWgs(lat, long): #TODO fix the return type
  url = "http://www.sauliukas.lt/lks/"
  params = {'lks' : str(long)+","+str(lat)}
  data = MakeRequest(url, params)
  soup = BeautifulSoup(data, "lxml")
  result = soup.find('textarea',{'name':'wgs'}).text[:-2].split(',')
  return float(result[0]), float(result[1])

LksToWgs(6061885.5, 582699.3)

(54.686601466784, 25.282611523493)

In [0]:
def WgsToLks(lat, long):
  url = "https://www.ird.lt/nvzrgis/map/proxy.jsp?"
  params = {
      "http://localhost:6080/arcgis/rest/services/Utilities/Geometry/GeometryServer/project?f" : "json",
      "outSR" : "3346",
      "inSR" : "4326",
      "geometries" : "{\"geometryType\":\"esriGeometryPoint\",\"geometries\":[{\"x\":"+ str(long) + ",\"y\":"+str(lat) +",\"spatialReference\":{\"wkid\":4326}}]}"
  }
  for key, value in params.items():
      url += key + '=' + value + '&'

  data = json.loads(MakeRequest(url, {}))
  return data['geometries'][0]['y'], data['geometries'][0]['x']

WgsToLks(54.686601466784, 25.282611523493)

(6061885.500246813, 582699.2982279152)

# Crime scraping

Gets only crime type and it's count.

In [0]:
def GetCrimesInRadiusNvzr(dateFrom, dateTo, radius, point, crimeType, crimeCodes): #currently not used
  data = {}
  data['dateTo'] = dateTo #YYYY-mm-dd
  data['dateFrom'] = dateFrom
  data['scale'] = 3
  data[crimeType] = crimeCodes
  data['type'] = crimeType
  data['point'] = point #lithuanian coordinate system (LKS-94) and X - longitude, Y - latidude
  data['radius'] = radius
  query = json.dumps(data)
  url = "https://www.ird.lt/nvzrgis/map/rest/featureservice/identifyradius"
  params = { "query" : query }
  return json.loads(MakeRequest(url, params))

GetCrimesInRadiusNvzr("2019-01-20","2019-02-20", 2000, {"y": 6061885.5, "x": 582699.3}, "eulocs", CRIME_TYPES)

{'bare': [[31, '10020202'], [7, '10020203']], 'count': 38}

In [0]:
def GetCrimesInRectangleNvzr(dateFrom, dateTo, positionFrom, positionTo, crimeType, crimeCodes):
  minLat, minLong = WgsToLks(positionFrom[0], positionFrom[1])
  maxLat, maxLong = WgsToLks(positionTo[0], positionTo[1])

  url = "https://www.ird.lt/nvzrgis/map/rest/featureservice/query"
  data = {}
  data['dateTo'] = dateTo #YYYY-mm-dd
  data['dateFrom'] = dateFrom
  data[crimeType] = crimeCodes
  data['extent'] = {"xmin": minLong,"ymin":minLat,"xmax":maxLong, "ymax":maxLat,"spatialReference":{"wkid":2600}}
  data['scale'] = 4
  data['type'] = crimeType
  query = json.dumps(data)
  params = { "query" : query }
  return json.loads(MakeRequest(url, params))

GetCrimesInRectangleNvzr("2019-01-20","2019-02-20", (54.681375, 25.258354), tuple(common.OffsetCoordinate(54.681375, 25.258354, 1, 1)), "eulocs", CRIME_TYPES)

{'bare': [[5896309, 581692, 6061406, '10020202']]}

In [0]:
def GetCrimeDetails(id):
  url = "https://www.ird.lt/nvzrgis/map/rest/featureservice/identify"
  query = "{\"ids\":" + json.dumps(id) + "}"
  params = { "query": query }
  return json.loads(MakeRequest(url, params))[::-1]

GetCrimeDetails([4636252, 4654821, 4656829])

[{'d14_f10': '2',
  'disp_data': '2015-01-05 00:00:00.0',
  'eulocs': '10020202',
  'eulocs_en': 'Causing minor bodily injury',
  'eulocs_lt': 'Nesunkus kūno sužalojimas',
  'kl_bk_aprasymas': 'Nesunkus sveikatos sutrikdymas',
  'p14_f10': '8',
  's14_f10': '138',
  'z14_f10': '0'},
 {'d14_f10': '2',
  'disp_data': '2015-03-02 00:00:00.0',
  'eulocs': '10020202',
  'eulocs_en': 'Causing minor bodily injury',
  'eulocs_lt': 'Nesunkus kūno sužalojimas',
  'kl_bk_aprasymas': 'Fizinio skausmo sukėlimas ar nežymus sveikatos sutrikdymas',
  'p14_f10': '0',
  's14_f10': '140',
  'z14_f10': '0'},
 {'d14_f10': '2',
  'disp_data': '2015-03-07 00:00:00.0',
  'eulocs': '10020202',
  'eulocs_en': 'Causing minor bodily injury',
  'eulocs_lt': 'Nesunkus kūno sužalojimas',
  'kl_bk_aprasymas': 'Fizinio skausmo sukėlimas ar nežymus sveikatos sutrikdymas',
  'p14_f10': '0',
  's14_f10': '140',
  'z14_f10': '0'}]

In [0]:
def _GetCrimesInRectangle(dateFrom, dateTo, positionFrom, positionTo, crimeType, crimeCodes):
  nvzrCrimesDirty = GetCrimesInRectangleNvzr(dateFrom, dateTo, positionFrom, positionTo, crimeType, crimeCodes)
  nvzrCrimes = nvzrCrimesDirty["bare"]
  crimeIds = [c[0] for c in nvzrCrimes]
  crimesDetails = GetCrimeDetails(crimeIds)
  crimes = []
  for i in range(len(nvzrCrimes)):
    lat, long = LksToWgs(nvzrCrimes[i][2], nvzrCrimes[i][1])
    crimes.append(common.Crime(nvzrCrimes[i][0], lat, long, nvzrCrimes[i][3], crimesDetails[i]["disp_data"].split()[0]))
  return crimes

def GetCrimesInRectangle(dateFrom, dateTo, positionFrom, positionTo, crimeType, crimeCodes):
  nvzrCrimesDirty = GetCrimesInRectangleNvzr(dateFrom, dateTo, positionFrom, positionTo, crimeType, crimeCodes)
  nvzrCrimes = nvzrCrimesDirty["bare"]
  crimes = []
  for crime in nvzrCrimes:
    crimeDetails = GetCrimeDetails([crime[0]]) #TODO not very effective
    lat, long = LksToWgs(crime[2], crime[1])
    crimes.append(common.Crime(crime[0], lat, long, crime[3], crimeDetails[0]["disp_data"].split()[0]))
  return crimes

crimes = GetCrimesInRectangle("2015-01-01","2016-01-01", (54.681375, 25.258354), tuple(common.OffsetCoordinate(54.681375, 25.258354, 1, 1)), "eulocs", CRIME_TYPES)
print(len(crimes))
for crime in crimes:
  print(crime)

21
Date:2015-01-05 ID:4636252 Lat:54.682458476059 Long:25.266859075439 Type:10020202
Date:2015-03-02 ID:4654821 Lat:54.686029046142 Long:25.271700821527 Type:10020202
Date:2015-03-02 ID:4676944 Lat:54.686029046142 Long:25.271700821527 Type:10020203
Date:2015-03-07 ID:4656829 Lat:54.681437107661 Long:25.265710677891 Type:10020202
Date:2015-03-08 ID:4657876 Lat:54.689368772107 Long:25.273697623278 Type:10020202
Date:2015-04-19 ID:4675326 Lat:54.684570268255 Long:25.272849437145 Type:10020202
Date:2015-04-19 ID:4803894 Lat:54.684570268255 Long:25.272849437145 Type:10020202
Date:2015-04-25 ID:4674114 Lat:54.682321626625 Long:25.258433684008 Type:10020202
Date:2015-07-01 ID:5467130 Lat:54.689575860505 Long:25.265079823058 Type:10020202
Date:2015-07-05 ID:4698809 Lat:54.688261577045 Long:25.273027004715 Type:10020202
Date:2015-08-01 ID:5467141 Lat:54.689575860505 Long:25.265079823058 Type:10020202
Date:2015-08-01 ID:5467148 Lat:54.689575860505 Long:25.265079823058 Type:10020202
Date:2015-08-

In [0]:
#TODO's are for the next time when I need to collect new data
posFrom = (54.667819, 25.248268) #TODO use params form file
x = 4.2
y = 4.2
posTo = tuple(common.OffsetCoordinate(posFrom[0], posFrom[1], x, y,))
crimes2015 = GetCrimesInRectangle("2015-01-01","2015-12-31", posFrom, posTo, "eulocs", CRIME_CODES) #TODO use loop
print("2015 Done!")
crimes2016 = GetCrimesInRectangle("2016-01-01","2016-12-31", posFrom, posTo, "eulocs", CRIME_CODES)
print("2016 Done!")
crimes2017 = GetCrimesInRectangle("2017-01-01","2017-12-31", posFrom, posTo, "eulocs", CRIME_CODES)
print("2017 Done!")
crimes2018 = GetCrimesInRectangle("2018-01-01","2018-12-31", posFrom, posTo, "eulocs", CRIME_CODES)
print("2018 Done!")
crimes = crimes2015 + crimes2016 + crimes2017 + crimes2018

#labels = ['Date', 'Id', 'Lat', 'Long', "Type"]
crimesDf = pd.DataFrame.from_records([c.to_dict() for c in crimes])
crimesDf.to_csv(PROJECT_PATH + "/Data/crimes.csv", sep="\t") #TODO save to path from params
print("Data size = {}".format(len(crimesDf.index)))
crimesDf.tail()

2015 Done!
2016 Done!
2017 Done!
2018 Done!
Data size = 2263


Unnamed: 0,Date,ID,Lat,Long,Type
2258,2018-12-30,5863840,54.69242,25.266394,10020203
2259,2018-12-30,5860335,54.70109,25.251753,10020202
2260,2018-12-30,5860247,54.680693,25.262958,10020202
2261,2018-12-31,5865198,54.677029,25.286709,10020202
2262,2018-12-31,5881366,54.677029,25.286709,10020202


In [0]:
class Place:
    def __init__(self, id, types, lat, long, place_id):
      self.id = id
      self.types = types
      self.lat = lat
      self.long = long
      self.place_id = place_id
        
    def __str__(self):
      return "ID:{} Lat:{:20} Long:{:20} Types:{}".format(self.id, self.lat, self.long, self.get_types())

    def get_types(self):
      return ','.join(self.types)
    
    def to_dict(self):
      return {
          "ID": self.id,
          "Lat": self.lat,
          "Long": self.long,
          "Types": self.types,
      }

In [0]:
def Get20PlacesByType(coords, radiusInMeters, placeType, nextPageToken):
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?"
    params = {
        'location' : "{},{}".format(coords[0], coords[1]),
        'radius' : str(radiusInMeters),
        'type' : placeType,
        'key' : PARAMS["GoogleApiKey"]
    }
    if nextPageToken:
      params["pagetoken"] = nextPageToken
    #print(params)
    for key, value in params.items():
        url += key + '=' + value + '&'
    print(url)
    response = json.loads(MakeRequest(url, {}))
    print(response)
    if "next_page_token" in response:
      return response["results"], response["next_page_token"]
    else:
      return response["results"], ""

def GetPlacesByType(coords, radiusInMeters, placeType):
  results, nextPageToken = Get20PlacesByType(coords, radiusInMeters, placeType, "")
  places = []
  prevToken = "First"
  while prevToken:
    #print(results)
    #print(nextPageToken)
    time.sleep(2)
    for r in results:
      firstType = r['types'][0]
      if firstType == placeType:
        places.append(Place(r['id'], r['types'], r['geometry']['location']['lat'], r['geometry']['location']['lng'], r['place_id']))
    prevToken = nextPageToken
    if nextPageToken:
      results, nextPageToken = Get20PlacesByType(coords, radiusInMeters, placeType, nextPageToken)
  return places
    

posTo = tuple(common.OffsetCoordinate(PARAMS["StartPosition"][0], PARAMS["StartPosition"][1], PARAMS["TotalSize"][0], PARAMS["TotalSize"][1]))
center = ((PARAMS["StartPosition"][0] + posTo[0]) / 2.0, (PARAMS["StartPosition"][1] + posTo[1]) / 2.0)
radius = math.sqrt(2 * (max(PARAMS["TotalSize"]) ** 2)) / 2.0

types = [
    'bar',
    'restaurant',
    'store',
    'night_club',
    
    'park',
    
    'bank',
    
    'bus_station',
    'train_station',
    'transit_station'
]

places = []
for t in types:
    print(t)
    places.extend(GetPlacesByType(center, radius * 1000.0, t))
    
placesDf = pd.DataFrame.from_records([p.to_dict() for p in places])
placesDf.to_csv("Data/places.csv", sep="\t") #TODO save to path from params
placesDf.tail()

bar
https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=54.68668402618023,25.28088862351344&radius=2969.8484809834995&type=bar&key=AIzaSyBqO9lQtCRIBdKx189wk8ze-pZQMuxLdTU&
{'html_attributions': [], 'next_page_token': 'CqQCGgEAAFQaD-5L8qy-SFcmQNRYtVafSiDB5K33qvpTK2BNzgYD9cE9PF5k20rpvMzs1RX_3_qFeVL5IfOiCl_hVBGFSunzec1Hr-bM6X1WKeryy-6nk9I4Z__6WH3MT_zw1xqN11x5iRNVdj_OHuIFu02lWmdH2G-Wa4sIB-TpHPHkHgWT5a8KdaHeMEwLtou0LfmXal9a_jU4nmmp_q5EcgRDt-a-AmWDsYc8WcGWc52MmoRtuMQfnXP-tOHKcve_0qNijBMpzBj6mUF2f_uotwkREbOqQhKtGgjUM1APVIWSUbCrsGIYLUYGmYDIQb0RYIPLVdhUg5oZ3mrBJ1gUIWjkqlOmWx10hc9hQtvTYvVFCu8hzAbgSB90GrJwvnT84590RxIQ0Ss0l0YMPxQYZSagSYi1WBoUqhwNmw6hf-gKWaKppiVDzrhD4jY', 'results': [{'geometry': {'location': {'lat': 54.6777622, 'lng': 25.3021638}, 'viewport': {'northeast': {'lat': 54.67909368029149, 'lng': 25.3038111802915}, 'southwest': {'lat': 54.6763957197085, 'lng': 25.3011132197085}}}, 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/camping-71.png', 'id': '96f1f10

Unnamed: 0,ID,Lat,Long,Types
372,3771ce1b4b7dfc6ebc509443180079ca6bf40c6e,54.67398,25.298155,"[transit_station, point_of_interest, establish..."
373,37adbf5f17da3183a0a712cc48a4c57592c3daf9,54.685025,25.30885,"[transit_station, point_of_interest, establish..."
374,f739c2c48af55322f8babeb0521a8b81c0f5f8fc,54.671865,25.269345,"[transit_station, point_of_interest, establish..."
375,175f9ce6b27687ec6e50fa86481b8a9173fccdb3,54.6985,25.26127,"[transit_station, point_of_interest, establish..."
376,2b4e634d2be9662f66603d24f735e7464be1deb1,54.671125,25.29048,"[transit_station, point_of_interest, establish..."
