diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..306fa06 --- /dev/null +++ b/install.sh @@ -0,0 +1,2 @@ +#!/bin/sh +pip install -r requirements.txt \ No newline at end of file diff --git a/notebooks/ex09_geolocation_moving_obj.csv b/notebooks/ex09_geolocation_moving_obj.csv new file mode 100644 index 0000000..833362e --- /dev/null +++ b/notebooks/ex09_geolocation_moving_obj.csv @@ -0,0 +1,6 @@ +cab_26,43.602508,39.715685,14:47:44 +cab_112,43.582243,39.752077,14:47:55 +cab_26,43.607480,39.721521,14:49:11 +cab_112,43.579258,39.758944,14:49:51 +cab_112,43.574906,39.766325,14:51:53 +cab_26,43.612203,39.720491,14:52:48 \ No newline at end of file diff --git a/notebooks/sn09_analyzing_location_data.ipynb b/notebooks/sn09_analyzing_location_data.ipynb new file mode 100644 index 0000000..d100a76 --- /dev/null +++ b/notebooks/sn09_analyzing_location_data.ipynb @@ -0,0 +1,718 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "d22b455e-8d88-49cd-82e6-55838d526cbc", + "metadata": {}, + "outputs": [], + "source": [ + "import geocoder\n", + "\n", + "geocode_res = geocoder.osm('1 Apple Park Way, Cupertino, California, United States')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1caf59dc-c1f8-4bc7-b478-39edf657d9b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-122.010747, 37.3317424]\n" + ] + } + ], + "source": [ + "coordinates = geocode_res.geometry['coordinates']\n", + "print(coordinates)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ddedf165-a1cd-4fb8-9d42-175eb5a56dc5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cablatlongtm
0cab_2643.60250839.71568514:47:44
1cab_11243.58224339.75207714:47:55
2cab_2643.60748039.72152114:49:11
3cab_11243.57925839.75894414:49:51
4cab_11243.57490639.76632514:51:53
5cab_2643.61220339.72049114:52:48
\n", + "
" + ], + "text/plain": [ + " cab lat long tm\n", + "0 cab_26 43.602508 39.715685 14:47:44\n", + "1 cab_112 43.582243 39.752077 14:47:55\n", + "2 cab_26 43.607480 39.721521 14:49:11\n", + "3 cab_112 43.579258 39.758944 14:49:51\n", + "4 cab_112 43.574906 39.766325 14:51:53\n", + "5 cab_26 43.612203 39.720491 14:52:48" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"ex09_geolocation_moving_obj.csv\", names=['cab', 'lat', 'long', 'tm'])\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9f5412aa-d334-45a5-af1f-b430f161413f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cablatlongtm
5cab_2643.61220339.72049114:52:48
4cab_11243.57490639.76632514:51:53
\n", + "
" + ], + "text/plain": [ + " cab lat long tm\n", + "5 cab_26 43.612203 39.720491 14:52:48\n", + "4 cab_112 43.574906 39.766325 14:51:53" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "latestrows = df.sort_values(['cab', 'tm'], ascending=False).drop_duplicates('cab')\n", + "display(latestrows)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dee21ad0-9ffb-42b5-bdc4-c7fcfa828427", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['cab_26', 43.612203, 39.720491, '14:52:48'],\n", + " ['cab_112', 43.574906, 39.766325, '14:51:53']]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "latestrows_numpy = latestrows.values\n", + "latestrows = latestrows_numpy.tolist()\n", + "display(latestrows)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "eaf3773d-9880-4026-86e0-0758a998fa2b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cab_26: 4636\n", + "cab_112: 1015\n" + ] + }, + { + "data": { + "text/plain": [ + "[['cab_26', 43.612203, 39.720491, '14:52:48', 4636],\n", + " ['cab_112', 43.574906, 39.766325, '14:51:53', 1015]]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# calculate the distance between each cab and a pickup place\n", + "from geopy.distance import distance\n", + "\n", + "pick_up = (43.578854, 39.754995)\n", + "\n", + "for i, row in enumerate(latestrows):\n", + " lat = row[1]\n", + " long = row[2]\n", + " cab = (lat, long)\n", + " dist_meters = distance(pick_up, cab).m\n", + "\n", + " print(row[0] + ':', round(dist_meters))\n", + " latestrows[i].append(round(dist_meters))\n", + "\n", + "display(latestrows)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e12cf848-4505-40c1-ae6e-a9a2c2c962e1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The closest cab is: cab_112 . Distance in meters: 1015\n" + ] + } + ], + "source": [ + "closest_cab = min(latestrows, key=lambda x: x[4])\n", + "print('The closest cab is: ', closest_cab[0], '. Distance in meters: ', closest_cab[4])" + ] + }, + { + "cell_type": "markdown", + "id": "3e8ca7ad-0680-4a7a-96e5-1ff5cecbf2d3", + "metadata": {}, + "source": [ + "Not always the closest object to the target is the most efficient solution. Some obstacles may infer in the time elapsed to reach the objective.\n", + "\n", + "If obstacles makes traject to the objective longer an alternative is to divide the area into a set of polygons, and then use these polygons to check which objects share a polygon with the target." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "84aacde9-d089-43ff-a99e-adea84c74ba4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cab_26 within the polygon: False\n", + "cab_112 within the polygon: True\n", + "pick_up within the polygon: True\n" + ] + } + ], + "source": [ + "from shapely.geometry import Point, Polygon\n", + "\n", + "coords = [\n", + " (46.082991, 38.987384),\n", + " (46.075489, 38.987599),\n", + " (46.079395, 38.997684),\n", + " (46.073822, 39.007297),\n", + " (46.081741, 39.008842)\n", + "]\n", + "poly = Polygon(coords)\n", + "cab_26 = Point(46.073852, 38.991890)\n", + "cab_112 = Point(46.078228, 39.003949)\n", + "pick_up = Point(46.080074, 38.991289)\n", + "\n", + "print('cab_26 within the polygon:', cab_26.within(poly))\n", + "print('cab_112 within the polygon:', cab_112.within(poly))\n", + "print('pick_up within the polygon:', pick_up.within(poly))" + ] + }, + { + "cell_type": "markdown", + "id": "65fc62a7-8a59-47e4-b144-679f361b1dce", + "metadata": {}, + "source": [ + "The best approach is to use both calculations:\n", + "\n", + "1. Determine which polygon each cab is in and use that determination to decide how to calculate the distance from that cab to the pick-up location. Then proceed to:\n", + " 1. Calculate direct straight line distance if the cab is in the same polygon as the pick-up location.\n", + " 2. The distance by way of the entry point if it’s in an adjacent polygon." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0c45585e-bab3-4432-8aca-8056ea3b15fc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1544\n" + ] + } + ], + "source": [ + "entry_point = Point(46.075357, 39.000298)\n", + "\n", + "if cab_26.within(poly):\n", + " dist = distance((pick_up.x, pick_up.y), (cab_26.x,cab_26.y)).m\n", + "else:\n", + " dist = distance((cab_26.x,cab_26.y), (entry_point.x,entry_point.y)).m + distance((entry_point.x,entry_point.y), (pick_up.x, pick_up.y)).m\n", + "\n", + "print(round(dist))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "cc38e942-8042-47d2-8478-ba25a6b357cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['cab_14', 'cab_79', 'cab_104', 'cab_45']" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "orders = [\n", + " ('order_039', 'open', 'cab_14'),\n", + " ('order_034', 'open', 'cab_79'),\n", + " ('order_032', 'open', 'cab_104'),\n", + " ('order_026', 'closed', 'cab_79'),\n", + " ('order_021', 'open', 'cab_45'),\n", + " ('order_018', 'closed', 'cab_26'),\n", + " ('order_008', 'closed', 'cab_112')\n", + "]\n", + "\n", + "df_orders = pd.DataFrame(orders, columns =['order','status','cab'])\n", + "df_orders_open = df_orders[df_orders['status']=='open']\n", + "unavailable_list = [x[2] for x in orders if x[1] == 'open']\n", + "display(unavailable_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "abf62615-a79f-445f-bd43-cf08e49185e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('cab_26', 2165), ('cab_112', 2861)]\n", + "('cab_26', 2165)\n" + ] + } + ], + "source": [ + "pick_up = 46.083822, 38.967845\n", + "cab_26 = 46.073852, 38.991890\n", + "cab_112 = 46.078228, 39.003949\n", + "cab_104 = 46.071226, 39.004947\n", + "cab_14 = 46.004859, 38.095825\n", + "cab_79 = 46.088621, 39.033929\n", + "cab_45 = 46.141225, 39.124934\n", + "cabs = {'cab_26': cab_26, 'cab_112': cab_112, 'cab_14': cab_14,\n", + " 'cab_104': cab_104, 'cab_79': cab_79, 'cab_45': cab_45}\n", + "\n", + "dist_list = []\n", + "\n", + "for cab_name, cab_loc in cabs.items():\n", + " if cab_name not in unavailable_list:\n", + " dist = distance(pick_up, cab_loc).m\n", + " dist_list.append((cab_name, round(dist)))\n", + "\n", + "print(dist_list)\n", + "print(min(dist_list, key=lambda x: x[1]))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d3ab7e20-3999-4d18-af08-7433dc36d53f", + "metadata": {}, + "outputs": [], + "source": [ + "# Cabs data with Baby Seat\n", + "cabs_list = [\n", + " ('cab_14',1),\n", + " ('cab_79',0),\n", + " ('cab_104',0),\n", + " ('cab_45',1),\n", + " ('cab_26',0),\n", + " ('cab_112',1)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "73212eb9-2128-4ec7-8564-51f0ec569f28", + "metadata": {}, + "outputs": [], + "source": [ + "df_cabs = pd.DataFrame(cabs_list, columns =['cab', 'seat'])\n", + "df_dist = pd.DataFrame(dist_list, columns =['cab', 'dist'])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "563b1942-13d7-4726-b175-0feaf9050f1b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cabseat
0cab_141
1cab_790
2cab_1040
3cab_451
4cab_260
5cab_1121
\n", + "
" + ], + "text/plain": [ + " cab seat\n", + "0 cab_14 1\n", + "1 cab_79 0\n", + "2 cab_104 0\n", + "3 cab_45 1\n", + "4 cab_26 0\n", + "5 cab_112 1" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cabdist
0cab_262165
1cab_1122861
\n", + "
" + ], + "text/plain": [ + " cab dist\n", + "0 cab_26 2165\n", + "1 cab_112 2861" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(df_cabs)\n", + "display(df_dist)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0425a9c4-1f5e-4b16-a7e4-c9ab3e2bf031", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.merge(df_cabs, df_dist, on='cab', how='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ae908c91-a73b-4ade-a49f-c52ec0ed92ba", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cabseatdist
0cab_2602165
1cab_11212861
\n", + "
" + ], + "text/plain": [ + " cab seat dist\n", + "0 cab_26 0 2165\n", + "1 cab_112 1 2861" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "dbd40a14-f4ad-482e-907c-8249de11ec3d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('cab_112', 1, 2861)]\n" + ] + } + ], + "source": [ + "result_list = list(df.itertuples(index=False,name=None))\n", + "result_list = [x for x in result_list if x[1] == 1]\n", + "print(result_list)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/requirements.txt b/requirements.txt index a48644d..0cd065c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ Cartopy == 0.23.0 +geocoder == 1.38.1 +geopy == 2.4.1 h5py == 3.10.0 jupyter == 1.0.0 keras == 3.3.3 @@ -10,5 +12,6 @@ pandas == 2.2.1 pillow == 10.3.0 scikit-learn == 1.4.0 seaborn == 0.13.2 +shapely == 2.0.6 spacy == 3.7.4 yfinance == 0.2.38