In [10]:
import pymongo as pm
from pprint import pprint
from datetime import datetime
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from geopy.distance import geodesic 

In [11]:
# Open a TLS connection to the course MongoDB server and select the
# 'carsharing' database.
#
# Credentials are passed to the MongoClient constructor instead of calling
# Database.authenticate(): that method is deprecated and was removed in
# PyMongo 4, while the username/password/authSource options are accepted by
# both older and current drivers.
#
# NOTE(review): the username and password are hardcoded in the notebook; they
# should be moved out of the source (environment variable, getpass, ...).
# NOTE(review): tlsAllowInvalidCertificates=True disables certificate
# validation -- confirm this is still required for this server.
client = pm.MongoClient('bigdatadb.polito.it',
                        ssl=True,
                        authSource='carsharing',
                        username='ictts',
                        password='Ictts16!',
                        tlsAllowInvalidCertificates=True)
db = client['carsharing']

# Collection queried by the aggregation pipeline defined further down.
permanentParkings = db['PermanentParkings']


In [12]:
# --- Analysis parameters -----------------------------------------------------
# Date window used to select parkings (both bounds are midnight timestamps).
start = datetime(2017, 1, 1)
end = datetime(2017, 2, 28)

# Hour-of-day window, kept as zero-padded two-digit strings.
start_hour = '08'  # hour 08
end_hour = '19'    # hour 19

city = "Vancouver"
day_type = "weekend"  # either "weekday" or "weekend"

# Base name (without extension) of the CSV files written by the export cells.
output_name = f"{day_type}_{start_hour}_{end_hour}"

# Bounding box of the study area as (latitude, longitude) corner points,
# laid out as [[north-west, north-east], [south-west, south-east]].
_north, _south = 49.350849, 49.175794
_west, _east = -123.310688, -123.014714
geo_range = [
    [(_north, _west), (_north, _east)],
    [(_south, _west), (_south, _east)],
]



In [13]:
# Aggregation pipeline: parkings of `city` inside [start, end], lasting between
# 5 and 180 duration units, filtered by day type and hour window, then counted
# per distinct (longitude, latitude) pair.

# Day-of-week clauses, appended to the "$and" list of the filter stage.
# MongoDB's $dayOfWeek numbers days from 1 (Sunday) to 7 (Saturday).
if day_type == "weekday":
    day_clauses = [{"day_of_week": {"$gte": 2}}, {"day_of_week": {"$lte": 6}}]
elif day_type == "weekend":
    day_clauses = [{"$or": [{"day_of_week": 1}, {"day_of_week": 7}]}]
else:
    # Any other value adds no day-of-week restriction.
    day_clauses = []

# Hour spans. When the window wraps past midnight (start_hour > end_hour) it is
# split in two: [start_hour, '24'] and ['0', end_hour]. The bounds are compared
# against the "%H" strings produced by the $project stage.
if int(start_hour) > int(end_hour):
    hour_spans = [(start_hour, '24'), ('0', end_hour)]
else:
    hour_spans = [(start_hour, end_hour)]

# A parking is kept when its start hour OR its end hour falls in one of the spans.
hour_clauses = [
    {"$and": [{field: {"$gte": low}}, {field: {"$lte": high}}]}
    for field in ("start_hour", "end_hour")
    for low, high in hour_spans
]

match_city = {"$match": {"city": city}}

match_period = {"$match": {"$and": [{"init_date": {"$gte": start}},
                                    {"final_date": {"$lte": end}}]}}

project_fields = {"$project": {
    "_id": 1,
    "longitude": {"$arrayElemAt": ["$loc.coordinates", 0]},
    "latitude": {"$arrayElemAt": ["$loc.coordinates", 1]},
    # (final_time - init_time) / 60 -- presumably seconds converted to minutes;
    # TODO confirm the unit of the stored timestamps.
    "duration": {"$divide": [{"$subtract": ["$final_time", "$init_time"]}, 60]},
    "day_of_week": {"$dayOfWeek": "$init_date"},
    "start_hour": {"$dateToString": {"format": "%H", "date": "$init_date"}},
    "end_hour": {"$dateToString": {"format": "%H", "date": "$final_date"}},
}}

match_filters = {"$match": {
    "$and": [{"duration": {"$gte": 5}},
             {"duration": {"$lte": 3 * 60}}] + day_clauses,
    "$or": hour_clauses,
}}

group_by_location = {"$group": {
    "_id": {"longitude": "$longitude", "latitude": "$latitude"},
    "number": {"$sum": 1},
}}

pipeline_parking_in_period = [
    match_city,
    match_period,
    project_fields,
    match_filters,
    group_by_location,
]

pprint(pipeline_parking_in_period)

parking_in_period = permanentParkings.aggregate(pipeline_parking_in_period)

[{'$match': {'city': 'Vancouver'}},
 {'$match': {'$and': [{'init_date': {'$gte': datetime.datetime(2017, 1, 1, 0, 0)}},
                      {'final_date': {'$lte': datetime.datetime(2017, 2, 28, 0, 0)}}]}},
 {'$project': {'_id': 1,
               'day_of_week': {'$dayOfWeek': '$init_date'},
               'duration': {'$divide': [{'$subtract': ['$final_time',
                                                       '$init_time']},
                                        60]},
               'end_hour': {'$dateToString': {'date': '$final_date',
                                              'format': '%H'}},
               'latitude': {'$arrayElemAt': ['$loc.coordinates', 1]},
               'longitude': {'$arrayElemAt': ['$loc.coordinates', 0]},
               'start_hour': {'$dateToString': {'date': '$init_date',
                                                'format': '%H'}}}},
 {'$match': {'$and': [{'duration': {'$gte': 5}},
                      {'duration': {'$lte': 180}},
       

# Task A

In [14]:
# Task A export: one CSV row per distinct parking location, where 'density' is
# the number of parkings counted at that location and 'id' is a running index.
location_records = [
    {
        'id': position,
        'longitude': parking["_id"]["longitude"],
        'latitude': parking["_id"]["latitude"],
        'density': parking["number"],
    }
    for position, parking in enumerate(parking_in_period)
]

# Columns are listed explicitly so the header is written even with no records.
df = pd.DataFrame(location_records,
                  columns=['id', 'longitude', 'latitude', 'density'])
df.to_csv(f"./{output_name}.csv", index=False)

In [15]:
# count = 0
# try:
#     for elem in parking_in_period:
#         #print(parking_in_period.next())
#         count+=1
#         # if count>10:
#         #     break
# except:
#     pass
# print(count)

# Task B

In [16]:
# Run the aggregation again to obtain a fresh CommandCursor: the cursor created
# when the pipeline was first executed has already been iterated by the Task A
# export loop, so the Task B loop needs a new one to see any documents.
parking_in_period = permanentParkings.aggregate(pipeline_parking_in_period)

In [17]:
# Task B: accumulate the per-location parking counts on a regular grid of
# roughly 500 m x 500 m blocks covering the bounding box in geo_range
# ([[north-west, north-east], [south-west, south-east]], as (lat, lon)).

# Extent of the box in metres, measured along its northern and western edges.
longitude_range = geodesic(geo_range[0][0],geo_range[0][1]).m
latitude_range = geodesic(geo_range[0][0],geo_range[1][0]).m
print(longitude_range)
print(latitude_range)

# Number of whole 500 m blocks per axis, and the size of one block in degrees.
# latitude_step is negative because rows run from the northern edge southwards.
longitude_blocks = int(longitude_range/500)
latitude_blocks = int(latitude_range/500)
longitude_step = (geo_range[0][1][1]-geo_range[0][0][1])/longitude_blocks
latitude_step= (geo_range[1][0][0]-geo_range[0][0][0])/latitude_blocks
print(longitude_step)
print(latitude_step)


# block_map[row, col]: row 0 is the northernmost band, col 0 the westernmost.
block_map = np.zeros([latitude_blocks,longitude_blocks],dtype=int)

reference_point = geo_range[0][0]  # north-west corner, origin of the grid
skipped_parkings = 0
for parking in parking_in_period:
    location = parking["_id"]
    parking_lat = location.get("latitude")
    parking_lon = location.get("longitude")
    # Locations without numeric coordinates cannot be placed on the grid.
    if not (isinstance(parking_lat, (int, float)) and isinstance(parking_lon, (int, float))):
        skipped_parkings += parking["number"]
        continue

    # Signed fractional block offsets from the north-west corner, using the same
    # degree steps that define the block centres. A geodesic distance is always
    # non-negative, so indexing by distance would fold points lying north or
    # west of the box back into the grid instead of rejecting them.
    row_offset = (parking_lat - reference_point[0]) / latitude_step
    col_offset = (parking_lon - reference_point[1]) / longitude_step
    if not (0 <= row_offset < latitude_blocks and 0 <= col_offset < longitude_blocks):
        skipped_parkings += parking["number"]
        continue

    block_map[int(row_offset), int(col_offset)] += parking["number"]

print(f"parkings outside the grid or without coordinates: {skipped_parkings}")

21504.410396889554
19468.713197564055
0.006883116279069792
-0.0046067105263156135


In [None]:
# Export the grid: one CSV row per non-empty block, positioned at the block
# centre (grid origin shifted by half a step along each axis).
origin_point = (geo_range[0][0][0]+0.5*latitude_step,geo_range[0][0][1]+0.5*longitude_step)

# np.nonzero lists the non-zero cells in row-major order, i.e. the same order a
# nested "for row / for column" scan of block_map would visit them.
block_rows, block_cols = np.nonzero(block_map)

df2 = pd.DataFrame({
    'id': list(range(len(block_rows))),
    'longitude': (origin_point[1] + block_cols * longitude_step).tolist(),
    'latitude': (origin_point[0] + block_rows * latitude_step).tolist(),
    'density': block_map[block_rows, block_cols].tolist(),
})
df2.to_csv(f"./{output_name}_block.csv", index=False)