In [173]:
import os
import pprint
from datetime import datetime, timedelta
from enum import Enum

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymongo as pm
import pytz

In [174]:

# Connection to the carsharing MongoDB instance.
# Host, user and password can be overridden through environment variables so
# the notebook can be re-pointed or given different credentials without
# editing the source.
# NOTE(review): the literal defaults are kept only so the notebook keeps
# running unchanged. Credentials should not live in a shared notebook: set the
# environment variables and remove the literal password.
client = pm.MongoClient(os.environ.get('CARSHARING_DB_HOST', 'bigdatadb.polito.it'),
                        ssl=True,
                        authSource='carsharing',
                        username=os.environ.get('CARSHARING_DB_USER', 'ictts'),
                        password=os.environ.get('CARSHARING_DB_PASSWORD', 'Ict4SM22!'),
                        # NOTE(review): certificate validation is disabled here;
                        # confirm this is required by the server setup.
                        tlsAllowInvalidCertificates=True)
db = client['carsharing']

# Choose the collections to use.
# The un-prefixed collections and the 'enjoy_'-prefixed ones are kept as
# separate handles; the query cell below picks one of them per city.
permenant_booking = db['PermanentBookings']
permenant_parking = db['PermanentParkings']
enjoy_permenant_booking = db['enjoy_PermanentBookings']
enjoy_permenant_parking = db['enjoy_PermanentParkings']

#ENUM of cities
class CITY_ENUM(Enum):
    """City names as passed to the pipeline's ``city`` filter.

    Member names are short city codes; member values are the full city names.
    """

    TO = 'Torino'
    SEA = 'Seattle'
    STU = 'Stuttgart'
class CITY_TIMEZONES(Enum):
    """Time-zone name for each city, keyed by the same codes as ``CITY_ENUM``.

    The values are the zone names handed to ``get_start_end_unix_zone``.
    """

    TO = 'Europe/Rome'
    SEA = 'America/Los_Angeles'
    STU = 'Europe/Berlin'

def get_start_end_unix_zone(timezone):
    """Return the Unix timestamps bounding January 2018 in a given time zone.

    Parameters
    ----------
    timezone : str
        Time-zone name accepted by ``pytz.timezone`` (e.g. ``'Europe/Rome'``).

    Returns
    -------
    tuple of float
        ``(start, end)``: 2018-01-01 00:00:00 and 2018-01-31 23:59:59, both
        interpreted as wall-clock time in ``timezone`` and expressed as
        seconds since the Unix epoch.
    """
    tz = pytz.timezone(timezone)
    # pytz zones must be attached with localize(). Passing one as ``tzinfo``
    # to the datetime constructor picks the zone's historical local-mean-time
    # offset instead of the offset in force on that date, which shifts the
    # resulting timestamp by several minutes.
    start_timestamp = tz.localize(datetime(2018, 1, 1, 0, 0, 0)).timestamp()
    end_timestamp = tz.localize(datetime(2018, 1, 31, 23, 59, 59)).timestamp()
    return start_timestamp, end_timestamp


# Date range: from 01/01/2018 to 31/01/2018 (Unix timestamps 1514761200 - 1517353200)
# start_unix_time = datetime.datetime.strptime("01/01/2018", "%d/%m/%Y").timestamp()
# end_unix_time = datetime.datetime.strptime("01/02/2018", "%d/%m/%Y").timestamp()
# start_unix_time=int('1514764800')
# end_unix_time=int('1517439600')

# dt = datetime(2018, 1, 1)
# start_unix_time = dt.replace(tzinfo=timezone.utc).timestamp()
# dt = datetime(2018, 1, 31)
# end_unix_time = dt.replace(tzinfo=timezone.utc).timestamp()

In [175]:
# Pipeline for retrieving the rental data, with filtering applied:
# - rentals that are too short or too long are filtered out
# - a rental is considered only if the car has moved
# - results are grouped by date and hour
def filter_pipeline(city, start_unix_time, end_unix_time, min_duration=5, max_duration=180):
    """Build the aggregation pipeline that counts filtered rentals per hour.

    Parameters
    ----------
    city : str
        Value matched against the ``city`` field.
    start_unix_time, end_unix_time : number
        Bounds of the half-open window ``[start, end)``; both ``init_time``
        and ``final_time`` of a document must fall inside it.
    min_duration, max_duration : number, optional
        Exclusive bounds, in minutes, on the rental duration. The defaults
        (5 and 180) are the thresholds this pipeline previously hard-coded.

    Returns
    -------
    list of dict
        Pipeline stages. When run, each output document has the form
        ``{'_id': 'YYYY-MM-DD', 'hours': [{'date', 'hour', 'total_count'}, ...]}``
        and documents are sorted by ``_id``.

    Notes
    -----
    The date operators are applied to ``init_date`` without a ``timezone``
    argument, so day/hour are taken from the date exactly as stored.
    """
    # Keep only rentals of the requested city that start and end in the window.
    match_city_and_window = {
        '$match': {
            'city': city,
            'init_time': {'$gte': start_unix_time, '$lt': end_unix_time},
            'final_time': {'$gte': start_unix_time, '$lt': end_unix_time},
        }
    }

    # Derive the fields used for filtering and grouping.
    derive_fields = {
        '$project': {
            '_id': 0,
            'duration': {
                '$divide': [
                    {'$subtract': ['$final_time', '$init_time']},
                    60,  # seconds -> minutes
                ]
            },
            'day': {'$dayOfMonth': '$init_date'},
            'hour': {'$hour': '$init_date'},
            'date': {
                '$dateToString': {
                    'format': '%Y-%m-%d',
                    'date': '$init_date',
                }
            },
            # True when the first and second entries of
            # origin_destination.coordinates differ, i.e. the car moved.
            'moved': {
                '$ne': [
                    {'$arrayElemAt': ['$origin_destination.coordinates', 0]},
                    {'$arrayElemAt': ['$origin_destination.coordinates', 1]},
                ]
            },
        }
    }

    # Drop rentals where the car did not move or the duration is out of range.
    keep_valid_rentals = {
        '$match': {
            'moved': True,
            'duration': {'$gt': min_duration, '$lt': max_duration},
        }
    }

    count_per_hour = {
        '$group': {
            '_id': {'day': '$day', 'hour': '$hour', 'date': '$date'},
            'total_count': {'$sum': 1},
        }
    }

    # Order the hourly buckets before they are pushed into per-date arrays;
    # the order inside each 'hours' array comes from this stage.
    order_hourly_buckets = {'$sort': {'_id': 1}}

    collect_per_date = {
        '$group': {
            '_id': '$_id.date',
            'hours': {
                '$push': {
                    'date': '$_id.date',
                    'hour': '$_id.hour',
                    'total_count': '$total_count',
                }
            },
        }
    }

    # '_id' is unique after the $group above, so it fully determines the
    # document order. The former secondary key 'hours.hour' could never act as
    # a tie-breaker and did not reorder the array contents, so it is omitted.
    order_dates = {'$sort': {'_id': 1}}

    return [
        match_city_and_window,
        derive_fields,
        keep_valid_rentals,
        count_per_hour,
        order_hourly_buckets,
        collect_per_date,
        order_dates,
    ]

In [176]:
# Run the aggregation once per city, each over that city's own time window.
# Torino is read from the 'enjoy' bookings collection; Seattle and Stuttgart
# are read from the un-prefixed bookings collection.
to_start, to_end = get_start_end_unix_zone(CITY_TIMEZONES.TO.value)
TO_Data = list(
    enjoy_permenant_booking.aggregate(
        filter_pipeline(CITY_ENUM.TO.value, to_start, to_end)
    )
)

sea_start, sea_end = get_start_end_unix_zone(CITY_TIMEZONES.SEA.value)
SEA_Data = list(
    permenant_booking.aggregate(
        filter_pipeline(CITY_ENUM.SEA.value, sea_start, sea_end)
    )
)

stu_start, stu_end = get_start_end_unix_zone(CITY_TIMEZONES.STU.value)
STU_Data = list(
    permenant_booking.aggregate(
        filter_pipeline(CITY_ENUM.STU.value, stu_start, stu_end)
    )
)

# (city name, aggregation result) pairs consumed by the plotting loop.
cities_data_array = [
    (CITY_ENUM.TO.value, TO_Data),
    (CITY_ENUM.SEA.value, SEA_Data),
    (CITY_ENUM.STU.value, STU_Data),
]

In [177]:
def flatten(city_booking_list):
    """Expand per-date aggregation results into one record per hour.

    Parameters
    ----------
    city_booking_list : iterable of dict
        Entries with an ``'_id'`` (the date) and an ``'hours'`` list whose
        items carry ``'hour'`` and ``'total_count'``.

    Returns
    -------
    list of dict
        Records with keys ``'date'``, ``'hour'`` and ``'total_count'``, in
        input order.
    """
    records = []
    for day_entry in city_booking_list:
        day = day_entry['_id']
        records.extend(
            {
                'date': day,
                'hour': slot['hour'],
                'total_count': slot['total_count'],
            }
            for slot in day_entry['hours']
        )
    return records

def map_hour_count(flattened_list):
    """Split flattened hourly records into parallel x/y series.

    Parameters
    ----------
    flattened_list : iterable of dict
        Records with ``'date'`` (``'%Y-%m-%d'`` string), ``'hour'`` and
        ``'total_count'``.

    Returns
    -------
    tuple of (list of datetime, list)
        The datetime of each record (its date plus ``hour`` hours) and the
        matching ``total_count`` values, in input order.
    """
    timestamps = []
    counts = []
    for record in flattened_list:
        midnight = datetime.strptime(record['date'], '%Y-%m-%d')
        timestamps.append(midnight + timedelta(hours=record['hour']))
        counts.append(record['total_count'])
    return timestamps, counts

def uniq_dates(all_dates):
    """Return the distinct calendar dates found in ``all_dates``, ascending."""
    distinct_days = {moment.date() for moment in all_dates}
    return sorted(distinct_days)

def plotter(plotTitle, all_dates, all_values, unique_dates):
    """Plot hourly booking counts and save the figure to disk.

    Parameters
    ----------
    plotTitle : str
        Label appended to the figure title and to the output file name
        (``step1-booking-<plotTitle>``).
    all_dates : sequence
        X values of the series (one point per hour bucket).
    all_values : sequence
        Y values (booking counts), parallel to ``all_dates``.
    unique_dates : sequence
        Positions of the x-axis ticks (one per calendar date).

    Returns
    -------
    None
        The figure is written to the current working directory and closed.
    """
    fig = plt.figure(figsize=(14, 6))
    try:
        plt.plot(all_dates, all_values, label='Book', color='blue')
        plt.xlabel('Date')
        plt.ylabel('Total Count')
        plt.legend()
        plt.grid(True)
        plt.title(f'Total Counts in Dates and Hours in - {plotTitle}')
        plt.xticks(unique_dates, rotation=90, fontsize=5)
        fig.savefig(f'step1-booking-{plotTitle}')
    finally:
        # Close rather than clear: a cleared figure stays registered with
        # pyplot, so figures pile up across calls and the notebook renders
        # each one as an empty "<Figure ... with 0 Axes>" output.
        plt.close(fig)

In [178]:
# Produce one saved plot per city from the (city name, aggregation result) pairs.
for city,city_data in cities_data_array:
    # One record per (date, hour) bucket.
    flattened_list = flatten(city_data)
    # Parallel series: bucket datetimes and their booking counts.
    all_dates,all_values = map_hour_count(flattened_list)
    # Distinct calendar dates, used as x-axis tick positions.
    unique_dates = uniq_dates(all_dates)
    plotter(city, all_dates, all_values, unique_dates)

<Figure size 1400x600 with 0 Axes>

<Figure size 1400x600 with 0 Axes>

<Figure size 1400x600 with 0 Axes>

[{'_id': '2018-01-01',
  'hours': [{'date': '2018-01-01', 'hour': 0, 'total_count': 61},
            {'date': '2018-01-01', 'hour': 1, 'total_count': 105},
            {'date': '2018-01-01', 'hour': 2, 'total_count': 101},
            {'date': '2018-01-01', 'hour': 3, 'total_count': 80},
            {'date': '2018-01-01', 'hour': 4, 'total_count': 55},
            {'date': '2018-01-01', 'hour': 5, 'total_count': 54},
            {'date': '2018-01-01', 'hour': 6, 'total_count': 31},
            {'date': '2018-01-01', 'hour': 7, 'total_count': 23},
            {'date': '2018-01-01', 'hour': 8, 'total_count': 16},
            {'date': '2018-01-01', 'hour': 9, 'total_count': 24},
            {'date': '2018-01-01', 'hour': 10, 'total_count': 20},
            {'date': '2018-01-01', 'hour': 11, 'total_count': 36},
            {'date': '2018-01-01', 'hour': 12, 'total_count': 59},
            {'date': '2018-01-01', 'hour': 13, 'total_count': 56},
            {'date': '2018-01-01', 'hour': 14, 