In [3]:
import os
import pprint
from datetime import datetime, timedelta
from enum import Enum

import geojson
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymongo as pm
import seaborn as sb

# Display every column of wide DataFrames and render figures at 100 dpi.
pd.set_option("display.max_columns", None)
mpl.rcParams["figure.dpi"] = 100

# MongoDB connection. Host and credentials can be supplied through environment
# variables so they do not have to live in the notebook; the fallbacks keep the
# previous hard-coded behaviour.
# NOTE(review): the fallback password is still stored in the notebook -- drop
# the defaults (and rotate the password) once the environment is configured.
client = pm.MongoClient(
    os.environ.get("CARSHARING_DB_HOST", 'bigdatadb.polito.it'),
    ssl=True,
    authSource='carsharing',
    username=os.environ.get("CARSHARING_DB_USERNAME", 'ictts'),
    password=os.environ.get("CARSHARING_DB_PASSWORD", 'Ict4SM22!'),
    tlsAllowInvalidCertificates=True,  # the server certificate is NOT validated
)
db = client['carsharing']

# Choose the collections to use
Ictts_enj_p_booking = db['ictts_enjoy_PermanentBookings']
Ictts_p_booking = db['ictts_PermanentBookings']

# Time window as Unix timestamps: 01/12/2017 00:00 to 31/01/2018 00:00.
# strptime() returns naive datetimes, so timestamp() interprets them in the
# local time zone of the machine running the notebook.
# NOTE(review): the previous comment described the window as 01/01/2018 to
# 31/01/2018 (1514761200 - 1517353200), which does not match the start date
# used here -- confirm which one is intended. Neither bound is referenced by
# the code visible in this part of the notebook.
start_unix_time = datetime.strptime("01/12/2017", "%d/%m/%Y").timestamp()
end_unix_time = datetime.strptime("31/01/2018", "%d/%m/%Y").timestamp()

# Zone polygons; gj["features"][k]["geometry"]["coordinates"] is read per zone.
with open("TorinoZonescol.geojson") as f:
    gj = geojson.load(f)

# One single-item dict per scenario: label -> (weekdays, morning) flags.
rental_args = [
    {" Weekends-Afternoon": (False, False)},
    {" Weekends-Morning": (False, True)},
    {" Weekdays-Afternoon": (True, False)},
    {" Weekdays-Morning": (True, True)},
]

In [4]:
# Bind the "carsharing" database and the bookings collection that
# extract_od_matrix() below runs its aggregation pipelines against.
db = client ["carsharing"]
permanentBookings = db["ictts_PermanentBookings"]
def extract_od_matrix(weekdays=True, morning=True):
    """Build the zone-to-zone origin/destination matrix of bookings.

    For every (origin zone, destination zone) pair among the first 23 features
    of the module-level ``gj`` GeoJSON, count the documents of the module-level
    ``permanentBookings`` collection whose ``init_loc`` lies within the origin
    zone and whose ``final_loc`` lies within the destination zone, restricted
    to the requested day type and hour band.

    The query does not restrict bookings to a date range.

    Parameters
    ----------
    weekdays : bool, default True
        If truthy, keep bookings whose ``$dayOfWeek`` is 2..6 (Monday-Friday);
        otherwise keep Sunday (1) and Saturday (7).
    morning : bool, default True
        If truthy, keep start hours 6..12 (inclusive); otherwise 12..23
        (inclusive). Hour 12 therefore belongs to both bands.

    Returns
    -------
    pandas.DataFrame
        23 x 23 frame of counts, rows = origin zone, columns = destination
        zone, both labelled ``Q001`` .. ``Q023``.
    """
    n_zones = 23

    if morning:
        start_hour, end_hour = 6, 12
    else:
        start_hour, end_hour = 12, 23

    # MongoDB $dayOfWeek numbers days from 1 (Sunday) to 7 (Saturday).
    # The weekend filter previously read `1 and 7`, which Python evaluates to
    # 7, so only Saturdays were counted; $in matches both weekend days.
    if weekdays:
        day_filter = {"$gte": 2, "$lte": 6}
    else:
        day_filter = {"$in": [1, 7]}

    # Extract each zone geometry once instead of once per (i, j) pair.
    zones = [
        gj["features"][k]["geometry"]["coordinates"] for k in range(n_zones)
    ]

    def within(zone):
        """Return a $geoWithin clause for one zone's MultiPolygon coordinates."""
        return {
            "$geoWithin": {
                "$geometry": {"type": "MultiPolygon", "coordinates": zone}
            }
        }

    OD_matrix = [[0] * n_zones for _ in range(n_zones)]

    for i, orig_zone in enumerate(zones):
        for j, dest_zone in enumerate(zones):
            pipeline = [
                # NOTE(review): $hour/$dayOfWeek are evaluated on init_date
                # with no explicit timezone -- confirm how init_date is stored.
                {"$project": {
                    "hour": {"$hour": "$init_date"},
                    "day": {"$dayOfWeek": "$init_date"},
                    "init_loc": 1,
                    "final_loc": 1,
                    "init_time": 1,
                }},
                {"$match": {
                    "day": day_filter,
                    "hour": {"$gte": start_hour, "$lte": end_hour},
                    "init_loc": within(orig_zone),
                    "final_loc": within(dest_zone),
                }},
                {"$count": "tot"},
            ]

            result = list(permanentBookings.aggregate(pipeline))

            # $count emits no document at all when nothing matches.
            OD_matrix[i][j] = result[0]["tot"] if result else 0

    labels = [f"Q{k:03d}" for k in range(1, n_zones + 1)]
    return pd.DataFrame(OD_matrix, index=labels, columns=labels)

# Build one rental OD matrix per scenario listed in rental_args, in order.
rental_OD_matrices = []

for i, args in enumerate(rental_args):
    # Every entry is a single-item dict: label -> (weekdays, morning).
    tmp_args = next(iter(args.values()))
    weekdays, morning = tmp_args
    print(weekdays, morning)
    df = extract_od_matrix(weekdays, morning)
    # To persist a matrix: df.to_csv(f"OD_Matrix_{i + 1:02d}.csv")
    rental_OD_matrices.append(df)


# Data extraction from the IMQ dataset: load the trips table from CSV.
imq = pd.read_csv("spostamentiTorino.csv")
# Preview of the first rows; this is not the last expression of the cell, so
# the notebook does not render it.
imq.head()

def create_pivot_table_with_filters(data, filters):
    """Count trips per (origin zone, destination zone) after filtering rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``COD_ZONA_PAR``, ``COD_ZONA_ARR`` and
        ``ID_INT``.
    filters : dict
        Maps an arbitrary key (the column name, by convention) to a boolean
        mask usable to index ``data``. All masks are combined with logical
        AND; an empty dict applies no filtering.

    Returns
    -------
    pandas.DataFrame
        Pivot table indexed by ``COD_ZONA_PAR`` with one column per
        ``COD_ZONA_ARR`` value; each cell is the number of matching rows,
        0 where there are none. Only zones present in the filtered rows
        appear in the result.
    """
    conditions = list(filters.values())

    if conditions:
        # Previously only the first condition was applied and the rest were
        # silently ignored; combine them all instead.
        mask = conditions[0]
        for condition in conditions[1:]:
            mask = mask & condition
        filtered_data = data[mask]
    else:
        filtered_data = data

    # aggfunc=len counts the rows that fall into each origin/destination cell.
    pivot_table = filtered_data.pivot_table(
        index='COD_ZONA_PAR',
        columns='COD_ZONA_ARR',
        values='ID_INT',
        aggfunc=len,
        fill_value=0,
    )
    return pivot_table

# One single-condition filter per IMQ category. Each entry is a dict mapping
# the column name to a boolean mask over `imq` selecting rows with that code.
filters = [
    {column: imq[column] == code}
    for column, code in (
        ("SESSO", 1),       # all male
        ("SESSO", 2),       # all female
        ("FASCIA_ETA", 1),  # from 11 to 19 years old
        ("FASCIA_ETA", 2),  # from 20 to 49 years old
        ("FASCIA_ETA", 3),  # from 50 to 64 years old
        ("FASCIA_ETA", 4),  # 65+ years old
        ("SCOPO", 1),       # aim : go to work
        ("SCOPO", 3),       # aim : study
        ("SCOPO", 4),       # aim : shopping
        ("SCOPO", 8),       # aim : going back home
    )
]

# One IMQ OD matrix per filter, in the same order as `filters`.
IMQ_OD_matrices = []

for i, f in enumerate(filters):
    df = create_pivot_table_with_filters(imq, f)
    # To persist a matrix: df.to_csv(f"IMQ_OD_Matrix_{i + 1}.csv")
    IMQ_OD_matrices.append(df)


# • Comparison between the OD Matrices
def _row_normalize(matrix):
    """Divide each row by its sum; rows that sum to zero are returned as zeros."""
    row_sums = matrix.sum(axis=1, keepdims=True)
    # `where` skips the division for zero-sum rows, which keep the zeros
    # supplied through `out` instead of turning into NaN.
    return np.divide(
        matrix, row_sums, out=np.zeros_like(matrix), where=row_sums != 0
    )


def L2_distance(matrix1, matrix2):
    """Return the L2 (Frobenius) distance between two row-normalized matrices.

    Each matrix is first normalized so that every row sums to 1; a row whose
    sum is 0 stays all-zero instead of producing NaN through a division by
    zero.

    Parameters
    ----------
    matrix1, matrix2 : array-like, 2-D
        Matrices of identical shape. The inputs are not modified.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared differences between the two
        normalized matrices.

    Raises
    ------
    AssertionError
        If the two matrices do not have the same shape.
    """
    # Work on float copies so integer inputs divide correctly.
    m1 = np.array(matrix1, dtype=float)
    m2 = np.array(matrix2, dtype=float)

    # Ensure matrices have the same dimensions
    assert m1.shape == m2.shape, "Matrices must have the same dimensions"

    squared_diff = np.square(_row_normalize(m1) - _row_normalize(m2))
    return np.sqrt(np.sum(squared_diff))

# distances[i][j] is the L2 distance between the i-th IMQ matrix and the j-th
# rental matrix.
distances = [([0] * len(rental_OD_matrices)) for _ in range(len(IMQ_OD_matrices))]

# The loop variable is deliberately not called `imq`: that name holds the IMQ
# DataFrame read from CSV, and rebinding it here would break any re-run of the
# cells that build the filters from it.
for i, imq_od in enumerate(IMQ_OD_matrices):
    for j, rental in enumerate(rental_OD_matrices):
        distance = L2_distance(imq_od.values, rental.values)
        distances[i][j] = distance

 # plotting the heatmap
# hm = sb.heatmap( data =np.array( distances ), annot = True )

 # displaying the plotted heatmap
# plt.show()
# • Plotting the OD Matrices
def plot_matrix(od_matrix, title):
    """Draw an OD matrix as a 3D surface on a new matplotlib figure.

    Parameters
    ----------
    od_matrix : numpy.ndarray, 2-D
        Flow values. Rows are plotted along the x axis and columns along the
        y axis (assumes rows are origins and columns are destinations, as in
        the matrices built elsewhere in this notebook).
    title : str
        Title of the axes.

    Returns
    -------
    None
        The figure is created but not shown; the caller (or the notebook
        backend) is responsible for rendering it.
    """
    # indexing="ij" gives grids with the same shape as od_matrix and makes x
    # follow the row index and y the column index, so the axis labels below
    # match the data and non-square matrices work too.
    x, y = np.meshgrid(
        range(od_matrix.shape[0]), range(od_matrix.shape[1]), indexing="ij"
    )

    # Create figure and 3D axes
    fig = plt.figure(dpi=300, figsize=(12, 12))
    # The projection and colormap names must not contain stray spaces,
    # otherwise matplotlib rejects them as unknown.
    ax = fig.add_subplot(111, projection='3d')

    # Create surface plot
    ax.plot_surface(x, y, od_matrix, cmap='viridis')

    # Set labels and title
    ax.set_xlabel('Origin ')
    ax.set_ylabel(' Destination ')
    ax.set_zlabel('Flow ')
    ax.set_title(title)

# for i, args in enumerate( rental_args ):
  # title = list( rental_args[i].keys())[0]
  # plot_matrix( rental_OD_matrices[i].values , title )

# for i, od_matrix in enumerate( IMQ_OD_matrices ):
  # plot_matrix( IMQ_OD_matrices[i].values , " ")

False False


KeyboardInterrupt: 