# Lab 3 - Appendix - Source Code

## Import Packages

In [None]:
# Import packages 

import pymongo as pm
import pandas as pd
import numpy as np
import seaborn as sns
import geojson
import matplotlib.pylab as plt
from pprint import pprint
import os

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# session; this file assigns columns on filtered DataFrame slices.
pd.set_option('mode.chained_assignment', None)

## Connect to Database

In [None]:
# Connection settings for the MongoDB server (placeholders to be filled in).
db_add = "<Your DB Address>"
auth_source = "<Auth Source>"
username = "<Your Username>"
password = "<Your Password>"

# NOTE: tlsAllowInvalidCertificates=True disables TLS certificate validation.
connection_options = dict(
    ssl=True,
    authSource=auth_source,
    tlsAllowInvalidCertificates=True,
    username=username,
    password=password,
)
client = pm.MongoClient(db_add, **connection_options)

# Database that holds the booking collections queried below.
db = client['carsharing']

# Read IMQ Data
sp_to = pd.read_csv('IMQData/spostamentiTorino.csv')

## Load Resources

In [None]:
# Zone names keyed by their 0-based position in this list.
# NOTE(review): presumably the same order as the GeoJSON features - confirm.
zones = dict(enumerate([
    'CENTRO',
    'S.SALVARIO',
    'CROCETTA',
    'S.PAOLO',
    'CENISIA',
    'S.DONATO',
    'AURORA',
    'VANCHIGLIA',
    'NIZZA-MILLEFONTI',
    'LINGOTTO',
    'S.RITA',
    'MIRAFIORI NORD',
    'POZZO STRADA',
    'PARELLA',
    'VALLETTE',
    'MADONNA CAMPAGNA',
    'B.TA VITTORIA',
    'B.RA MILANO',
    'FALCHERA',
    'REGIO PARCO',
    'MADONNA PILONE',
    'CAVORETTO',
    'MIRAFIORI SUD',
]))

# Load the zone geometries. GeoJSON is UTF-8 by specification (RFC 7946), so
# the encoding is stated explicitly instead of relying on the platform default.
with open('Zone/TorinoZonescol.geojson', encoding='utf-8') as f:
    zones_data = geojson.load(f)

In [None]:
# Handles to the two permanent-bookings collections queried by every OD loop.
# NOTE(review): names suggest car2go and Enjoy - confirm against the database.
c2g_perm, enj_perm = (
    db[collection_name]
    for collection_name in ('ictts_PermanentBookings', 'ictts_enjoy_PermanentBookings')
)

## Define Function and Pipeline

In [None]:
def get_OD_data(orig_zone, dest_zone, startHour, endHour, startDay, endDay):
    """Build the aggregation pipeline that counts trips between two zones.

    The pipeline derives ``hour`` and ``day`` from ``init_date``, keeps the
    documents whose ``init_loc`` lies within ``orig_zone`` and whose
    ``final_loc`` lies within ``dest_zone``, and emits one document
    ``{"tot": <count>}`` (no document at all when nothing matches).

    Args:
        orig_zone: MultiPolygon coordinates of the origin zone.
        dest_zone: MultiPolygon coordinates of the destination zone.
        startHour: First hour kept (inclusive).
        endHour: Last hour kept (inclusive).
        startDay: First ``$dayOfWeek`` value kept (inclusive).
        endDay: ``$dayOfWeek`` upper bound (exclusive).

    Returns:
        A list of three stages: ``$project``, ``$match`` and ``$count``.
    """
    def within(coordinates):
        # Geo filter: the location must fall inside the given MultiPolygon.
        return {
            "$geoWithin": {
                "$geometry": {"type": "MultiPolygon", "coordinates": coordinates}
            }
        }

    project_stage = {
        "$project": {
            "hour": {"$hour": "$init_date"},
            "day": {"$dayOfWeek": "$init_date"},
            "init_loc": 1,
            "final_loc": 1,
            "init_time": 1,
        }
    }
    match_stage = {
        "$match": {
            "day": {"$gte": startDay, "$lt": endDay},
            "hour": {"$gte": startHour, "$lte": endHour},
            "init_loc": within(orig_zone),
            "final_loc": within(dest_zone),
        }
    }
    count_stage = {"$count": "tot"}

    return [project_stage, match_stage, count_stage]

def normalize(arr):
    """Min-max scale ``arr`` so its values span the range [0, 1].

    Args:
        arr: Array-like of numbers (here: an OD matrix).

    Returns:
        An array of the same shape with ``(arr - min) / (max - min)``.
        When every value is identical (for example an all-zero matrix) the
        range is zero; an all-zero float array is returned instead of the
        NaNs a division by zero would produce.

    Raises:
        ValueError: If ``arr`` is empty (raised by ``np.min``).
    """
    arr = np.asarray(arr)
    min_val = np.min(arr)
    value_range = np.max(arr) - min_val
    if value_range == 0:
        # Constant input: nothing to scale, and dividing would yield NaN.
        return np.zeros(arr.shape, dtype=float)
    return (arr - min_val) / value_range

### Zone Data

In [None]:
# Build one origin-destination (OD) matrix per hourly band by counting the
# bookings of both collections for every ordered pair of zones.

# Day-of-week window passed to get_OD_data: startDay inclusive, endDay exclusive.
startDay = 2
endDay = 7

# (startHour, endHour) bands, both bounds inclusive.
cases = [(0, 4), (5, 8), (9, 13), (14, 18), (19, 23)]

n_zones = len(zones)

# One zero-initialised matrix per band (the previous literal allocated six
# matrices for five bands).
OD_carsharing = [np.zeros([n_zones, n_zones]) for _ in cases]

for case, (startHour, endHour) in enumerate(cases):
    print(cases[case])
    for orig in range(n_zones):
        orig_zone = zones_data['features'][orig]["geometry"]["coordinates"]
        for dest in range(n_zones):
            print(f'Instance {orig}-{dest}')

            dest_zone = zones_data['features'][dest]["geometry"]["coordinates"]
            pipeline = get_OD_data(orig_zone, dest_zone, startHour, endHour, startDay, endDay)

            # $count emits no document when nothing matches, hence the
            # emptiness check before reading "tot".
            tot = 0
            for bookings in (c2g_perm, enj_perm):
                result = list(bookings.aggregate(pipeline))
                if result:
                    tot += result[0]["tot"]

            # orig/dest are already 0-based positions (same as the keys of
            # `zones`); the former [orig-1, dest-1] wrapped zone 0 into the
            # last row/column and rotated the whole matrix by one.
            OD_carsharing[case][orig, dest] = tot

In [None]:
# Write every hourly OD matrix to its own integer-formatted text file.
for case, _band in enumerate(cases):
    out_path = f'Results/{case}_Hour_{cases[case][0]}_{cases[case][1]}.txt'
    np.savetxt(out_path, OD_carsharing[case], fmt='%d')

In [None]:
# Heatmap of the OD matrix of one hourly band.
shown_case = 3

# `title` was never defined, so this cell raised NameError on a fresh run.
# The column labels sit on top, so the title acts as the x-axis caption,
# in the same way as the other OD heatmaps in this file.
title = f'Destination (Car-Sharing, hours {cases[shown_case][0]}-{cases[shown_case][1]})'

zone_labels = list(zones.values())

plt.figure(figsize=(18,14))
ax = sns.heatmap(OD_carsharing[shown_case], linewidth=0.5, annot=True, fmt='g', annot_kws={"size": 14}, cmap='Greens')
plt.xlim([0,23])
# Ticks at x.5 place each label at the centre of its heatmap cell.
plt.xticks(np.arange(0.5, 23.5, 1), labels=zone_labels, rotation=90, ha='center')
plt.yticks(np.arange(0.5, 23.5, 1), labels=zone_labels, rotation=0)
ax.xaxis.set_ticks_position('top')
plt.title(title, size=20, y=1.30)
plt.ylabel('Origin')
plt.tight_layout()
plt.show()

# Get Overview

## Car-Sharing

In [None]:
# Overall car-sharing OD matrix: all hours of the day, day-of-week window
# [startDay, endDay).
n_zones = len(zones)
OD_C = np.zeros([n_zones, n_zones])

startDay = 2
endDay = 7
startHour = 0
endHour = 23

for orig in range(n_zones):
    orig_zone = zones_data['features'][orig]["geometry"]["coordinates"]
    for dest in range(n_zones):
        print(f'Instance {orig}-{dest}')

        dest_zone = zones_data['features'][dest]["geometry"]["coordinates"]
        pipeline = get_OD_data(orig_zone, dest_zone, startHour, endHour, startDay, endDay)

        # $count emits no document when nothing matches, hence the emptiness
        # check before reading "tot".
        tot = 0
        for bookings in (c2g_perm, enj_perm):
            result = list(bookings.aggregate(pipeline))
            if result:
                tot += result[0]["tot"]

        # orig/dest are already 0-based positions (same as the keys of
        # `zones`); the former [orig-1, dest-1] wrapped zone 0 into the last
        # row/column and rotated the whole matrix by one.
        OD_C[orig, dest] = tot

In [None]:
# Font sizes used by the overview heatmap.
for rc_group, rc_settings in (
    ('axes', {'labelsize': 32}),
    ('ytick', {'labelsize': 16}),
    ('xtick', {'labelsize': 16}),
    ('legend', {'fontsize': 28}),
    ('figure', {'titlesize': 18}),
):
    plt.rc(rc_group, **rc_settings)

zone_names = list(zones.values())
# Positions at x.5 put each label at the centre of its heatmap cell.
tick_centres = np.arange(0.5, 23.5, 1)

plt.figure(figsize=(22,16))
ax = sns.heatmap(OD_C, linewidth=0.5, annot=True, fmt='g', annot_kws={"size": 17}, cmap='Greens')
plt.xlim([0,23])
plt.xticks(tick_centres, labels=zone_names, rotation=90, ha='center')
plt.yticks(tick_centres, labels=zone_names, rotation=0)
# Column labels go on top, so the figure title serves as the x-axis caption.
ax.xaxis.set_ticks_position('top')
plt.title('Destination (Car-Sharing)', size=32, y=1.24)
plt.ylabel('Origin (Car-Sharing)', labelpad=30)
plt.tight_layout()
plt.savefig('OD_CarSharing.png', dpi=300)
plt.show()

## IMQ

In [None]:
# Overall OD matrix from the IMQ survey trips.
sp_to = pd.read_csv('IMQData/spostamentiTorino.csv')

# Work on a copy so the raw frame `sp_to` keeps its original zone codes.
sp_to_all = sp_to.copy()

# Zone codes are strings carrying a 'Q'; remove it and convert to numbers.
sp_to_all['COD_ZONA_PAR'] = pd.to_numeric(sp_to_all['COD_ZONA_PAR'].str.replace('Q',''))
sp_to_all['COD_ZONA_ARR'] = pd.to_numeric(sp_to_all['COD_ZONA_ARR'].str.replace('Q',''))
sp_to_all['PAR_TO_ARR'] = list(zip(sp_to_all['COD_ZONA_PAR'], sp_to_all['COD_ZONA_ARR']))

OD_I = np.zeros([len(zones), len(zones)])
# Count every (origin, destination) pair once and walk the (pair, count)
# items directly. The old loop recomputed value_counts() three times per
# iteration and read counts with an integer key on a tuple-labelled Series,
# a positional fallback that pandas has deprecated.
for (orig, dest), n_trips in sp_to_all['PAR_TO_ARR'].value_counts().items():
    # The -1 shifts the zone codes down by one to get matrix indices
    # (presumably the codes are 1-based - confirm against the data).
    OD_I[orig-1, dest-1] = int(n_trips)

In [None]:
# Font sizes used by the IMQ overview heatmap.
for rc_group, rc_settings in (
    ('axes', {'labelsize': 32}),
    ('ytick', {'labelsize': 16}),
    ('xtick', {'labelsize': 16}),
    ('legend', {'fontsize': 28}),
    ('figure', {'titlesize': 18}),
):
    plt.rc(rc_group, **rc_settings)

zone_names = list(zones.values())
# Positions at x.5 put each label at the centre of its heatmap cell.
tick_centres = np.arange(0.5, 23.5, 1)

plt.figure(figsize=(22,16))
ax = sns.heatmap(OD_I, linewidth=0.5, annot=True, fmt='g', annot_kws={"size": 17}, cmap='Greens')
plt.xlim([0,23])
plt.xticks(tick_centres, labels=zone_names, rotation=90, ha='center')
plt.yticks(tick_centres, labels=zone_names, rotation=0)
# Column labels go on top, so the figure title serves as the x-axis caption.
ax.xaxis.set_ticks_position('top')
plt.title('Destination (IMQ)', size=32, y=1.24)
plt.ylabel('Origin (IMQ)', labelpad=30)
plt.tight_layout()
plt.savefig('OD_IMQ.png', dpi=300)
plt.show()

# Daily and Hourly OD Matrices of Car-Sharing Rentals

### Hourly

In [None]:
# Hourly car-sharing OD matrices: one matrix per hour band, restricted to the
# day-of-week window [startDay, endDay).
startDay = 2
endDay = 7

OD_carsharing = []

# (startHour, endHour) bands, both bounds inclusive.
cases = [(0, 4), (5, 8), (9, 13), (14, 18), (19, 23)]

n_zones = len(zones)

for case, (startHour, endHour) in enumerate(cases):
    OD_carsharing.append(np.zeros([n_zones, n_zones]))
    print(cases[case])
    for orig in range(n_zones):
        orig_zone = zones_data['features'][orig]["geometry"]["coordinates"]
        for dest in range(n_zones):
            print(f'Instance {orig}-{dest}')

            dest_zone = zones_data['features'][dest]["geometry"]["coordinates"]
            pipeline = get_OD_data(orig_zone, dest_zone, startHour, endHour, startDay, endDay)

            # $count emits no document when nothing matches, hence the
            # emptiness check before reading "tot".
            tot = 0
            for bookings in (c2g_perm, enj_perm):
                result = list(bookings.aggregate(pipeline))
                if result:
                    tot += result[0]["tot"]

            # orig/dest are already 0-based positions (same as the keys of
            # `zones`); the former [orig-1, dest-1] wrapped zone 0 into the
            # last row/column and rotated the whole matrix by one.
            OD_carsharing[case][orig, dest] = tot

# The same matrices are stored under both result trees, because each
# comparison step lists its own Carsharing folder.
for case, matrix in enumerate(OD_carsharing):
    np.savetxt(f'Results/SingleFilter/Carsharing/Hourly/{case}_Hour_{cases[case][0]}_{cases[case][1]}.txt',
               matrix, fmt='%d')
    np.savetxt(f'Results/TwoFilters/Carsharing/Hourly/{case}_Hour_{cases[case][0]}_{cases[case][1]}.txt',
               matrix, fmt='%d')

### Daily

In [None]:
# Daily car-sharing OD matrices: one matrix per single day of the week,
# covering all hours of the day.
startHour = 0
endHour = 23

OD_carsharing = []

# (startDay, endDay) windows; endDay is exclusive, so each window is one day.
cases = [(2, 3), (3, 4), (4, 5), (5, 6), (6, 7)]

n_zones = len(zones)

for case, (startDay, endDay) in enumerate(cases):
    OD_carsharing.append(np.zeros([n_zones, n_zones]))
    print(cases[case])
    for orig in range(n_zones):
        orig_zone = zones_data['features'][orig]["geometry"]["coordinates"]
        for dest in range(n_zones):
            print(f'Instance {orig}-{dest}')

            dest_zone = zones_data['features'][dest]["geometry"]["coordinates"]
            pipeline = get_OD_data(orig_zone, dest_zone, startHour, endHour, startDay, endDay)

            # $count emits no document when nothing matches, hence the
            # emptiness check before reading "tot".
            tot = 0
            for bookings in (c2g_perm, enj_perm):
                result = list(bookings.aggregate(pipeline))
                if result:
                    tot += result[0]["tot"]

            # orig/dest are already 0-based positions (same as the keys of
            # `zones`); the former [orig-1, dest-1] wrapped zone 0 into the
            # last row/column and rotated the whole matrix by one.
            OD_carsharing[case][orig, dest] = tot

# The same matrices are stored under both result trees, because each
# comparison step lists its own Carsharing folder.
for case, matrix in enumerate(OD_carsharing):
    np.savetxt(f'Results/SingleFilter/Carsharing/Daily/{case}_Day_{cases[case][0]}_{cases[case][1]}.txt',
               matrix, fmt='%d')
    np.savetxt(f'Results/TwoFilters/Carsharing/Daily/{case}_Day_{cases[case][0]}_{cases[case][1]}.txt',
               matrix, fmt='%d')

# Single Filter

In [None]:
# Single-filter IMQ matrices: for every value of SESSO, FASCIA_ETA and SCOPO,
# keep the trips with that value and save their OD matrix.
sp_to = pd.read_csv('IMQData/spostamentiTorino.csv')

imq_columns = ['SESSO', 'FASCIA_ETA', 'SCOPO', 'COD_ZONA_PAR', 'COD_ZONA_ARR']
sp_to_all = sp_to[imq_columns]

# Running file index; it prefixes the file names so they sort in creation order.
ct = 0

for column in ['SESSO', 'FASCIA_ETA', 'SCOPO']:

    # Take the values from the full frame. The old code read them from
    # `sp_to_all`, which from the second column on was still filtered to the
    # previous column's last value and could therefore miss values.
    for val in sorted(sp_to[column].unique()):

        # .copy() makes the column assignments below act on an independent
        # frame rather than on a slice of `sp_to`.
        sp_to_all = sp_to.loc[sp_to[column] == val, imq_columns].copy()

        # Zone codes are strings carrying a 'Q'; remove it and convert.
        sp_to_all['COD_ZONA_PAR'] = pd.to_numeric(sp_to_all['COD_ZONA_PAR'].str.replace('Q',''))
        sp_to_all['COD_ZONA_ARR'] = pd.to_numeric(sp_to_all['COD_ZONA_ARR'].str.replace('Q',''))
        sp_to_all['PAR_TO_ARR'] = list(zip(sp_to_all['COD_ZONA_PAR'], sp_to_all['COD_ZONA_ARR']))

        OD_imq = np.zeros([len(zones), len(zones)])
        # Iterate (pair, count) items once instead of recomputing
        # value_counts() per access and indexing it by integer position.
        for (orig, dest), n_trips in sp_to_all['PAR_TO_ARR'].value_counts().items():
            # The -1 shifts the zone codes down by one to get matrix indices
            # (presumably the codes are 1-based - confirm against the data).
            OD_imq[orig-1, dest-1] = int(n_trips)

        # `{ct:02d}` zero-pads to two digits, as the former if/else did.
        # np.savetxt returns None, so its result is no longer assigned to
        # OD_imq.
        np.savetxt(f'Results/SingleFilter/IMQ/{ct:02d}_IMQ_{column}_{val}.txt', OD_imq, fmt='%d')
        ct += 1

In [None]:
# Load the saved OD matrices back from disk: IMQ single-filter matrices, then
# the hourly and the daily car-sharing matrices.
all_imq_matrixes = []
all_mdb_matrixes = []

names = []

# os.listdir returns entries in arbitrary order, while the plots label rows
# and columns by position; sort by file name (the files carry a numeric
# prefix) and skip anything that is not a .txt matrix file.
for dr in sorted(f for f in os.listdir(r'Results/SingleFilter/IMQ') if f.endswith('.txt')):
    names.append(dr[:-4])  # file name without the '.txt' suffix
    all_imq_matrixes.append(np.loadtxt(f"Results/SingleFilter/IMQ/{dr}", dtype=int))
for dr in sorted(f for f in os.listdir(r'Results/SingleFilter/Carsharing/Hourly') if f.endswith('.txt')):
    names.append(dr[:-4])
    all_mdb_matrixes.append(np.loadtxt(f"Results/SingleFilter/Carsharing/Hourly/{dr}", dtype=int))
for dr in sorted(f for f in os.listdir(r'Results/SingleFilter/Carsharing/Daily') if f.endswith('.txt')):
    names.append(dr[:-4])
    all_mdb_matrixes.append(np.loadtxt(f"Results/SingleFilter/Carsharing/Daily/{dr}", dtype=int))

In [None]:
# Font sizes used by the distance heatmap.
for rc_group, rc_settings in (
    ('axes', {'labelsize': 26}),
    ('ytick', {'labelsize': 20}),
    ('xtick', {'labelsize': 20}),
    ('legend', {'fontsize': 20}),
    ('figure', {'titlesize': 18}),
):
    plt.rc(rc_group, **rc_settings)
plt.rcParams.update({'axes.titlesize': 'xx-large'})

# Distance table: one row per car-sharing matrix, one column per IMQ matrix.
n_mdb = len(all_mdb_matrixes)
euclidean_results = np.zeros([n_mdb, len(all_imq_matrixes)])

for row, mdb_matrix in enumerate(all_mdb_matrixes):
    for col, imq_matrix in enumerate(all_imq_matrixes):
        # Norm of the difference of the two min-max normalised matrices.
        gap = normalize(imq_matrix) - normalize(mdb_matrix)
        euclidean_results[row][col] = np.around(np.linalg.norm(gap), decimals=2)

# Display labels: the IMQ filter labels first, then one label per car-sharing
# matrix (hour bands followed by weekdays).
# NOTE(review): '(1)'..'(17)' presumably refer to a numbered legend - confirm.
names = ['(1)','(2)','(3)','(4)','(5)','(6)','(7)','(8)','(9)',
         '(10)','(11)','(12)','(13)','(14)','(15)','(16)','(17)',
         '0AM-5AM','5AM-9AM','9AM-2PM','2PM-7PM','7PM-12PM',
         'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

# The trailing n_mdb labels belong to the rows, the remaining ones to the columns.
n_imq_labels = len(names) - n_mdb
imq_labels = names[:-n_mdb]
mdb_labels = names[-n_mdb:]

plt.figure(figsize=(22,8))
ax = sns.heatmap(euclidean_results, linewidth=0.8, annot=True,
                 fmt='g', annot_kws={"size": 20}, cmap='Oranges_r', vmin=0, vmax=4.5)
plt.xlim([0, n_imq_labels])
plt.xticks(np.arange(0.5, n_imq_labels+0.5, 1), labels=imq_labels, rotation=90, ha='center')
plt.yticks(np.arange(0.5, n_mdb+0.5, 1), labels=mdb_labels, rotation=0)
# Column labels go on top, so the figure title serves as the x-axis caption.
ax.xaxis.set_ticks_position('top')
plt.ylabel('Time Filter (Car-Sharing)', labelpad=30)
plt.title('Interview Filter (IMQ) - Single Filter (Gender, Age, Reason)', size=26, y=1.15)
plt.tight_layout()
plt.savefig('Single_Filter(Gender,Age,Reason)', dpi=300)
plt.show()

In [None]:
# Cell output: show the matrix of rounded distances
# (rows: car-sharing matrices, columns: IMQ matrices).
euclidean_results

# Two Filters

In [None]:
# Two-filter IMQ matrices: for every pair of filter columns and every
# combination of their values, keep the matching trips and save the OD matrix.
imq_columns = ['SESSO', 'FASCIA_ETA', 'SCOPO', 'COD_ZONA_PAR', 'COD_ZONA_ARR']
sp_to_all = sp_to[imq_columns]

# Running file index; it prefixes the file names so they sort in creation order.
ct = 0

filters = [('SESSO', 'FASCIA_ETA'), ('SESSO', 'SCOPO'), ('FASCIA_ETA', 'SCOPO')]

for fil in filters:
    f1, f2 = fil[0], fil[1]

    # The loading step reads per-pair folders such as
    # Results/TwoFilters/IMQ/SESSO_SCOPO, so write each pair into its own
    # '<f1>_<f2>' folder instead of one flat directory.
    out_dir = f'Results/TwoFilters/IMQ/{f1}_{f2}'
    os.makedirs(out_dir, exist_ok=True)

    for val1 in sorted(sp_to[f1].unique()):
        for val2 in sorted(sp_to[f2].unique()):

            # .copy() makes the column assignments below act on an
            # independent frame rather than on a slice of `sp_to`.
            selected = (sp_to[f1] == val1) & (sp_to[f2] == val2)
            sp_to_all = sp_to.loc[selected, imq_columns].copy()

            # Zone codes are strings carrying a 'Q'; remove it and convert.
            sp_to_all['COD_ZONA_PAR'] = pd.to_numeric(sp_to_all['COD_ZONA_PAR'].str.replace('Q',''))
            sp_to_all['COD_ZONA_ARR'] = pd.to_numeric(sp_to_all['COD_ZONA_ARR'].str.replace('Q',''))
            sp_to_all['PAR_TO_ARR'] = list(zip(sp_to_all['COD_ZONA_PAR'], sp_to_all['COD_ZONA_ARR']))

            OD_imq = np.zeros([len(zones), len(zones)])
            # Iterate (pair, count) items once instead of recomputing
            # value_counts() per access and indexing it by integer position.
            for (orig, dest), n_trips in sp_to_all['PAR_TO_ARR'].value_counts().items():
                # The -1 shifts the zone codes down by one to get matrix
                # indices (presumably 1-based codes - confirm against data).
                OD_imq[orig-1, dest-1] = int(n_trips)

            # `{ct:02d}` zero-pads to two digits, as the former if/else did.
            # np.savetxt returns None, so its result is no longer assigned
            # to OD_imq.
            np.savetxt(f'{out_dir}/{ct:02d}_IMQ_{f1}{val1}_{f2}{val2}.txt', OD_imq, fmt='%d')
            ct += 1

# Leave `sp_to_all` as the unfiltered column selection, as before.
sp_to_all = sp_to[imq_columns]

### SESSO (Gender) Filter Pairs

In [None]:
# Load the two-filter IMQ matrices (SESSO x SCOPO and SESSO x FASCIA_ETA) and
# the hourly and daily car-sharing matrices.
all_imq_matrixesSeSc = []
all_imq_matrixesSeFa = []
all_mdb_matrixes = []

names_cs = []
names_SeSc = []
names_SeFa = []

# os.listdir returns entries in arbitrary order, while the plots label rows
# and columns by position; sort by file name (the files carry a numeric
# prefix) and skip anything that is not a .txt matrix file.
for dr in sorted(f for f in os.listdir(r'Results/TwoFilters/IMQ/SESSO_SCOPO') if f.endswith('.txt')):
    names_SeSc.append(dr[:-4])  # file name without the '.txt' suffix
    all_imq_matrixesSeSc.append(np.loadtxt(f"Results/TwoFilters/IMQ/SESSO_SCOPO/{dr}", dtype=int))

for dr in sorted(f for f in os.listdir(r'Results/TwoFilters/IMQ/SESSO_FASCIA_ETA') if f.endswith('.txt')):
    names_SeFa.append(dr[:-4])
    all_imq_matrixesSeFa.append(np.loadtxt(f"Results/TwoFilters/IMQ/SESSO_FASCIA_ETA/{dr}", dtype=int))

for dr in sorted(f for f in os.listdir(r'Results/TwoFilters/Carsharing/Hourly') if f.endswith('.txt')):
    names_cs.append(dr[:-4])
    all_mdb_matrixes.append(np.loadtxt(f"Results/TwoFilters/Carsharing/Hourly/{dr}", dtype=int))

for dr in sorted(f for f in os.listdir(r'Results/TwoFilters/Carsharing/Daily') if f.endswith('.txt')):
    names_cs.append(dr[:-4])
    all_mdb_matrixes.append(np.loadtxt(f"Results/TwoFilters/Carsharing/Daily/{dr}", dtype=int))

In [None]:
# Axis labels for the two-filter heatmaps; these replace the file-derived names.
# Each IMQ label pairs a first-filter number (1 or 2) with a second-filter number.
# NOTE(review): the numbers presumably refer to a numbered legend - confirm.
names_SeSc = [f'({first}),({second})' for first in (1, 2) for second in range(7, 18)]

names_SeFa = [f'({first}),({second})' for first in (1, 2) for second in range(3, 7)]

# Car-sharing rows: hour bands first, then weekdays.
names_cs = [
    '0AM-5AM', '5AM-9AM', '9AM-2PM', '2PM-7PM', '7PM-12PM',
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
]

In [None]:
# Distance tables for the two-filter comparison: one row per car-sharing
# matrix, one column per IMQ matrix of the respective filter pair.
n_mdb = len(all_mdb_matrixes)
euclidean_results_SeSc = np.zeros([n_mdb, len(all_imq_matrixesSeSc)])
euclidean_results_SeFa = np.zeros([n_mdb, len(all_imq_matrixesSeFa)])

for row, mdb_matrix in enumerate(all_mdb_matrixes):
    # Each entry is the norm of the difference of the two min-max normalised
    # matrices, rounded to two decimals.
    for col, imq_matrix in enumerate(all_imq_matrixesSeSc):
        gap = normalize(imq_matrix) - normalize(mdb_matrix)
        euclidean_results_SeSc[row][col] = np.around(np.linalg.norm(gap), decimals=2)

    for col, imq_matrix in enumerate(all_imq_matrixesSeFa):
        gap = normalize(imq_matrix) - normalize(mdb_matrix)
        euclidean_results_SeFa[row][col] = np.around(np.linalg.norm(gap), decimals=2)

In [None]:
# Font sizes used by the distance heatmap.
for rc_group, rc_settings in (
    ('axes', {'labelsize': 26}),
    ('ytick', {'labelsize': 20}),
    ('xtick', {'labelsize': 20}),
    ('legend', {'fontsize': 20}),
    ('figure', {'titlesize': 18}),
):
    plt.rc(rc_group, **rc_settings)
plt.rcParams.update({'axes.titlesize': 'xx-large'})

n_columns = len(names_SeSc)
n_rows = len(all_mdb_matrixes)

plt.figure(figsize=(22,10))
ax = sns.heatmap(euclidean_results_SeSc, linewidth=0.8, annot=True, fmt='g', annot_kws={"size": 20}, cmap='Oranges_r', vmin=0, vmax=4.5)
plt.xlim([0, n_columns])
# Ticks at x.5 place each label at the centre of its heatmap cell.
plt.xticks(np.arange(0.5, n_columns+0.5, 1), labels=names_SeSc, rotation=90, ha='center')
plt.yticks(np.arange(0.5, n_rows+0.5, 1), labels=names_cs, rotation=0)
# Column labels go on top, so the figure title serves as the x-axis caption.
ax.xaxis.set_ticks_position('top')
plt.ylabel('Time Filter (Car-Sharing)', labelpad=30)
plt.title('Interview Filter (IMQ) - Two Filters (Gender, Reason)', size=26, y=1.25)
plt.tight_layout()
plt.savefig('Two_Filters_(Gender,Reason)', dpi=300)
plt.show()

In [None]:
# Font sizes used by the distance heatmap.
for rc_group, rc_settings in (
    ('axes', {'labelsize': 26}),
    ('ytick', {'labelsize': 20}),
    ('xtick', {'labelsize': 20}),
    ('legend', {'fontsize': 20}),
    ('figure', {'titlesize': 18}),
):
    plt.rc(rc_group, **rc_settings)
plt.rcParams.update({'axes.titlesize': 'xx-large'})

n_columns = len(names_SeFa)
n_rows = len(all_mdb_matrixes)

plt.figure(figsize=(22,8))
ax = sns.heatmap(euclidean_results_SeFa, linewidth=0.8, annot=True, fmt='g', annot_kws={"size": 20}, cmap='Oranges_r', vmin=0, vmax=4.5)
plt.xlim([0, n_columns])
# Ticks at x.5 place each label at the centre of its heatmap cell.
plt.xticks(np.arange(0.5, n_columns+0.5, 1), labels=names_SeFa, rotation=90, ha='center')
plt.yticks(np.arange(0.5, n_rows+0.5, 1), labels=names_cs, rotation=0)
# Column labels go on top, so the figure title serves as the x-axis caption.
ax.xaxis.set_ticks_position('top')
plt.ylabel('Time Filter (Car-Sharing)', labelpad=30)
plt.title('Interview Filter (IMQ) - Two Filters (Gender, Age)', size=26, y=1.25)
plt.tight_layout()
plt.savefig('Two_Filters_(Gender,Age)', dpi=300)
plt.show()