In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import glob
import numpy as np
import tqdm
import datetime
import geopandas as gpd
import rtree

In [2]:
files = glob.glob("data/raw/SNData/*.csv")

dfs = []
for f in tqdm.tqdm(files):
    dfs.append(pd.read_csv(f, header=0, sep=";", parse_dates = [7,8]))

Full_data = pd.concat(dfs,ignore_index=True) # Save this to interim
Full_data.to_csv('data/interim/Full_data.csv')

100%|██████████| 53/53 [07:29<00:00,  8.48s/it]


In [3]:
# Drop 53 rows with na values
df = Full_data.dropna()

# Rename Columns to English
df. columns = ['Customer_Group', 'CustomerID', 'CarID', 'Engine', 'Rental_flag', 'RentalID', 'Rental_Usage_Type', 'Reservation_Time', 'End_Time', 'Revenue', 'Distance', 'Drives', 'Reservation_Minutes','Fuel_Start','Fuel_End','Start_Lat', 'Start_Long', 'End_Lat', 'End_Long']

# Drop drives as it has no info (only ones)
df = df.drop(columns = 'Drives')
df

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,Distance,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long
0,Non_Customer,793639.0,WBY1Z21080V307924,I3,No,9.335872e+09,Private,2016-03-24 11:48:43,2016-02-04 10:00:19,0.00,0,0,0,0,55.678763,12.552853,0.000000,0.000000
1,Non_Customer,1035973.0,WBY1Z21080V307857,I3,No,9.336114e+09,Private,2016-03-30 15:37:39,2016-01-04 00:40:38,0.00,0,0,62,47,55.770626,12.519300,55.770389,12.518839
2,Non_Customer,998095.0,WBY1Z21020V307904,I3,No,9.336154e+09,Private,2016-03-31 13:08:16,2016-05-04 08:32:25,0.00,2,1,85,79,55.621588,12.606951,55.621532,12.606279
3,Non_Customer,999604.0,WBY1Z21010V307926,I3,No,9.336158e+09,Private,2016-03-31 14:43:00,2016-01-04 07:10:00,0.00,0,1,0,71,55.770077,12.518914,55.769746,12.519123
4,Non_Customer,1035969.0,WBY1Z21070V308210,,No,9.336160e+09,Private,2016-03-31 15:21:36,2016-01-04 14:24:17,0.00,0,1,53,52,55.770623,12.519791,55.770439,12.518937
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2633979,Customer,1070662.0,WBY1Z21010V308185,I3,No,9.345011e+09,Private,2016-09-30 23:39:05,2016-09-30 23:50:54,5.16,4,10,46,41,55.694700,12.553776,55.678740,12.587144
2633980,Customer,1041705.0,WBY1Z21080V308250,I3,No,9.345011e+09,Private,2016-09-30 23:42:18,2016-09-30 23:52:14,3.44,6,8,59,52,55.648401,12.542945,55.641310,12.615295
2633981,Customer,2112471.0,WBY1Z21020V308261,I3,No,9.345011e+09,Private,2016-09-30 23:33:39,2016-09-30 23:52:03,8.17,9,3,39,30,55.664744,12.580875,55.719856,12.540863
2633982,Customer,440147.0,WBY1Z21060V307954,I3,Yes,9.345011e+09,Private,2016-09-30 23:41:56,2016-09-30 23:57:30,6.88,9,4,44,35,55.710676,12.566043,55.667453,12.619987


In [4]:
# Remove all rows with a CarID as it can not be used
df = df[df.CarID != '0']

In [5]:
# Engine has two types of missing values that is alligned
df["Engine"].replace({" ": '0'}, inplace=True)

In [6]:
# If a CarID already has an engine type assign that to the missing ones
Engine_dict = {c: df[df.CarID == c].Engine.nunique() for c in df[df.Engine == '0'].CarID.unique()}
for car, engine in Engine_dict.items():
    if engine == 1:
        continue
    True_Engine = [x for x in df[df.CarID == car].Engine.unique() if x!= '0'][0]
    df.loc[(df.CarID == car) & (df.Engine == '0'), 'Engine'] = True_Engine

# Populate the rest manual based on ID
df.loc[(df.CarID == 'WBA1R5104J7B14310') & (df.Engine == '0'), 'Engine'] = '118I'
df.loc[(df.CarID == 'WBA1R5104J5K58061') & (df.Engine == '0'), 'Engine'] = '118I'
df.loc[(df.CarID == 'WBA1R5103K7D66678') & (df.Engine == '0'), 'Engine'] = '118I'
df.loc[(df.CarID == 'WBY8P2105K7D70350') & (df.Engine == '0'), 'Engine'] = 'I3 120'
df.loc[(df.CarID == 'WBY8P2102K7D70287') & (df.Engine == '0'), 'Engine'] = 'I3 120'

In [7]:
# Add start time based on Reservation minutes
df['Start_Time'] = [row.Reservation_Time+datetime.timedelta(minutes=row.Reservation_Minutes) for _, row in df.iterrows()] 

## Add zones

In [8]:
# Load shapefile and set projection
shapefile = gpd.read_file("../Zonekort/LTM_Zone3/zones_level3.shp")
shapefile = shapefile.to_crs(epsg=4326)

In [9]:
# Create a geoDF with geometry as starting point
gdf_start = gpd.GeoDataFrame(df, geometry= gpd.points_from_xy(df.Start_Long, df.Start_Lat))

# Set projection
gdf_start = gdf_start.set_crs(epsg=4326)

In [10]:
# Populate zones based on which zone they are within
gdpj_start  = gpd.sjoin(gdf_start, shapefile, op='within')
df['Start_Zone'] = gdpj_start.zoneid

In [11]:
# Populate the rest based on which zone they are closest too
Start_zone_filler = {x: shapefile.zoneid[shapefile.distance(df.loc[x].geometry).sort_values().index[0]] for x in df.index[df['Start_Zone'].isna()]}
df['Start_Zone'] = df['Start_Zone'].fillna(Start_zone_filler)


  Start_zone_filler = {x: shapefile.zoneid[shapefile.distance(df.loc[x].geometry).sort_values().index[0]] for x in df.index[df['Start_Zone'].isna()]}


In [12]:
# Create a geoDF with geometry as end point
gdf_end = gpd.GeoDataFrame(df, geometry= gpd.points_from_xy(df.End_Long, df.End_Lat))

# Set projection
gdf_end = gdf_end.set_crs(epsg=4326)

In [13]:
# Populate zones based on which zone they are within
gdpj_end  = gpd.sjoin(gdf_end, shapefile, op='within')
df['End_Zone'] = gdpj_end.zoneid

In [14]:
# Populate the rest based on which zone they are closest too
End_zone_filler = {x: shapefile.zoneid[shapefile.distance(df.loc[x].geometry).sort_values().index[0]] for x in df.index[df['End_Zone'].isna()]}
df['End_Zone'] = df['End_Zone'].fillna(End_zone_filler)


  End_zone_filler = {x: shapefile.zoneid[shapefile.distance(df.loc[x].geometry).sort_values().index[0]] for x in df.index[df['End_Zone'].isna()]}


In [15]:
# Remove geomery type and make IDs int columns
df = df.drop(columns = 'geometry')
df = df.astype({'CustomerID': 'int32', 'RentalID': 'int64', 'Start_Zone': 'int32','End_Zone': 'int32'})

In [16]:
# Check types
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2630477 entries, 0 to 2633983
Data columns (total 21 columns):
 #   Column               Dtype         
---  ------               -----         
 0   Customer_Group       object        
 1   CustomerID           int32         
 2   CarID                object        
 3   Engine               object        
 4   Rental_flag          object        
 5   RentalID             int64         
 6   Rental_Usage_Type    object        
 7   Reservation_Time     datetime64[ns]
 8   End_Time             datetime64[ns]
 9   Revenue              float64       
 10  Distance             int64         
 11  Reservation_Minutes  int64         
 12  Fuel_Start           int64         
 13  Fuel_End             int64         
 14  Start_Lat            float64       
 15  Start_Long           float64       
 16  End_Lat              float64       
 17  End_Long             float64       
 18  Start_Time           datetime64[ns]
 19  Start_Zone           

## Weird Coordinates

In [17]:
# Equator 1
df[(df.Start_Lat < 5)]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
318973,Non_Customer,793661,WBY1Z6108HV938983,I3 94,No,9349129851,Private,2016-12-22 15:38:31,2016-12-22 15:57:30,0.0,...,1,86,84,0.0,0.0,55.703077,12.553715,2016-12-22 15:39:31,550062,102432
589405,Non_Customer,793639,WBY8P2107K7E71728,I3 120,No,9422076259,Private,2019-09-19 14:29:27,2019-09-19 15:08:06,0.0,...,6,9,68,0.0,0.0,55.674228,12.560673,2019-09-19 14:35:27,550062,102161
592152,Non_Customer,793639,WBA31AA02L5N90396,X1 SDRIVE18I,No,9422140700,Private,2019-09-20 12:06:08,2019-09-20 12:22:58,0.0,...,15,0,0,0.0,0.0,55.716238,12.567008,2019-09-20 12:21:08,550062,102412
607119,Non_Customer,793639,WBY8P2104K7E72500,I3 120,No,9422802637,Private,2019-09-25 16:57:17,2019-09-25 17:23:20,0.0,...,1,7,6,0.0,0.0,55.716530,12.566844,2019-09-25 16:58:17,550062,102412
609129,Non_Customer,793639,WBY8P2106K7E72613,I3 120,No,9422919959,Private,2019-09-26 11:51:09,2019-09-26 11:58:08,0.0,...,3,5,100,0.0,0.0,55.715922,12.575446,2019-09-26 11:54:09,550062,102324
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2571451,Non_Customer,1106037,WMWXU7107KTM91384,COOPER,No,9379438620,Private,2018-06-15 06:01:07,2018-06-15 06:06:06,0.0,...,3,99,99,0.0,0.0,55.640906,12.611920,2018-06-15 06:04:07,550062,103251
2573547,Non_Customer,1114084,WMWXU7101KTM91395,COOPER,No,9379504274,Private,2018-06-15 21:18:48,2018-06-15 21:28:11,0.0,...,2,97,90,0.0,0.0,55.630224,12.648739,2018-06-15 21:20:48,550062,185203
2587868,Non_Customer,793639,WMWXR3100KTK19883,COOPER,No,9379880464,Private,2018-06-21 18:38:45,2018-06-21 19:47:16,0.0,...,28,74,72,0.0,0.0,55.674168,12.560709,2018-06-21 19:06:45,550062,102161
2589662,Non_Customer,819011,WMWXU7107KTM90591,COOPER,No,9379929925,Private,2018-06-22 12:52:55,2018-06-22 13:13:24,0.0,...,2,90,87,0.0,0.0,55.622541,12.615536,2018-06-22 12:54:55,550062,185154


In [18]:
# Equator 2
df[df.End_Lat < 5]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
0,Non_Customer,793639,WBY1Z21080V307924,I3,No,9335872135,Private,2016-03-24 11:48:43,2016-02-04 10:00:19,0.00,...,0,0,0,55.678763,12.552853,0.0,0.0,2016-03-24 11:48:43,147111,550062
1646,Customer,1030857,WBY1Z21050V307783,I3,No,9336279892,Private,2016-02-04 23:28:40,2016-03-04 03:06:32,78.57,...,15,44,0,55.667531,12.549165,0.0,0.0,2016-02-04 23:43:40,102821,550062
4685,Customer,819446,WBY1Z21050V308108,I3,No,9336476091,Private,2016-07-04 13:41:24,2016-07-04 14:27:27,20.21,...,17,100,0,55.629616,12.580891,0.0,0.0,2016-07-04 13:58:24,103291,550062
14282,Customer,1069340,WBY1Z21010V307859,I3,No,9337055461,Private,2016-04-19 21:11:17,2016-04-19 22:14:48,22.15,...,3,31,0,55.674121,12.499912,0.0,0.0,2016-04-19 21:14:17,147251,550062
19084,Customer,796025,WBY1Z21010V308073,I3,No,9337342253,Private,2016-04-26 01:39:19,2016-04-26 02:08:26,0.00,...,11,39,96,55.674215,12.560683,0.0,0.0,2016-04-26 01:50:19,102161,550062
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2627413,Customer,800699,WBY1Z21090V307804,I3,No,9344591671,Private,2016-09-22 15:42:22,2016-09-22 16:07:30,9.01,...,8,58,0,55.671578,12.574404,0.0,0.0,2016-09-22 15:50:22,102171,550062
2629163,Customer,820349,WBY1Z21000V307917,I3,No,9344701319,Private,2016-09-24 17:52:07,2016-09-24 20:38:13,45.07,...,0,75,0,55.696387,12.594131,0.0,0.0,2016-09-24 17:52:07,102336,550062
2631059,Customer,2043660,WBY1Z21070V307929,I3,No,9344825951,Private,2016-09-27 13:41:38,2016-09-27 14:59:46,28.56,...,5,85,0,55.665693,12.621481,0.0,0.0,2016-09-27 13:46:38,103151,550062
2631542,Customer,2118489,WBY1Z21090V307821,I3,No,9344857319,Private,2016-09-28 08:51:39,2016-09-28 09:39:17,27.16,...,2,33,0,55.630176,12.648790,0.0,0.0,2016-09-28 08:53:39,185203,550062


In [19]:
# Sweden and Bornholm
df[df.Start_Long > 13].sort_values(by = 'Reservation_Time')

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
733460,Customer,829813,WBY1Z21020V307840,I3,No,9331420174,Private,2015-08-12 22:04:01,2015-08-12 23:03:44,20.37,...,5,34,4,55.606053,13.025068,55.602430,12.660331,2015-08-12 22:09:01,185121,155013
351052,Customer,866569,WBA2C1106GV519875,218D,No,9335030622,Private,2016-05-03 13:45:22,2016-10-03 20:10:57,230.41,...,32,18,27,59.332089,18.051437,58.111483,12.150692,2016-05-03 14:17:22,400114,825001
766566,Non_Customer,793661,WBY1Z210X0V307892,I3,No,9354880509,Private,2017-04-19 10:49:33,2017-04-19 13:17:08,0.00,...,2,32,72,55.276373,14.802931,55.277839,14.802453,2017-04-19 10:51:33,400213,400213
766664,Non_Customer,793661,WBY1Z210X0V307892,I3,No,9354888567,Private,2017-04-19 13:20:16,2017-04-19 13:49:04,0.00,...,2,0,0,55.277839,14.802453,55.278269,14.799411,2017-04-19 13:22:16,400213,400213
766682,Non_Customer,793661,WBY1Z210X0V307892,I3,No,9354889703,Private,2017-04-19 13:49:31,2017-04-19 16:10:11,0.00,...,1,41,97,55.278269,14.799411,55.069947,14.749238,2017-04-19 13:50:31,400213,400147
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647281,Non_Customer,809751,WBY1Z21080V307793,I3,No,9358232472,Private,2017-06-19 15:36:37,2017-06-19 15:39:19,0.00,...,1,0,0,55.102019,14.690349,55.100859,14.685823,2017-06-19 15:37:37,400334,400334
647307,Non_Customer,1041552,WBY1Z6109HV939138,I3 94,No,9358233881,Private,2017-06-19 16:03:40,2017-06-19 16:08:21,0.00,...,1,0,0,55.212866,14.970356,55.100938,14.685497,2017-06-19 16:04:40,400153,400334
656179,Non_Customer,1027097,WBY1Z6104HV939144,I3 94,No,9358688456,Private,2017-06-27 12:00:46,2017-06-27 12:31:44,0.00,...,9,41,98,55.099321,14.689407,55.676149,12.560335,2017-06-27 12:09:46,400334,102162
656238,Non_Customer,1097634,WBY1Z6100HV939139,I3 94,No,9358692769,Private,2017-06-27 13:21:56,2017-06-27 13:31:38,0.00,...,3,14,14,55.100571,14.688337,55.662822,12.517809,2017-06-27 13:24:56,400334,102721


In [20]:
# Jutland
df[(df.Start_Long < 11) & (df.Start_Long > 0) ]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
496868,Non_Customer,793639,WMWXU7105KTM91318,COOPER,No,9419190956,Private,2019-08-19 08:04:36,2019-08-19 08:47:12,0.0,...,20,5,100,56.461193,10.035505,55.683235,12.585613,2019-08-19 08:24:36,730323,102223
1477935,Non_Customer,793639,WBA1R5104J5K58092,118I,No,9407257717,Private,2019-04-06 18:44:56,2019-08-06 11:45:14,0.0,...,3814,33,96,56.469710,10.054346,55.419475,11.569249,2019-04-09 10:18:56,730313,340014
1583469,Non_Customer,225179,WBY1Z21040V308181,I3,Yes,9363811912,Private,2017-09-25 13:27:44,2017-09-25 13:29:43,0.0,...,1,62,62,53.598649,9.973126,53.598649,9.973124,2017-09-25 13:28:44,540111,540111
1583651,Non_Customer,225179,WBY1Z21080V308202,I3,Yes,9363822077,Private,2017-09-25 16:13:14,2017-09-25 16:16:10,0.0,...,2,79,79,53.598743,9.973440,53.598743,9.973439,2017-09-25 16:15:14,540111,540111
1614088,Non_Customer,793639,WBA1R510XJ7B13324,118I,No,9424808121,Private,2019-03-10 06:37:12,2019-03-10 09:26:08,0.0,...,35,67,38,55.411150,10.440528,55.634374,12.650062,2019-03-10 07:12:12,461123,185121
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2571519,Non_Customer,793639,WBA1R5108J7B14309,118I,No,9379439616,Private,2018-06-14 19:04:58,2018-06-15 06:41:23,0.0,...,693,94,94,55.438697,9.308134,55.650671,12.541162,2018-06-15 06:37:58,621243,102851
2571520,Non_Customer,793639,WBA1R5108J5K58192,118I,No,9379439627,Private,2018-06-15 06:34:25,2018-06-15 06:39:50,0.0,...,4,96,96,55.437764,9.306264,55.703714,12.568429,2018-06-15 06:38:25,621243,102422
2580056,Non_Customer,793639,WMWXR3100KTK68968,COOPER,No,9379662436,Private,2018-06-18 14:22:28,2018-06-18 14:33:41,0.0,...,6,76,81,55.434890,9.296048,55.675832,12.559100,2018-06-18 14:28:28,621243,102162
2599546,Non_Customer,793661,WMWXU7108KTM91409,COOPER,No,9380185309,Private,2018-06-26 11:56:23,2018-06-26 12:43:40,0.0,...,1,80,77,55.434576,9.295855,55.692034,12.484833,2018-06-26 11:57:23,621243,102641


In [21]:
df[df.Start_Lat>0].sort_values(by = 'Start_Lat')

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
260794,Non_Customer,13487,WBAJG11020ED99355,X1 SDRIVE18I,No,9385799406,Private,2018-09-25 06:41:35,2018-09-25 16:08:42,0.00,...,245,45,31,52.662932,12.909675,54.255309,13.589357,2018-09-25 10:46:35,376111,390211
1583469,Non_Customer,225179,WBY1Z21040V308181,I3,Yes,9363811912,Private,2017-09-25 13:27:44,2017-09-25 13:29:43,0.00,...,1,62,62,53.598649,9.973126,53.598649,9.973124,2017-09-25 13:28:44,540111,540111
1583651,Non_Customer,225179,WBY1Z21080V308202,I3,Yes,9363822077,Private,2017-09-25 16:13:14,2017-09-25 16:16:10,0.00,...,2,79,79,53.598743,9.973440,53.598743,9.973439,2017-09-25 16:15:14,540111,540111
1148319,Non_Customer,793639,WBA1R5106J7B14311,118I,No,9394837953,Private,2019-02-24 21:07:00,2019-02-25 08:12:30,0.00,...,652,50,50,54.657593,11.945839,55.620649,12.606360,2019-02-25 07:59:00,376124,185154
2500919,Non_Customer,793639,WBA1R5107J5K57440,118I,No,9390733210,Private,2018-12-15 12:48:09,2018-12-16 12:07:53,0.00,...,268,63,63,54.775789,11.438965,55.620966,12.606330,2018-12-15 17:16:09,360133,185154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670333,Customer,3603143,WBA31AA08L3H87538,X1 SDRIVE18I,No,9427403373,Private,2019-10-22 11:32:19,2019-10-22 17:28:16,0.00,...,1,18,84,56.234743,8.202535,55.696808,12.527990,2019-10-22 11:33:19,760241,102542
496868,Non_Customer,793639,WMWXU7105KTM91318,COOPER,No,9419190956,Private,2019-08-19 08:04:36,2019-08-19 08:47:12,0.00,...,20,5,100,56.461193,10.035505,55.683235,12.585613,2019-08-19 08:24:36,730323,102223
1477935,Non_Customer,793639,WBA1R5104J5K58092,118I,No,9407257717,Private,2019-04-06 18:44:56,2019-08-06 11:45:14,0.00,...,3814,33,96,56.469710,10.054346,55.419475,11.569249,2019-04-09 10:18:56,730313,340014
720787,Non_Customer,855363,WMWXS5103G2B10608,COOPER,No,9329326661,Private,2015-10-21 13:13:58,2015-10-21 13:38:09,0.00,...,2,99,99,56.651368,12.857907,56.651429,12.858104,2015-10-21 13:15:58,217013,217013


In [22]:
# Car in Germany in the middle of the data..
df[df.CarID == 'WBY1Z21040V308181'].sort_values(by = 'Reservation_Time').iloc[-30:-20]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
635379,Customer,2316589,WBY1Z21040V308181,I3,No,9357695970,Private,2017-09-06 18:43:33,2017-09-06 19:03:22,57.6,...,2,0,0,55.660678,12.605743,55.581953,12.639533,2017-09-06 18:45:33,103172,155017
1583470,Non_Customer,225179,WBY1Z21040V308181,I3,Yes,9363811978,Private,2017-09-25 13:22:05,2017-09-25 13:25:43,0.0,...,3,62,62,55.437519,11.818469,53.598649,9.973126,2017-09-25 13:25:05,329031,540111
1583469,Non_Customer,225179,WBY1Z21040V308181,I3,Yes,9363811912,Private,2017-09-25 13:27:44,2017-09-25 13:29:43,0.0,...,1,62,62,53.598649,9.973126,53.598649,9.973124,2017-09-25 13:28:44,540111,540111
913719,Non_Customer,793639,WBY1Z21040V308181,I3,No,9349921108,Private,2017-10-01 18:58:33,2017-10-01 20:08:28,0.0,...,58,100,92,55.665634,12.483597,55.673764,12.543404,2017-10-01 19:56:33,102773,147131
967871,Customer,815006,WBY1Z21040V308181,I3,No,9352816246,Private,2017-10-03 07:36:26,2017-10-03 07:47:30,0.0,...,1,0,0,55.648558,12.535806,55.631752,12.599764,2017-10-03 07:37:26,102852,185143
879566,Customer,795107,WBY1Z21040V308181,I3,No,9356009438,Private,2017-10-05 09:19:51,2017-10-05 09:26:41,0.0,...,1,0,0,55.701065,12.549062,55.701086,12.549048,2017-10-05 09:20:51,102431,102431
879608,Customer,2269498,WBY1Z21040V308181,I3,No,9356010927,Private,2017-10-05 09:43:59,2017-10-05 10:16:04,57.6,...,1,0,0,55.701086,12.549048,55.734666,12.394038,2017-10-05 09:44:59,102431,151022
880152,Customer,2389680,WBY1Z21040V308181,I3,No,9356034416,Business,2017-10-05 17:20:43,2017-10-05 18:01:12,65.91,...,8,0,0,55.734666,12.394038,55.710845,12.574337,2017-10-05 17:28:43,151022,102325
636466,Customer,2403205,WBY1Z21040V308181,I3,No,9357746404,Private,2017-10-06 16:13:49,2017-10-06 16:20:49,0.0,...,1,6,99,55.581953,12.639533,55.602708,12.658814,2017-10-06 16:14:49,155017,155013
913980,Customer,1037567,WBY1Z21040V308181,I3,No,9349935922,Private,2017-11-01 08:42:28,2017-11-01 09:12:05,45.93,...,6,92,82,55.673764,12.543404,55.695418,12.595481,2017-11-01 08:48:28,147131,102336


## Weird times

In [23]:
df[df.Reservation_Time > df.End_Time].sort_values(by = 'RentalID')

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
1595034,Customer,822637,WBY1Z21040V307905,I3,No,9327538014,Private,2015-11-09 18:20:41,2015-09-13 09:49:52,2.12,...,0,64,0,55.684635,12.481256,55.689038,12.464122,2015-11-09 18:20:41,102634,175032
1595626,Customer,797054,WBY1Z21000V307755,I3,No,9327579373,Private,2015-12-09 17:24:03,2015-09-13 09:29:24,5.93,...,0,38,0,55.706262,12.589599,55.687497,12.544636,2015-12-09 17:24:03,102341,102444
1595712,Non_Customer,793664,WBY1Z21050V308237,I3,No,9327582622,Private,2015-12-09 18:52:01,2015-09-14 08:07:20,0.00,...,0,23,99,55.682745,12.488288,55.682745,12.488265,2015-12-09 18:52:01,102633,102633
1595792,Customer,812933,WBY1Z21080V307874,I3,No,9327588787,Private,2015-12-09 22:17:59,2015-09-13 12:40:45,4.37,...,8,74,71,55.688638,12.528939,55.651131,12.473902,2015-12-09 22:25:59,147161,167023
1595805,Customer,821859,WBY1Z21000V307903,I3,No,9327589840,Private,2015-12-09 23:14:51,2015-09-13 00:13:46,20.37,...,3,69,99,55.718156,12.599125,55.655766,12.500946,2015-12-09 23:17:51,102332,102752
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1368583,Non_Customer,1109469,WBY8P2103K7D82416,I3 120,No,9429719640,Private,2019-11-30 22:43:11,2019-01-12 05:35:14,0.00,...,403,6,6,55.677503,12.577796,55.680674,12.580411,2019-12-01 05:26:11,102122,102121
1370163,Non_Customer,793639,WBA31AA02L5N90401,X1 SDRIVE18I,No,9429739899,Private,2019-11-30 13:28:34,2019-01-12 15:45:03,0.00,...,1566,3,84,55.642268,12.603632,55.639966,12.600910,2019-12-01 15:34:34,103252,103252
1370256,Non_Customer,793639,WMWXU7108KTM91345,COOPER,No,9429741117,Private,2019-11-30 14:02:48,2019-01-12 16:17:30,0.00,...,1561,52,52,55.664820,12.582789,55.621618,12.606269,2019-12-01 16:03:48,103193,185154
1370262,Non_Customer,793639,WMWXU7102KTM90630,COOPER,No,9429741151,Private,2019-11-30 02:24:37,2019-01-12 16:16:36,0.00,...,2260,35,32,55.654054,12.623534,55.621639,12.606267,2019-12-01 16:04:37,103214,185154


In [24]:
df[df.CarID=='WMWXU7102KTM90630'].sort_values(by = 'Reservation_Time').iloc[-200:-180]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
1960627,Customer,3022614,WMWXU7102KTM90630,COOPER,No,9429586947,Private,2019-11-28 16:40:43,2019-11-28 16:49:34,1.87,...,2,52,50,55.703815,12.49401,55.694029,12.487516,2019-11-28 16:42:43,102612,102641
1961205,Non_Customer,793639,WMWXU7102KTM90630,COOPER,No,9429595761,Private,2019-11-28 18:21:48,2019-11-28 19:07:16,0.0,...,45,52,52,55.694029,12.487516,55.694028,12.487516,2019-11-28 19:06:48,102641,102641
1962478,Customer,3743091,WMWXU7102KTM90630,COOPER,No,9429614585,Private,2019-11-29 08:11:10,2019-11-29 08:21:18,1.07,...,6,52,52,55.694028,12.487516,55.694028,12.487516,2019-11-29 08:17:10,102641,102641
1962622,Customer,2828427,WMWXU7102KTM90630,COOPER,No,9429616323,Business,2019-11-29 08:32:30,2019-11-29 09:00:05,3.0,...,14,52,50,55.694028,12.487516,55.70986,12.532094,2019-11-29 08:46:30,102641,102533
1962794,Customer,3144384,WMWXU7102KTM90630,COOPER,No,9429619317,Private,2019-11-29 09:27:51,2019-11-29 09:34:45,2.14,...,2,52,50,55.70986,12.532094,55.700383,12.522612,2019-11-29 09:29:51,102533,102543
1962835,Customer,2769687,WMWXU7102KTM90630,COOPER,No,9429620248,Private,2019-11-29 09:37:30,2019-11-29 09:54:11,3.21,...,5,50,47,55.700383,12.522612,55.692225,12.544574,2019-11-29 09:42:30,102543,102453
1963025,Customer,3296480,WMWXU7102KTM90630,COOPER,No,9429624689,Business,2019-11-29 10:50:00,2019-11-29 12:04:08,31.27,...,2,47,50,55.692225,12.544574,55.65462,12.526675,2019-11-29 10:52:00,102453,102711
1963287,Customer,3296480,WMWXU7102KTM90630,COOPER,No,9429630366,Private,2019-11-29 12:04:51,2019-11-29 15:17:32,37.48,...,17,47,42,55.65462,12.526675,55.654183,12.5268,2019-11-29 12:21:51,102711,102711
1963920,Customer,3642388,WMWXU7102KTM90630,COOPER,No,9429641934,Private,2019-11-29 15:19:18,2019-11-29 15:46:49,2.94,...,5,42,35,55.654183,12.5268,55.685909,12.551368,2019-11-29 15:24:18,102711,102445
1964704,Customer,2543449,WMWXU7102KTM90630,COOPER,No,9429652738,Private,2019-11-29 17:49:00,2019-11-29 18:32:00,12.42,...,14,37,35,55.685909,12.551368,55.693684,12.613323,2019-11-29 18:03:00,102445,103112


## Other columns

In [25]:
df[df.Customer_Group == 'Non_Customer']

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
0,Non_Customer,793639,WBY1Z21080V307924,I3,No,9335872135,Private,2016-03-24 11:48:43,2016-02-04 10:00:19,0.0,...,0,0,0,55.678763,12.552853,0.000000,0.000000,2016-03-24 11:48:43,147111,550062
1,Non_Customer,1035973,WBY1Z21080V307857,I3,No,9336114126,Private,2016-03-30 15:37:39,2016-01-04 00:40:38,0.0,...,0,62,47,55.770626,12.519300,55.770389,12.518839,2016-03-30 15:37:39,173061,173061
2,Non_Customer,998095,WBY1Z21020V307904,I3,No,9336153910,Private,2016-03-31 13:08:16,2016-05-04 08:32:25,0.0,...,1,85,79,55.621588,12.606951,55.621532,12.606279,2016-03-31 13:09:16,185154,185154
3,Non_Customer,999604,WBY1Z21010V307926,I3,No,9336158303,Private,2016-03-31 14:43:00,2016-01-04 07:10:00,0.0,...,1,0,71,55.770077,12.518914,55.769746,12.519123,2016-03-31 14:44:00,173061,173061
4,Non_Customer,1035969,WBY1Z21070V308210,I3,No,9336160465,Private,2016-03-31 15:21:36,2016-01-04 14:24:17,0.0,...,1,53,52,55.770623,12.519791,55.770439,12.518937,2016-03-31 15:22:36,173061,173061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2633887,Non_Customer,793639,WBY1Z21050V308187,I3,No,9345006012,Private,2016-09-30 20:54:26,2016-09-30 21:04:52,0.0,...,163,100,93,55.705041,12.498220,55.688268,12.525764,2016-09-30 23:37:26,102615,147213
2633898,Non_Customer,793639,WBY1Z21060V307971,I3,No,9345006845,Private,2016-09-30 21:16:39,2016-09-30 21:32:45,0.0,...,185,60,51,55.698907,12.472528,55.684551,12.541961,2016-10-01 00:21:39,102652,147121
2633899,Non_Customer,793639,WBY1Z210X0V308007,I3,No,9345006854,Private,2016-09-30 21:16:51,2016-09-30 21:32:44,0.0,...,186,12,96,55.698951,12.472341,55.684770,12.531710,2016-10-01 00:22:51,102652,147162
2633910,Non_Customer,793639,WBY1Z21060V308084,I3,No,9345007816,Private,2016-09-30 21:45:36,2016-09-30 22:03:24,0.0,...,215,97,89,55.705516,12.482168,55.674171,12.560710,2016-10-01 01:20:36,102613,102161


In [26]:
df[df.Rental_flag == 'Yes']

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
92,Customer,1041131,WBY1Z21030V307927,I3,Yes,9336190387,Private,2016-01-04 08:14:43,2016-01-04 08:32:31,8.80,...,8,57,50,55.671970,12.561281,55.654693,12.612746,2016-01-04 08:22:43,102182,103224
162,Customer,341960,WBY1Z210X0V308248,I3,Yes,9336195348,Private,2016-01-04 09:13:18,2016-01-04 09:26:30,6.01,...,3,100,90,55.666223,12.544056,55.632389,12.574830,2016-01-04 09:16:18,102821,103291
226,Customer,19617,WBY1Z21020V308048,I3,Yes,9336201372,Private,2016-01-04 11:08:34,2016-01-04 11:43:16,15.03,...,4,71,57,55.687231,12.549290,55.692560,12.546405,2016-01-04 11:12:34,102444,102453
227,Customer,24827,WBY1Z21040V307791,I3,Yes,9336201380,Private,2016-01-04 11:08:48,2016-01-04 11:49:28,18.68,...,9,62,51,55.632424,12.644685,55.681026,12.604476,2016-01-04 11:17:48,185125,103132
298,Customer,110192,WBY1Z210X0V308072,I3,Yes,9336207064,Private,2016-01-04 12:49:24,2016-01-04 13:12:59,11.38,...,9,55,49,55.708624,12.578704,55.671733,12.539777,2016-01-04 12:58:24,102343,147131
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2633885,Customer,440147,WBY1Z21060V307954,I3,Yes,9345006009,Private,2016-09-30 20:54:26,2016-09-30 21:16:58,9.88,...,8,55,44,55.662403,12.623958,55.710676,12.566043,2016-09-30 21:02:26,103151,102412
2633903,Customer,1032015,WBY1Z21010V307814,I3,Yes,9345007233,Private,2016-09-30 21:16:24,2016-09-30 22:02:05,16.12,...,11,83,97,55.662269,12.604593,55.685559,12.586609,2016-09-30 21:27:24,103172,102223
2633919,Customer,457665,WBY1Z21060V308070,I3,Yes,9345008207,Private,2016-09-30 21:52:27,2016-09-30 22:29:45,22.67,...,1,56,25,55.630208,12.648793,55.665015,12.556939,2016-09-30 21:53:27,185203,102812
2633966,Customer,427906,WBY1Z21080V307955,I3,Yes,9345010629,Private,2016-09-30 23:02:50,2016-09-30 23:13:13,5.37,...,1,97,92,55.665828,12.565080,55.682833,12.584502,2016-09-30 23:03:50,102181,102223


In [27]:
df.sort_values(by = 'Reservation_Minutes')

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
0,Non_Customer,793639,WBY1Z21080V307924,I3,No,9335872135,Private,2016-03-24 11:48:43,2016-02-04 10:00:19,0.00,...,0,0,0,55.678763,12.552853,0.000000,0.000000,2016-03-24 11:48:43,147111,550062
984006,Customer,2151327,WBY1Z21050V307962,I3,No,9353638538,Private,2017-03-26 11:09:43,2017-03-26 15:28:17,160.00,...,0,0,0,55.669291,12.580119,55.670481,12.584305,2017-03-26 11:09:43,103192,103142
984005,Customer,2304622,WBY1Z21000V307951,I3,No,9353638526,Private,2017-03-26 10:53:47,2017-03-26 11:07:43,32.40,...,0,0,0,55.692822,12.463192,55.698181,12.458396,2017-03-26 10:53:47,175041,175043
983999,Customer,1000748,WBY1Z21090V307947,I3,No,9353638259,Private,2017-03-26 11:06:09,2017-03-26 18:43:11,359.51,...,0,0,0,55.656356,12.600851,55.672874,12.560838,2017-03-26 11:06:09,103242,102184
2259184,Customer,818884,WBY1Z21070V308188,I3,No,9377259841,Private,2018-11-05 12:32:50,2018-11-05 12:41:18,10.80,...,0,59,57,55.697268,12.543893,55.696615,12.523467,2018-11-05 12:32:50,102453,102551
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1499203,Non_Customer,793639,WBY1Z210X0V307911,I3,No,9408102693,Private,2019-05-13 13:22:19,2019-06-18 16:08:50,0.00,...,45960,0,0,55.634986,12.650334,0.000000,0.000000,2019-06-14 11:22:19,185121,550062
1508474,Non_Customer,793639,WBY1Z21030V307989,I3,No,9408401227,Private,2019-05-13 14:06:03,2019-06-18 16:08:51,0.00,...,50135,0,0,55.635009,12.650490,0.000000,0.000000,2019-06-17 09:41:03,185121,550062
1508467,Non_Customer,793639,WBY1Z21060V307999,I3,No,9408400365,Private,2019-05-13 14:05:58,2019-06-18 16:08:50,0.00,...,50136,0,0,55.634987,12.650426,0.000000,0.000000,2019-06-17 09:41:58,185121,550062
1508476,Non_Customer,793639,WBY1Z21050V307766,I3,No,9408404530,Private,2019-05-13 14:02:03,2019-06-18 16:08:51,0.00,...,50140,0,0,55.635113,12.650348,0.000000,0.000000,2019-06-17 09:42:03,185121,550062


In [28]:
df[df.CarID == 'WBY1Z21050V307993'].sort_values('Reservation_Time').iloc[-550:-540]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
1702579,Customer,3257292,WBY1Z21050V307993,I3,No,9400375651,Private,2019-02-04 18:27:14,2019-02-04 18:55:37,60.8,...,10,38,29,55.659379,12.630345,55.667792,12.545928,2019-02-04 18:37:14,103212,102821
1702760,Customer,2985859,WBY1Z21050V307993,I3,No,9400379233,Private,2019-02-04 19:16:57,2019-02-04 19:35:49,17.33,...,10,29,24,55.667792,12.545928,55.686779,12.536951,2019-02-04 19:26:57,102821,147161
1702938,Customer,828693,WBY1Z21050V307993,I3,No,9400382472,Private,2019-02-04 20:20:58,2019-02-04 20:51:08,64.0,...,11,5,100,55.686779,12.536951,55.654816,12.618666,2019-02-04 20:31:58,147161,103223
1783729,Non_Customer,1112124,WBY1Z21050V307993,I3,No,9402866073,Private,2019-02-05 00:23:35,2019-02-05 02:10:30,0.0,...,0,75,48,55.69442,12.550729,55.634472,12.648757,2019-02-05 00:23:35,102443,185121
1493665,Non_Customer,793639,WBY1Z21050V307993,I3,No,9407597629,Private,2019-02-05 14:44:54,2019-06-18 16:08:50,0.0,...,59009,0,0,55.634966,12.650418,0.0,0.0,2019-03-18 14:13:54,185121,550062
1113833,Customer,1015209,WBY1Z21050V307993,I3,No,9394090697,Private,2019-02-13 07:42:30,2019-02-13 08:13:58,22.53,...,19,58,51,55.713129,12.572044,55.701208,12.600449,2019-02-13 08:01:30,102324,102336
1114545,Customer,3219128,WBY1Z21050V307993,I3,No,9394109802,Business,2019-02-13 13:22:25,2019-02-13 13:58:53,80.0,...,2,51,39,55.701208,12.600449,55.63022,12.64893,2019-02-13 13:24:25,102336,185203
1114742,Customer,1053005,WBY1Z21050V307993,I3,No,9394115235,Private,2019-02-13 14:52:45,2019-02-13 15:23:13,43.07,...,4,39,19,55.63022,12.64893,55.679108,12.479413,2019-02-13 14:56:45,185203,102634
1115510,Customer,2658148,WBY1Z21050V307993,I3,No,9394130425,Private,2019-02-13 18:44:27,2019-02-13 19:24:02,51.2,...,8,19,10,55.679108,12.479413,55.674144,12.571492,2019-02-13 18:52:27,102634,102131
1116382,Customer,812023,WBY1Z21050V307993,I3,No,9394146782,Private,2019-02-14 05:20:48,2019-02-14 05:56:35,84.8,...,17,100,90,55.674144,12.571492,55.630231,12.64884,2019-02-14 05:37:48,102131,185203


# Create Vacancy

In [29]:
# Haversine function
def haversine(point1, point2):
    # convert decimal degrees to radians
    lat1, lon1 = map(np.radians, point1)
    lat2, lon2 = map(np.radians, point2)

    # Deltas
    delta_lon = lon2 - lon1 
    delta_lat = lat2 - lat1 
    
    # haversine formula 
    a = np.sin(delta_lat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(delta_lon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    r = 6371000 # Radius of earth in m
    return c * r

In [30]:
df.columns

Index(['Customer_Group', 'CustomerID', 'CarID', 'Engine', 'Rental_flag',
       'RentalID', 'Rental_Usage_Type', 'Reservation_Time', 'End_Time',
       'Revenue', 'Distance', 'Reservation_Minutes', 'Fuel_Start', 'Fuel_End',
       'Start_Lat', 'Start_Long', 'End_Lat', 'End_Long', 'Start_Time',
       'Start_Zone', 'End_Zone'],
      dtype='object')

In [39]:
101

101

In [35]:
df_sorted = df.sort_values("Reservation_Time")

In [None]:
data = []
for i, car in enumerate(df_sorted.CarID.unique()):
    if car == '-':
        continue
    car_sub_df = df_sorted[df_sorted.CarID == car]
    if not i%10:
        print(f'{i} cars processed' ,end="\r")
    for (_, row1), (_, row2) in zip(car_sub_df[:-1].iterrows(),car_sub_df[1:].iterrows()):
        park_time = row1['End_Time']
        reservation_time = row2['Reservation_Time']
        start_time = row2['Start_Time']
        time_to_reservation = (row2['Reservation_Time']-row1['End_Time']).total_seconds()/3600
        time_to_start = (row2['Start_Time']-row1['End_Time']).total_seconds()/3600
        park_location_lat = row1['Latitude_End']
        park_location_long = row1['Longitude_End']
        park_zone = row1['Zone_End']
        park_fuel = row1['Battery_End']
        leave_fuel = row2['Battery_Start']
        engine = row1['Enigine']
        moved = haversine(row1.loc[['Latitude_End','Longitude_End']].values, row2.loc[['Latitude_Start','Longitude_Start']].values) 
        data.append([car, park_time,reservation_time, start_time, time_to_reservation, time_to_start, park_location_lat, park_location_long, park_zone, park_fuel, leave_fuel, engine, moved])

# Create new df
df_vacancy = pd.DataFrame(data = data, columns = ['car', 'park_time', 'reservation_time', 'start_time','time_to_reservation', 'time_to_start', 'park_location_lat', 'park_location_long', 'park_zone', 'park_fuel', 'leave_fuel', 'engine' 'moved'])

# Infer types
df_vacancy = df_vacancy.convert_dtypes()