In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import glob
import numpy as np
import tqdm
import datetime
import geopandas as gpd
import rtree

In [2]:
files = glob.glob("data/raw/SNData/*.csv")

dfs = []
for f in tqdm.tqdm(files):
    dfs.append(pd.read_csv(f, header=0, sep=";"))

Full_data = pd.concat(dfs,ignore_index=True) # Save this to interim
Full_data.to_csv('data/interim/Full_data.csv')

100%|██████████| 53/53 [00:07<00:00,  7.04it/s]


In [3]:
# Drop 53 rows with na values
df = Full_data.dropna()

# Rename Columns to English
df. columns = ['Customer_Group', 'CustomerID', 'CarID', 'Engine', 'Rental_flag', 'RentalID', 'Rental_Usage_Type', 'Reservation_Time', 'End_Time', 'Revenue', 'Distance', 'Drives', 'Reservation_Minutes','Fuel_Start','Fuel_End','Start_Lat', 'Start_Long', 'End_Lat', 'End_Long']

# Drop drives as it has no info (only ones)
df = df.drop(columns = 'Drives')
df

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,Distance,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long
0,Non_Customer,793639.0,WBY1Z21080V307924,I3,No,9.335872e+09,Private,24.03.2016 11:48:43,02.04.2016 10:00:19,0.00,0,0,0,0,55.678763,12.552853,0.000000,0.000000
1,Non_Customer,1035973.0,WBY1Z21080V307857,I3,No,9.336114e+09,Private,30.03.2016 15:37:39,01.04.2016 00:40:38,0.00,0,0,62,47,55.770626,12.519300,55.770389,12.518839
2,Non_Customer,998095.0,WBY1Z21020V307904,I3,No,9.336154e+09,Private,31.03.2016 13:08:16,05.04.2016 08:32:25,0.00,2,1,85,79,55.621588,12.606951,55.621532,12.606279
3,Non_Customer,999604.0,WBY1Z21010V307926,I3,No,9.336158e+09,Private,31.03.2016 14:43:00,01.04.2016 07:10:00,0.00,0,1,0,71,55.770077,12.518914,55.769746,12.519123
4,Non_Customer,1035969.0,WBY1Z21070V308210,,No,9.336160e+09,Private,31.03.2016 15:21:36,01.04.2016 14:24:17,0.00,0,1,53,52,55.770623,12.519791,55.770439,12.518937
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2633979,Customer,1070662.0,WBY1Z21010V308185,I3,No,9.345011e+09,Private,30.09.2016 23:39:05,30.09.2016 23:50:54,5.16,4,10,46,41,55.694700,12.553776,55.678740,12.587144
2633980,Customer,1041705.0,WBY1Z21080V308250,I3,No,9.345011e+09,Private,30.09.2016 23:42:18,30.09.2016 23:52:14,3.44,6,8,59,52,55.648401,12.542945,55.641310,12.615295
2633981,Customer,2112471.0,WBY1Z21020V308261,I3,No,9.345011e+09,Private,30.09.2016 23:33:39,30.09.2016 23:52:03,8.17,9,3,39,30,55.664744,12.580875,55.719856,12.540863
2633982,Customer,440147.0,WBY1Z21060V307954,I3,Yes,9.345011e+09,Private,30.09.2016 23:41:56,30.09.2016 23:57:30,6.88,9,4,44,35,55.710676,12.566043,55.667453,12.619987


In [4]:
# Remove all rows with a CarID as it can not be used
df = df[df.CarID != '0']

In [5]:
# Engine has two types of missing values that is alligned
df["Engine"].replace({" ": '0'}, inplace=True)

In [6]:
# If a CarID already has an engine type assign that to the missing ones
Engine_dict = {c: df[df.CarID == c].Engine.nunique() for c in df[df.Engine == '0'].CarID.unique()}
for car, engine in Engine_dict.items():
    if engine == 1:
        continue
    True_Engine = [x for x in df[df.CarID == car].Engine.unique() if x!= '0'][0]
    df.loc[(df.CarID == car) & (df.Engine == '0'), 'Engine'] = True_Engine

# Populate the rest manual based on ID
df.loc[(df.CarID == 'WBA1R5104J7B14310') & (df.Engine == '0'), 'Engine'] = '118I'
df.loc[(df.CarID == 'WBA1R5104J5K58061') & (df.Engine == '0'), 'Engine'] = '118I'
df.loc[(df.CarID == 'WBA1R5103K7D66678') & (df.Engine == '0'), 'Engine'] = '118I'
df.loc[(df.CarID == 'WBY8P2105K7D70350') & (df.Engine == '0'), 'Engine'] = 'I3 120'
df.loc[(df.CarID == 'WBY8P2102K7D70287') & (df.Engine == '0'), 'Engine'] = 'I3 120'

## Times

In [7]:
df['Reservation_Time'] = pd.to_datetime(df['Reservation_Time'], format="%d.%m.%Y %H:%M:%S")
df['End_Time'] = pd.to_datetime(df['End_Time'], format="%d.%m.%Y %H:%M:%S")

## Fix 0,0 locations

We also accept the other ones outside Copenhagen as the cars must have been there. They can be removed in the vacancy dataset

In [8]:
for i, row in df[(df.Start_Lat < 5)].iterrows():
    # Skip if first instance as it will unaffect vacancy
    sub_df = df[df.CarID == row.CarID].sort_values('RentalID')
    err_index = sub_df.index.get_loc(i)
    if err_index == 0:
        continue

    # Populate based on previous end 
    df.loc[i, ['Start_Lat', 'Start_Long']] = sub_df.iloc[err_index-1].loc[['End_Lat','End_Long']].values

In [156]:
for i, row in df[(df.End_Lat < 5)].iterrows():
    sub_df = df[df.CarID == row.CarID].sort_values('RentalID')
    err_index = sub_df.index.get_loc(i)

    # Will fail if last index
    try:
        df.loc[i, ['End_Lat', 'End_Long']] = sub_df.iloc[err_index+1].loc[['Start_Lat','Start_Long']].values
    except:
        continue
    

## Add zones

In [157]:
# Load shapefile and set projection
shapefile = gpd.read_file("../Zonekort/LTM_Zone3/zones_level3.shp")
shapefile = shapefile.to_crs(epsg=4326)

In [158]:
# Create a geoDF with geometry as starting point
gdf_start = gpd.GeoDataFrame(df, geometry= gpd.points_from_xy(df.Start_Long, df.Start_Lat))

# Set projection
gdf_start = gdf_start.set_crs(epsg=4326)

In [159]:
# Populate zones based on which zone they are within
gdpj_start  = gpd.sjoin(gdf_start, shapefile, op='within')
df['Start_Zone'] = gdpj_start.zoneid

In [160]:
# Populate the rest based on which zone they are closest too
Start_zone_filler = {x: shapefile.zoneid[shapefile.distance(df.loc[x].geometry).sort_values().index[0]] for x in df.index[df['Start_Zone'].isna()]}
df['Start_Zone'] = df['Start_Zone'].fillna(Start_zone_filler)


  Start_zone_filler = {x: shapefile.zoneid[shapefile.distance(df.loc[x].geometry).sort_values().index[0]] for x in df.index[df['Start_Zone'].isna()]}


In [161]:
# Create a geoDF with geometry as end point
gdf_end = gpd.GeoDataFrame(df, geometry= gpd.points_from_xy(df.End_Long, df.End_Lat))

# Set projection
gdf_end = gdf_end.set_crs(epsg=4326)

In [162]:
# Populate zones based on which zone they are within
gdpj_end  = gpd.sjoin(gdf_end, shapefile, op='within')
df['End_Zone'] = gdpj_end.zoneid

In [163]:
# Populate the rest based on which zone they are closest too
End_zone_filler = {x: shapefile.zoneid[shapefile.distance(df.loc[x].geometry).sort_values().index[0]] for x in df.index[df['End_Zone'].isna()]}
df['End_Zone'] = df['End_Zone'].fillna(End_zone_filler)


  End_zone_filler = {x: shapefile.zoneid[shapefile.distance(df.loc[x].geometry).sort_values().index[0]] for x in df.index[df['End_Zone'].isna()]}


In [164]:
# Remove geomery type and make IDs int columns
df = df.drop(columns = 'geometry')
df = df.astype({'CustomerID': 'int32', 'RentalID': 'int64', 'Start_Zone': 'int32','End_Zone': 'int32'})

In [165]:
# Check types
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2630477 entries, 0 to 2633983
Data columns (total 21 columns):
 #   Column               Dtype         
---  ------               -----         
 0   Customer_Group       object        
 1   CustomerID           int32         
 2   CarID                object        
 3   Engine               object        
 4   Rental_flag          object        
 5   RentalID             int64         
 6   Rental_Usage_Type    object        
 7   Reservation_Time     datetime64[ns]
 8   End_Time             datetime64[ns]
 9   Revenue              float64       
 10  Distance             int64         
 11  Reservation_Minutes  int64         
 12  Fuel_Start           int64         
 13  Fuel_End             int64         
 14  Start_Lat            float64       
 15  Start_Long           float64       
 16  End_Lat              float64       
 17  End_Long             float64       
 18  Start_Time           datetime64[ns]
 19  Start_Zone           

In [11]:
# Sweden and Bornholm
#df[df.Start_Long > 13].sort_values(by = 'Reservation_Time')

In [10]:
# Jutland
#df[(df.Start_Long < 11) & (df.Start_Long > 0) ]

In [9]:
#df[df.Start_Lat>0].sort_values(by = 'Start_Lat')

In [12]:
# Car in Germany in the middle of the data..
#df[df.CarID == 'WBY1Z21040V308181'].sort_values(by = 'Reservation_Time').iloc[-30:-20]

## Weird times

In [21]:
# Winter Time
WinterTimeIndex = df[(df.Reservation_Time > df.End_Time) & (df.End_Time.apply(lambda x: x.month) == 10) & (tmp.End_Time.apply(lambda x: x.hour) < 4)].index
WinterTimeIndexBack = [2179859, 2179865, 1683947, 1683948]
WinterTimeIndexForward = [x for x in WinterTimeIndex if x not in WinterTimeIndexBack]
df.loc[WinterTimeIndexBack, 'Reservation_Time'] = df.loc[WinterTimeIndexBack, 'Reservation_Time'] - pd.to_timedelta(1,'h')
df.loc[WinterTimeIndexForward, 'End_Time'] = df.loc[WinterTimeIndexForward, 'End_Time'] + pd.to_timedelta(1,'h')

In [22]:
# How should we treat:
df.loc[[2186270,2186286,2186449]]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,Distance,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long
2186270,Customer,2146082.0,WMWXU7108KTM90583,COOPER,No,9387938000.0,Private,2018-10-30 14:29:13,2018-10-30 14:33:13,3.2,0,4,0,0,55.662179,12.534083,55.662179,12.534083
2186286,Customer,2146082.0,WMWXU7108KTM90583,COOPER,No,9387938000.0,Business,2018-10-30 14:33:53,2018-10-30 14:42:56,13.87,2,1,30,30,55.662179,12.534083,55.669684,12.546812
2186449,Customer,2146082.0,WMWXU7108KTM90583,COOPER,No,9387941000.0,Business,2018-10-30 15:27:20,2018-10-30 15:38:09,8.67,1,6,30,27,55.669684,12.546812,55.664356,12.538918


In [27]:
tmp = df[df.Reservation_Time > df.End_Time].sort_values(by = 'RentalID') # Check dates for apattern
tmp[(tmp.End_Time.apply(lambda x: x.month) == 10) & (tmp.End_Time.apply(lambda x: x.hour) < 4)] #Slut på levetid, 

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,Distance,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long


In [12]:
df.loc[WinterTimeIndexBack, 'Reservation_Time']

2179859   2018-10-28 01:53:33
2179865   2018-10-28 01:56:50
1683947   2019-10-27 01:51:34
1683948   2019-10-27 01:51:34
Name: Reservation_Time, dtype: datetime64[ns]

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2630477 entries, 0 to 2633983
Data columns (total 18 columns):
 #   Column               Dtype         
---  ------               -----         
 0   Customer_Group       object        
 1   CustomerID           float64       
 2   CarID                object        
 3   Engine               object        
 4   Rental_flag          object        
 5   RentalID             float64       
 6   Rental_Usage_Type    object        
 7   Reservation_Time     datetime64[ns]
 8   End_Time             datetime64[ns]
 9   Revenue              float64       
 10  Distance             int64         
 11  Reservation_Minutes  int64         
 12  Fuel_Start           int64         
 13  Fuel_End             int64         
 14  Start_Lat            float64       
 15  Start_Long           float64       
 16  End_Lat              float64       
 17  End_Long             float64       
dtypes: datetime64[ns](2), float64(7), int64(4), object(5)
memory u

In [214]:
# Backwards one hour with reserv and start 2179859, 2179865, 1683947, 1683948
subdf = df[df.CarID == 'WBA1R5109J7B13377'].sort_values(by = 'RentalID')
subdf[(subdf.Reservation_Time.apply(lambda x: x.month) == 10) & (subdf.Reservation_Time.apply(lambda x: x.day) > 26) & (subdf.Reservation_Time.apply(lambda x: x.year) == 2019)]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
1683948,Non_Customer,1120958,WBA1R5109J7B13377,118I,No,9427691242,Private,2019-10-27 02:51:34,2019-10-27 02:25:31,0.0,...,2,96,94,55.647926,12.268436,55.679345,12.573714,2019-10-27 02:53:34,169042,102141
1683975,Customer,3227619,WBA1R5109J7B13377,118I,No,9427691709,Private,2019-10-27 02:36:02,2019-10-27 02:49:43,35.2,...,3,94,94,55.679345,12.573714,55.666416,12.522672,2019-10-27 02:39:02,102141,102722
1684307,Customer,848621,WBA1R5109J7B13377,118I,No,9427697944,Private,2019-10-27 09:53:16,2019-10-27 10:14:20,32.0,...,3,94,92,55.666416,12.522672,55.708059,12.593211,2019-10-27 09:56:16,102722,102333
1684654,Customer,3713430,WBA1R5109J7B13377,118I,No,9427704141,Private,2019-10-27 11:46:01,2019-10-27 12:20:36,10.0,...,8,90,92,55.708059,12.593211,55.75432,12.519248,2019-10-27 11:54:01,102333,157262
1684940,Customer,3484267,WBA1R5109J7B13377,118I,No,9427708547,Private,2019-10-27 13:21:55,2019-10-27 13:43:05,32.0,...,2,92,90,55.75432,12.519248,55.78148,12.511751,2019-10-27 13:23:55,157262,173042
1685863,Customer,1037492,WBA1R5109J7B13377,118I,No,9427722902,Private,2019-10-27 18:04:57,2019-10-27 18:49:02,45.07,...,18,90,88,55.78148,12.511751,55.676181,12.55993,2019-10-27 18:22:57,173042,102162
1686066,Customer,1060059,WBA1R5109J7B13377,118I,No,9427726626,Private,2019-10-27 19:50:50,2019-10-27 20:00:48,15.6,...,1,88,86,55.676181,12.55993,55.650613,12.548528,2019-10-27 19:51:50,102162,102851
1686766,Customer,2534937,WBA1R5109J7B13377,118I,No,9427740479,Private,2019-10-28 07:05:12,2019-10-28 08:13:52,58.4,...,46,86,82,55.650613,12.548528,55.648406,12.469644,2019-10-28 07:51:12,102851,167023
1687501,Customer,1063324,WBA1R5109J7B13377,118I,No,9427767697,Private,2019-10-28 13:30:01,2019-10-28 14:13:54,1.6,...,18,84,82,55.648406,12.469644,55.677855,12.541781,2019-10-28 13:48:01,167023,147122
1687681,Customer,2531834,WBA1R5109J7B13377,118I,No,9427773073,Business,2019-10-28 15:16:16,2019-10-28 15:39:58,64.0,...,5,82,80,55.677855,12.541781,55.668484,12.48573,2019-10-28 15:21:16,147122,102773


In [176]:
# WBY1Z21030V308205 looks like +2 hours
subdf = df[df.CarID == 'WBY1Z21080V308085'].sort_values(by = 'RentalID')
subdf

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
1969968,Non_Customer,793664,WBY1Z21080V308085,I3,No,9326992555,Private,2015-08-30 17:00:49,2015-08-30 17:01:14,0.00,...,1,73,73,55.692053,12.617602,55.692051,12.617616,2015-08-30 17:01:49,103112,103112
1592248,Non_Customer,809751,WBY1Z21080V308085,I3,No,9327233967,Private,2015-09-04 21:37:34,2015-09-04 22:04:44,0.00,...,0,70,61,55.692051,12.617615,55.674157,12.560719,2015-09-04 21:37:34,103112,102161
1592265,Customer,710156,WBY1Z21080V308085,I3,No,9327235115,Private,2015-09-04 22:09:39,2015-09-04 22:28:36,0.00,...,0,61,53,55.674157,12.560719,55.636744,12.647062,2015-09-04 22:09:39,102161,185122
1592833,Customer,805877,WBY1Z21080V308085,I3,No,9327295583,Private,2015-09-06 14:22:39,2015-09-06 14:25:48,1.69,...,6,52,52,55.636744,12.647062,55.638541,12.646269,2015-09-06 14:28:39,185122,185122
1592859,Customer,805877,WBY1Z21080V308085,I3,No,9327297369,Private,2015-09-06 15:15:56,2015-09-06 15:18:04,1.27,...,1,51,50,55.638541,12.646269,55.637998,12.641161,2015-09-06 15:16:56,185122,185124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1844886,Customer,3463496,WBY1Z21080V308085,I3,No,9404208117,Private,2019-05-21 13:55:06,2019-05-21 14:26:55,22.00,...,2,66,59,55.634159,12.649971,55.677970,12.561133,2019-05-21 13:57:06,185121,102162
1845739,Customer,2915996,WBY1Z21080V308085,I3,No,9404225216,Private,2019-05-21 18:02:02,2019-05-21 18:29:05,67.20,...,7,46,46,55.677970,12.561133,55.786010,12.521652,2019-05-21 18:09:02,102162,173041
1845894,Customer,2465881,WBY1Z21080V308085,I3,No,9404228386,Private,2019-05-21 18:44:53,2019-05-21 19:22:04,34.80,...,9,46,6,55.786010,12.521652,55.622079,12.574919,2019-05-21 18:53:53,173041,103292
1846695,Non_Customer,1109469,WBY1Z21080V308085,I3,No,9404246570,Private,2019-05-21 19:22:09,2019-05-22 05:29:55,0.00,...,579,100,93,55.622079,12.574919,55.634534,12.648994,2019-05-22 05:01:09,103292,185121


In [139]:
tmp2 = df[df.Reservation_Time+pd.to_timedelta(7,'d') < df.End_Time].sort_values(by = 'RentalID')
#tmp2[tmp2.Customer_Group == 'Customer'] # 1152167 of these should be removed 
tmp2.iloc[:20]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,Distance,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time
1969590,Non_Customer,790031.0,WBY1Z21090V307818,I3,No,9326442000.0,Private,2015-08-17 13:59:14,2015-08-27 14:48:58,0.0,334,0,16,99,55.726729,12.582682,55.671036,12.583519,2015-08-17 13:59:14
1591238,Non_Customer,808838.0,WBY1Z210X0V307780,I3,No,9327127000.0,Private,2015-09-02 16:28:12,2015-09-13 12:19:27,0.0,351,0,3,99,55.70636,12.529268,55.674361,12.560594,2015-09-02 16:28:12
1593151,Non_Customer,817026.0,WBY1Z21060V308053,I3,No,9327351000.0,Private,2015-09-07 17:30:34,2015-09-24 14:08:26,0.0,581,0,19,99,55.674417,12.56056,55.676246,12.559132,2015-09-07 17:30:34
1596004,Non_Customer,808838.0,WBY1Z210X0V307780,I3,No,9327605000.0,Private,2015-09-13 13:12:02,2015-09-29 09:57:05,0.0,499,0,9,99,55.674361,12.560594,55.737039,12.477291,2015-09-13 13:12:02
727799,Non_Customer,793639.0,WBY1Z21060V307811,I3,No,9330365000.0,Private,2015-11-14 16:45:08,2015-12-02 12:03:44,0.0,0,0,30,30,55.679698,12.609623,55.674791,12.605215,2015-11-14 16:45:08
733006,Non_Customer,793639.0,WBY1Z21050V308058,I3,No,9331390000.0,Private,2015-12-08 09:29:30,2015-12-16 15:03:55,0.0,14,0,55,34,55.684804,12.537323,55.769769,12.519382,2015-12-08 09:29:30
734244,Non_Customer,793639.0,WBY1Z21050V308111,I3,No,9331489000.0,Private,2015-12-10 13:47:52,2015-12-18 16:06:27,0.0,0,0,6,77,55.676097,12.56057,55.620474,12.607469,2015-12-10 13:47:52
734806,Non_Customer,793639.0,WBY1Z21040V307953,I3,No,9331523000.0,Private,2015-12-11 09:52:45,2015-12-18 16:02:21,0.0,6,0,2,99,55.730663,12.361931,55.621929,12.604761,2015-12-11 09:52:45
0,Non_Customer,793639.0,WBY1Z21080V307924,I3,No,9335872000.0,Private,2016-03-24 11:48:43,2016-04-02 10:00:19,0.0,0,0,0,0,55.678763,12.552853,0.0,0.0,2016-03-24 11:48:43
7934,Non_Customer,1003492.0,WBY1Z21050V307976,I3,No,9336676000.0,Private,2016-04-11 17:19:33,2016-04-21 14:22:45,0.0,2,0,51,97,55.694098,12.583355,55.621629,12.606532,2016-04-11 17:19:33


In [141]:
subdf = df[df.CarID == 'WBY1Z210X0V307780'].sort_values(by = 'RentalID')
subdf.iloc[:20]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,Distance,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time
1969578,Customer,786735.0,WBY1Z210X0V307780,I3,No,9325982000.0,Private,2015-08-06 09:06:22,2015-08-06 10:15:56,0.0,0,0,85,90,55.6761,12.5683,55.4379,11.812772,2015-08-06 09:06:22
1591238,Non_Customer,808838.0,WBY1Z210X0V307780,I3,No,9327127000.0,Private,2015-09-02 16:28:12,2015-09-13 12:19:27,0.0,351,0,3,99,55.70636,12.529268,55.674361,12.560594,2015-09-02 16:28:12
1596004,Non_Customer,808838.0,WBY1Z210X0V307780,I3,No,9327605000.0,Private,2015-09-13 13:12:02,2015-09-29 09:57:05,0.0,499,0,9,99,55.674361,12.560594,55.737039,12.477291,2015-09-13 13:12:02
707473,Non_Customer,793639.0,WBY1Z210X0V307780,I3,No,9328508000.0,Private,2015-10-02 18:36:23,2015-10-02 18:58:36,0.0,10,1457,35,26,55.737039,12.477291,55.670974,12.529543,2015-10-03 18:53:23
707510,Non_Customer,811074.0,WBY1Z210X0V307780,I3,No,9328509000.0,Private,2015-10-02 19:06:45,2015-10-02 19:08:19,0.0,0,1,26,26,55.670974,12.529543,55.671026,12.529435,2015-10-02 19:07:45
709782,Non_Customer,793639.0,WBY1Z210X0V307780,I3,No,9328631000.0,Private,2015-10-05 17:53:02,2015-10-05 17:55:05,0.0,0,99,24,23,55.671026,12.529435,55.670976,12.529522,2015-10-05 19:32:02
711484,Customer,792113.0,WBY1Z210X0V307780,I3,No,9328756000.0,Private,2015-10-08 14:10:27,2015-10-08 14:22:32,5.51,4,2,99,94,55.670976,12.529522,55.684697,12.500431,2015-10-08 14:12:27
711794,Customer,826445.0,WBY1Z210X0V307780,I3,No,9328771000.0,Private,2015-10-08 19:49:27,2015-10-08 20:07:12,7.47,4,6,94,86,55.684697,12.500431,55.684262,12.503581,2015-10-08 19:55:27
712120,Customer,854069.0,WBY1Z210X0V307780,I3,No,9328795000.0,Private,2015-10-09 11:08:44,2015-10-09 11:21:53,5.93,3,6,86,81,55.684262,12.503581,55.684252,12.503593,2015-10-09 11:14:44
712404,Customer,813358.0,WBY1Z210X0V307780,I3,No,9328814000.0,Private,2015-10-09 17:19:28,2015-10-09 17:43:13,5.93,10,4,81,70,55.684252,12.503593,55.728324,12.56868,2015-10-09 17:23:28


In [129]:
# Times are really broken
subdf = df[df.CarID == 'WBA1R5106J7B13384'].sort_values(by = 'RentalID')
subdf.iloc[1534:1600] # log-error

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,Distance,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time
1123417,Customer,812232.0,WBA1R5106J7B13384,118I,No,9.394313e+09,Private,2019-02-16 13:12:15,2019-02-16 17:18:48,386.13,34,27,71,65,55.703862,12.577457,55.707539,12.580102,2019-02-16 13:39:15
1124125,Customer,3099942.0,WBA1R5106J7B13384,118I,No,9.394326e+09,Private,2019-02-16 17:19:06,2019-02-16 17:42:17,64.00,4,3,67,63,55.707539,12.580102,55.686772,12.567625,2019-02-16 17:22:06
1124303,Customer,2116648.0,WBA1R5106J7B13384,118I,No,9.394329e+09,Private,2019-02-16 17:58:08,2019-02-16 18:20:01,12.13,3,15,65,63,55.686772,12.567625,55.703331,12.568661,2019-02-16 18:13:08
1124910,Customer,3227456.0,WBA1R5106J7B13384,118I,No,9.394340e+09,Private,2019-02-16 21:07:29,2019-02-16 21:46:25,48.00,14,25,65,59,55.703331,12.568661,55.733310,12.444893,2019-02-16 21:32:29
1125176,Customer,3259145.0,WBA1R5106J7B13384,118I,No,9.394344e+09,Private,2019-02-16 23:20:09,2019-02-17 05:43:46,276.80,179,15,25,38,55.733310,12.444893,55.708182,12.475597,2019-02-16 23:35:09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1148162,Customer,3141661.0,WBA1R5106J7B13384,118I,No,9.394834e+09,Private,2019-02-25 05:55:32,2019-02-25 07:20:18,28.80,16,61,76,73,55.675255,12.546721,55.738907,12.392924,2019-02-25 06:56:32
1149678,Customer,3225646.0,WBA1R5106J7B13384,118I,No,9.394879e+09,Business,2019-02-25 17:01:04,2019-02-25 17:55:15,53.60,22,22,73,71,55.738907,12.392924,55.700799,12.595648,2019-02-25 17:23:04
1150983,Customer,3225646.0,WBA1R5106J7B13384,118I,No,9.394907e+09,Business,2019-02-26 07:47:45,2019-02-26 08:52:58,127.20,20,27,71,67,55.700663,12.595599,55.737304,12.388996,2019-02-26 08:14:45
1152167,Customer,3155921.0,WBA1R5106J7B13384,118I,No,9.394938e+09,Private,2019-02-16 09:41:20,2019-02-26 16:28:08,308.53,0,13,0,0,55.688568,12.564169,0.000000,0.000000,2019-02-16 09:54:20


In [27]:
df[df.CarID=='WMWXU7102KTM90630'].sort_values(by = 'Reservation_Time').iloc[-200:-180]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
1960627,Customer,3022614,WMWXU7102KTM90630,COOPER,No,9429586947,Private,2019-11-28 16:40:43,2019-11-28 16:49:34,1.87,...,2,52,50,55.703815,12.49401,55.694029,12.487516,2019-11-28 16:42:43,102612,102641
1961205,Non_Customer,793639,WMWXU7102KTM90630,COOPER,No,9429595761,Private,2019-11-28 18:21:48,2019-11-28 19:07:16,0.0,...,45,52,52,55.694029,12.487516,55.694028,12.487516,2019-11-28 19:06:48,102641,102641
1962478,Customer,3743091,WMWXU7102KTM90630,COOPER,No,9429614585,Private,2019-11-29 08:11:10,2019-11-29 08:21:18,1.07,...,6,52,52,55.694028,12.487516,55.694028,12.487516,2019-11-29 08:17:10,102641,102641
1962622,Customer,2828427,WMWXU7102KTM90630,COOPER,No,9429616323,Business,2019-11-29 08:32:30,2019-11-29 09:00:05,3.0,...,14,52,50,55.694028,12.487516,55.70986,12.532094,2019-11-29 08:46:30,102641,102533
1962794,Customer,3144384,WMWXU7102KTM90630,COOPER,No,9429619317,Private,2019-11-29 09:27:51,2019-11-29 09:34:45,2.14,...,2,52,50,55.70986,12.532094,55.700383,12.522612,2019-11-29 09:29:51,102533,102543
1962835,Customer,2769687,WMWXU7102KTM90630,COOPER,No,9429620248,Private,2019-11-29 09:37:30,2019-11-29 09:54:11,3.21,...,5,50,47,55.700383,12.522612,55.692225,12.544574,2019-11-29 09:42:30,102543,102453
1963025,Customer,3296480,WMWXU7102KTM90630,COOPER,No,9429624689,Business,2019-11-29 10:50:00,2019-11-29 12:04:08,31.27,...,2,47,50,55.692225,12.544574,55.65462,12.526675,2019-11-29 10:52:00,102453,102711
1963287,Customer,3296480,WMWXU7102KTM90630,COOPER,No,9429630366,Private,2019-11-29 12:04:51,2019-11-29 15:17:32,37.48,...,17,47,42,55.65462,12.526675,55.654183,12.5268,2019-11-29 12:21:51,102711,102711
1963920,Customer,3642388,WMWXU7102KTM90630,COOPER,No,9429641934,Private,2019-11-29 15:19:18,2019-11-29 15:46:49,2.94,...,5,42,35,55.654183,12.5268,55.685909,12.551368,2019-11-29 15:24:18,102711,102445
1964704,Customer,2543449,WMWXU7102KTM90630,COOPER,No,9429652738,Private,2019-11-29 17:49:00,2019-11-29 18:32:00,12.42,...,14,37,35,55.685909,12.551368,55.693684,12.613323,2019-11-29 18:03:00,102445,103112


In [118]:
ttt = df

In [121]:
#ttt.loc[1152167]
sub_df = df[df.CarID == 'WBA1R5106J7B13384'].sort_values('RentalID')
err_index = sub_df.index.get_loc(1152167)
err_index

1598

In [123]:
sub_df.iloc[err_index+1].loc[['Start_Lat','Start_Long']].values

array([55.737304, 12.388996], dtype=object)

In [None]:
for i, row in df[(df.End_Lat < 5)].iterrows():
    sub_df = df[df.CarID == row.CarID].sort_values('RentalID')
    err_index = sub_df.index.get_loc(i)

    # Will fail if last index
    try:
        df.loc[i, ['End_Lat', 'End_Long']] = sub_df.iloc[err_index+1].loc[['Start_Lat','Start_Long']].values
    except:
        continue
    

## Other columns

In [28]:
df[df.Customer_Group == 'Non_Customer']

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
0,Non_Customer,793639,WBY1Z21080V307924,I3,No,9335872135,Private,2016-03-24 11:48:43,2016-02-04 10:00:19,0.0,...,0,0,0,55.678763,12.552853,0.000000,0.000000,2016-03-24 11:48:43,147111,550062
1,Non_Customer,1035973,WBY1Z21080V307857,I3,No,9336114126,Private,2016-03-30 15:37:39,2016-01-04 00:40:38,0.0,...,0,62,47,55.770626,12.519300,55.770389,12.518839,2016-03-30 15:37:39,173061,173061
2,Non_Customer,998095,WBY1Z21020V307904,I3,No,9336153910,Private,2016-03-31 13:08:16,2016-05-04 08:32:25,0.0,...,1,85,79,55.621588,12.606951,55.621532,12.606279,2016-03-31 13:09:16,185154,185154
3,Non_Customer,999604,WBY1Z21010V307926,I3,No,9336158303,Private,2016-03-31 14:43:00,2016-01-04 07:10:00,0.0,...,1,0,71,55.770077,12.518914,55.769746,12.519123,2016-03-31 14:44:00,173061,173061
4,Non_Customer,1035969,WBY1Z21070V308210,I3,No,9336160465,Private,2016-03-31 15:21:36,2016-01-04 14:24:17,0.0,...,1,53,52,55.770623,12.519791,55.770439,12.518937,2016-03-31 15:22:36,173061,173061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2633887,Non_Customer,793639,WBY1Z21050V308187,I3,No,9345006012,Private,2016-09-30 20:54:26,2016-09-30 21:04:52,0.0,...,163,100,93,55.705041,12.498220,55.688268,12.525764,2016-09-30 23:37:26,102615,147213
2633898,Non_Customer,793639,WBY1Z21060V307971,I3,No,9345006845,Private,2016-09-30 21:16:39,2016-09-30 21:32:45,0.0,...,185,60,51,55.698907,12.472528,55.684551,12.541961,2016-10-01 00:21:39,102652,147121
2633899,Non_Customer,793639,WBY1Z210X0V308007,I3,No,9345006854,Private,2016-09-30 21:16:51,2016-09-30 21:32:44,0.0,...,186,12,96,55.698951,12.472341,55.684770,12.531710,2016-10-01 00:22:51,102652,147162
2633910,Non_Customer,793639,WBY1Z21060V308084,I3,No,9345007816,Private,2016-09-30 21:45:36,2016-09-30 22:03:24,0.0,...,215,97,89,55.705516,12.482168,55.674171,12.560710,2016-10-01 01:20:36,102613,102161


In [29]:
df[df.Rental_flag == 'Yes']

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
92,Customer,1041131,WBY1Z21030V307927,I3,Yes,9336190387,Private,2016-01-04 08:14:43,2016-01-04 08:32:31,8.80,...,8,57,50,55.671970,12.561281,55.654693,12.612746,2016-01-04 08:22:43,102182,103224
162,Customer,341960,WBY1Z210X0V308248,I3,Yes,9336195348,Private,2016-01-04 09:13:18,2016-01-04 09:26:30,6.01,...,3,100,90,55.666223,12.544056,55.632389,12.574830,2016-01-04 09:16:18,102821,103291
226,Customer,19617,WBY1Z21020V308048,I3,Yes,9336201372,Private,2016-01-04 11:08:34,2016-01-04 11:43:16,15.03,...,4,71,57,55.687231,12.549290,55.692560,12.546405,2016-01-04 11:12:34,102444,102453
227,Customer,24827,WBY1Z21040V307791,I3,Yes,9336201380,Private,2016-01-04 11:08:48,2016-01-04 11:49:28,18.68,...,9,62,51,55.632424,12.644685,55.681026,12.604476,2016-01-04 11:17:48,185125,103132
298,Customer,110192,WBY1Z210X0V308072,I3,Yes,9336207064,Private,2016-01-04 12:49:24,2016-01-04 13:12:59,11.38,...,9,55,49,55.708624,12.578704,55.671733,12.539777,2016-01-04 12:58:24,102343,147131
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2633885,Customer,440147,WBY1Z21060V307954,I3,Yes,9345006009,Private,2016-09-30 20:54:26,2016-09-30 21:16:58,9.88,...,8,55,44,55.662403,12.623958,55.710676,12.566043,2016-09-30 21:02:26,103151,102412
2633903,Customer,1032015,WBY1Z21010V307814,I3,Yes,9345007233,Private,2016-09-30 21:16:24,2016-09-30 22:02:05,16.12,...,11,83,97,55.662269,12.604593,55.685559,12.586609,2016-09-30 21:27:24,103172,102223
2633919,Customer,457665,WBY1Z21060V308070,I3,Yes,9345008207,Private,2016-09-30 21:52:27,2016-09-30 22:29:45,22.67,...,1,56,25,55.630208,12.648793,55.665015,12.556939,2016-09-30 21:53:27,185203,102812
2633966,Customer,427906,WBY1Z21080V307955,I3,Yes,9345010629,Private,2016-09-30 23:02:50,2016-09-30 23:13:13,5.37,...,1,97,92,55.665828,12.565080,55.682833,12.584502,2016-09-30 23:03:50,102181,102223


In [30]:
for i,x in enumerate((df.sort_values(by = 'Reservation_Minutes', ascending=False).Customer_Group == 'Customer').values):
    if x:
        print(i)
        break

40


In [31]:
df.sort_values(by = 'Reservation_Minutes', ascending=False).iloc[:42]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
1493665,Non_Customer,793639,WBY1Z21050V307993,I3,No,9407597629,Private,2019-02-05 14:44:54,2019-06-18 16:08:50,0.0,...,59009,0,0,55.634966,12.650418,0.0,0.0,2019-03-18 14:13:54,185121,550062
1508476,Non_Customer,793639,WBY1Z21050V307766,I3,No,9408404530,Private,2019-05-13 14:02:03,2019-06-18 16:08:51,0.0,...,50140,0,0,55.635113,12.650348,0.0,0.0,2019-06-17 09:42:03,185121,550062
1508467,Non_Customer,793639,WBY1Z21060V307999,I3,No,9408400365,Private,2019-05-13 14:05:58,2019-06-18 16:08:50,0.0,...,50136,0,0,55.634987,12.650426,0.0,0.0,2019-06-17 09:41:58,185121,550062
1508474,Non_Customer,793639,WBY1Z21030V307989,I3,No,9408401227,Private,2019-05-13 14:06:03,2019-06-18 16:08:51,0.0,...,50135,0,0,55.635009,12.65049,0.0,0.0,2019-06-17 09:41:03,185121,550062
1499203,Non_Customer,793639,WBY1Z210X0V307911,I3,No,9408102693,Private,2019-05-13 13:22:19,2019-06-18 16:08:50,0.0,...,45960,0,0,55.634986,12.650334,0.0,0.0,2019-06-14 11:22:19,185121,550062
1499161,Non_Customer,793639,WBY1Z21080V308040,I3,No,9408101113,Private,2019-05-13 13:19:56,2019-06-18 16:08:51,0.0,...,45949,0,0,55.634991,12.650396,0.0,0.0,2019-06-14 11:08:56,185121,550062
1511323,Non_Customer,793639,WBY1Z21060V308098,I3,No,9408845467,Private,2019-05-27 00:54:05,2019-06-18 16:08:49,0.0,...,32225,0,0,55.634453,12.649101,0.0,0.0,2019-06-18 09:59:05,185121,550062
1688993,Non_Customer,793639,WBY1Z21090V308094,I3,No,9427800504,Private,2019-07-10 03:58:33,2019-10-29 16:50:51,0.0,...,31834,5,0,55.71624,12.56701,0.0,0.0,2019-08-01 06:32:33,102412,550062
1511307,Non_Customer,793639,WBY1Z210X0V308007,I3,No,9408845049,Private,2019-05-27 09:11:10,2019-06-18 16:08:49,0.0,...,31722,0,0,55.635099,12.650519,0.0,0.0,2019-06-18 09:53:10,185121,550062
1763108,Non_Customer,793639,WBA1R5104J5K58061,118I,No,9402424970,Private,2019-03-04 19:56:41,2019-04-25 15:36:02,0.0,...,31069,0,0,55.677168,12.580413,0.0,0.0,2019-03-26 09:45:41,102111,550062


In [32]:
(df.sort_values(by = 'Reservation_Minutes', ascending=False).Customer_Group == 'Customer').values

array([False, False, False, ..., False,  True, False])

In [33]:
df[df.CarID == 'WBY1Z21050V307993'].sort_values('Reservation_Time').iloc[-550:-540]
#df[df.CarID == 'WBY1Z21050V307993'].loc[1493665]

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
1702579,Customer,3257292,WBY1Z21050V307993,I3,No,9400375651,Private,2019-02-04 18:27:14,2019-02-04 18:55:37,60.8,...,10,38,29,55.659379,12.630345,55.667792,12.545928,2019-02-04 18:37:14,103212,102821
1702760,Customer,2985859,WBY1Z21050V307993,I3,No,9400379233,Private,2019-02-04 19:16:57,2019-02-04 19:35:49,17.33,...,10,29,24,55.667792,12.545928,55.686779,12.536951,2019-02-04 19:26:57,102821,147161
1702938,Customer,828693,WBY1Z21050V307993,I3,No,9400382472,Private,2019-02-04 20:20:58,2019-02-04 20:51:08,64.0,...,11,5,100,55.686779,12.536951,55.654816,12.618666,2019-02-04 20:31:58,147161,103223
1783729,Non_Customer,1112124,WBY1Z21050V307993,I3,No,9402866073,Private,2019-02-05 00:23:35,2019-02-05 02:10:30,0.0,...,0,75,48,55.69442,12.550729,55.634472,12.648757,2019-02-05 00:23:35,102443,185121
1493665,Non_Customer,793639,WBY1Z21050V307993,I3,No,9407597629,Private,2019-02-05 14:44:54,2019-06-18 16:08:50,0.0,...,59009,0,0,55.634966,12.650418,0.0,0.0,2019-03-18 14:13:54,185121,550062
1113833,Customer,1015209,WBY1Z21050V307993,I3,No,9394090697,Private,2019-02-13 07:42:30,2019-02-13 08:13:58,22.53,...,19,58,51,55.713129,12.572044,55.701208,12.600449,2019-02-13 08:01:30,102324,102336
1114545,Customer,3219128,WBY1Z21050V307993,I3,No,9394109802,Business,2019-02-13 13:22:25,2019-02-13 13:58:53,80.0,...,2,51,39,55.701208,12.600449,55.63022,12.64893,2019-02-13 13:24:25,102336,185203
1114742,Customer,1053005,WBY1Z21050V307993,I3,No,9394115235,Private,2019-02-13 14:52:45,2019-02-13 15:23:13,43.07,...,4,39,19,55.63022,12.64893,55.679108,12.479413,2019-02-13 14:56:45,185203,102634
1115510,Customer,2658148,WBY1Z21050V307993,I3,No,9394130425,Private,2019-02-13 18:44:27,2019-02-13 19:24:02,51.2,...,8,19,10,55.679108,12.479413,55.674144,12.571492,2019-02-13 18:52:27,102634,102131
1116382,Customer,812023,WBY1Z21050V307993,I3,No,9394146782,Private,2019-02-14 05:20:48,2019-02-14 05:56:35,84.8,...,17,100,90,55.674144,12.571492,55.630231,12.64884,2019-02-14 05:37:48,102131,185203


In [34]:
df[df.CarID == 'WBY1Z21010V307926'].sort_values(by = 'Start_Time')

Unnamed: 0,Customer_Group,CustomerID,CarID,Engine,Rental_flag,RentalID,Rental_Usage_Type,Reservation_Time,End_Time,Revenue,...,Reservation_Minutes,Fuel_Start,Fuel_End,Start_Lat,Start_Long,End_Lat,End_Long,Start_Time,Start_Zone,End_Zone
706564,Customer,799392,WBY1Z21010V307926,I3,No,9328448278,Private,2015-01-10 16:12:37,2015-01-10 16:19:27,2.96,...,1,51,48,55.697877,12.574413,55.694355,12.561257,2015-01-10 16:13:37,102421,102443
388824,Customer,818630,WBY1Z21010V307926,I3,No,9329780526,Private,2015-01-11 08:43:22,2015-01-11 09:35:05,20.38,...,8,58,42,55.653078,12.626547,55.722742,12.536773,2015-01-11 08:51:22,103214,102521
388929,Customer,818630,WBY1Z21010V307926,I3,No,9329784962,Private,2015-01-11 11:27:30,2015-01-11 11:54:37,11.86,...,8,42,33,55.722742,12.536773,55.655853,12.627918,2015-01-11 11:35:30,102521,103214
389218,Customer,806078,WBY1Z21010V307926,I3,No,9329793851,Private,2015-01-11 16:11:59,2015-01-11 16:40:00,12.29,...,10,33,14,55.655853,12.627918,55.662882,12.517856,2015-01-11 16:21:59,103214,102721
707110,Customer,820918,WBY1Z21010V307926,I3,No,9328485456,Private,2015-02-10 12:07:30,2015-02-10 12:12:44,2.54,...,8,48,46,55.694355,12.561257,55.694262,12.550472,2015-02-10 12:15:30,102443,102443
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1728454,Non_Customer,893118,WBY1Z21010V307926,I3,No,9401567372,Private,2019-12-04 09:49:19,2019-12-04 19:16:18,0.00,...,1,87,86,55.621074,12.606593,55.621529,12.606263,2019-12-04 09:50:19,185154,185154
1730525,Non_Customer,793639,WBY1Z21010V307926,I3,No,9401621132,Private,2019-12-04 20:45:14,2019-12-04 21:15:08,0.00,...,24,85,78,55.621542,12.606279,55.632379,12.576203,2019-12-04 21:09:14,185154,103291
1730579,Customer,2868004,WBY1Z21010V307926,I3,No,9401623638,Private,2019-12-04 21:16:20,2019-12-04 21:46:28,38.13,...,9,78,71,55.632379,12.576203,55.651913,12.617169,2019-12-04 21:25:20,103291,103222
1730749,Customer,3290202,WBY1Z21010V307926,I3,No,9401627234,Private,2019-12-04 22:20:54,2019-12-04 22:42:25,38.40,...,10,71,65,55.651913,12.617169,55.636903,12.618049,2019-12-04 22:30:54,103222,185141


# Create Vacancy

In [33]:
# Haversine function
def haversine(point1, point2):
    # convert decimal degrees to radians
    lat1, lon1 = map(np.radians, point1)
    lat2, lon2 = map(np.radians, point2)

    # Deltas
    delta_lon = lon2 - lon1 
    delta_lat = lat2 - lat1 
    
    # haversine formula 
    a = np.sin(delta_lat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(delta_lon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    r = 6371000 # Radius of earth in m
    return c * r

In [34]:
df_sorted = df.sort_values("Reservation_Time")
df_sorted.CarID.nunique()

1021

In [35]:
data = []
for i, car in enumerate(df_sorted.CarID.unique()):
    if car == '-':
        continue
    car_sub_df = df_sorted[df_sorted.CarID == car]
    if not i%10:
        print(f'{i} cars processed')
    for (_, row1), (_, row2) in zip(car_sub_df[:-1].iterrows(),car_sub_df[1:].iterrows()):
        park_time = row1['End_Time']
        reservation_time = row2['Reservation_Time']
        start_time = row2['Start_Time']
        time_to_reservation = (row2['Reservation_Time']-row1['End_Time']).total_seconds()/3600
        time_to_start = (row2['Start_Time']-row1['End_Time']).total_seconds()/3600
        park_location_lat = row1['End_Lat']
        park_location_long = row1['End_Long']
        park_zone = row1['End_Zone']
        park_fuel = row1['Fuel_End']
        leave_fuel = row2['Fuel_Start']
        engine = row1['Engine']
        moved = haversine(row1.loc[['End_Lat','End_Long']].values, row2.loc[['Start_Lat','Start_Long']].values) 
        data.append([car, park_time,reservation_time, start_time, time_to_reservation, time_to_start, park_location_lat, park_location_long, park_zone, park_fuel, leave_fuel, engine, moved])

0 cars processed
10 cars processed
20 cars processed
30 cars processed
40 cars processed
50 cars processed
60 cars processed
70 cars processed
80 cars processed
90 cars processed
100 cars processed
110 cars processed
120 cars processed
130 cars processed
140 cars processed
150 cars processed
160 cars processed
170 cars processed
180 cars processed
190 cars processed
200 cars processed
210 cars processed
220 cars processed
230 cars processed
240 cars processed
250 cars processed
260 cars processed
270 cars processed
280 cars processed
290 cars processed
300 cars processed
310 cars processed
320 cars processed
330 cars processed
340 cars processed
350 cars processed
360 cars processed
370 cars processed
380 cars processed
390 cars processed
400 cars processed
410 cars processed
420 cars processed
430 cars processed
440 cars processed
450 cars processed
460 cars processed
470 cars processed
480 cars processed
490 cars processed
500 cars processed
510 cars processed
520 cars processed
530 

In [36]:
# Create new df
df_vacancy = pd.DataFrame(data = data, columns = ['car', 'park_time', 'reservation_time', 'start_time','time_to_reservation', 'time_to_start', 'park_location_lat', 'park_location_long', 'park_zone', 'park_fuel', 'leave_fuel', 'engine', 'moved'])

# Infer types
df_vacancy = df_vacancy.convert_dtypes()

# Save
df_vacancy.to_csv('data/processed/Vacancy_new.csv')

In [39]:
df_vacancy[df_vacancy.park_location_lat < 10]

Unnamed: 0,car,park_time,reservation_time,start_time,time_to_reservation,time_to_start,park_location_lat,park_location_long,park_zone,park_fuel,leave_fuel,engine,moved
6484,WBY1Z21010V307859,2019-09-23 16:36:47,2019-10-01 02:02:55,2019-10-01 02:04:55,177.435556,177.468889,0.0,0.0,550062,0,56,I3,6292768.460909
20262,WBY1Z21000V307884,2019-06-18 16:08:03,2019-07-01 06:25:50,2019-07-01 06:48:50,302.296389,302.679722,0.0,0.0,550062,0,100,I3,6295269.445084
25593,WBY1Z210X0V307858,2019-09-25 16:37:29,2019-10-01 16:50:39,2019-10-01 16:59:39,144.219444,144.369444,0.0,0.0,550062,0,77,I3,6291430.710595
31020,WBY1Z21030V308205,2019-01-10 16:39:24,2019-01-13 00:30:05,2019-01-13 00:40:05,55.844722,56.011389,0.0,0.0,550062,0,87,I3,6296310.222621
33636,WBY1Z21050V307976,2017-07-14 03:30:25,2017-08-01 15:15:53,2017-08-01 15:20:53,443.757778,443.841111,0.0,0.0,550062,98,59,I3,6295540.691409
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2449916,WBY8P2104K7D92193,2019-10-09 16:32:02,2019-11-05 06:58:03,2019-11-05 06:59:03,638.433611,638.450278,0.0,0.0,550062,0,62,I3 120,6291983.829059
2464979,WBY8P2104K7D89939,2019-08-29 16:28:31,2019-09-05 13:54:16,2019-09-05 13:56:16,165.429167,165.4625,0.0,0.0,550062,0,100,I3 120,6291369.414998
2552340,WBA1R5100K7D67738,2019-07-29 16:19:41,2019-08-06 00:31:12,2019-08-06 00:37:12,176.191944,176.291944,0.0,0.0,550062,0,94,118I,6298022.719552
2586727,WBA31AA05L3H87545,2019-05-12 12:30:42,2019-05-10 14:27:27,2019-05-10 14:42:27,-46.054167,-45.804167,0.0,0.0,550062,0,76,X1 SDRIVE18I,6294458.041067


## OSM

In [None]:
import osmnx

In [None]:
tmp = osmnx.geometries_from_place('Region Hovedstaden', {'railway': 'station'})

  aout[:] = out
  aout[:] = out


In [None]:
tmp.name.sort_values().values

array(['Aksel Møllers Have', 'Allerød', 'Amager Strand', 'Amagerbro',
       'Ballerup', 'Bella Center', 'Birkerød', 'Blovstrød H', 'Brandhøj',
       'Brødeskov', 'Buddinge', 'Christianshavn', 'DR Byen',
       'Dronningmølle', 'Dyssegård', 'Dyssekilde', 'Enghave Brygge',
       'Enghave Plads', 'Farum', 'Fasanvej', 'Femøren', 'Firhøj',
       'Flintholm', 'Flintholm', 'Forum', 'Fredensborg', 'Frederiksberg',
       'Frederiksberg Allé', 'Frederikssund', 'Frederiksværk', 'Fuglevad',
       'Gammel Strand', 'Gentofte', 'Gilleleje', 'Glostrup', 'Græsted',
       'Grønnehave', 'Gørløse', 'Hareskov', 'Havneholmen', 'Hedehusgård',
       'Hellebæk', 'Hellerup', 'Helsinge', 'Helsingør', 'Herlev',
       'Hillerød', 'Hillerød', 'Holte', 'Hornbæk', 'Hundested',
       'Hundested Havn', 'Høje Taastrup', 'Høje Taastrup', 'Højstrup',
       'Islands Brygge', 'Jægersborg', 'Kagerup', 'Kastrup',
       'Klampenborg', 'Klampenborg', 'Kongens Nytorv', 'Kregme',
       'København H', 'København H', '

In [None]:
tmp2 = osmnx.geometries_from_place('Region Hovedstaden', {'public_station': 'platform'})

  aout[:] = out


In [None]:
tmp2

Unnamed: 0,geometry
