# Parte 1 - car_trip.csv

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.interpolate import interp1d

%matplotlib inline

In [2]:
data = pd.read_csv("car_trip.csv")
carinfo = pd.read_csv("vai.csv")
sns.set_style(style="dark")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2461 entries, 0 to 2460
Data columns (total 3 columns):
Date        2461 non-null object
Velocity    2461 non-null int64
RPM         2461 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 57.8+ KB


In [3]:
carinfo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 9 columns):
Id         11 non-null int64
User       11 non-null object
Begin      11 non-null object
End        11 non-null object
Minutes    11 non-null int64
Consumo    11 non-null int64
km         11 non-null int64
Cost       11 non-null float64
kml        11 non-null int64
dtypes: float64(1), int64(5), object(3)
memory usage: 872.0+ bytes


In [4]:
#How many dates?
data["Date"].value_counts()

28/05/201915:37    361
28/05/201915:00    317
28/05/201907:55    300
03/06/201915:30    220
27/05/201915:30    202
03/06/201916:14    194
28/05/201912:40    194
03/06/201914:47    179
03/06/201915:12    172
03/06/201915:52    163
03/06/201916:03    159
Name: Date, dtype: int64

In [5]:
first_trip    = data.iloc[:202]          # 27/05/201915:30
second_trip   = data.iloc[202:202+300]   # 28/05/201907:55
third_trip    = data.iloc[502:502+194]   # 28/05/201912:40
forth_trip    = data.iloc[696:696+317]   # 28/05/201915:00
fifth_trip    = data.iloc[1013:1013+361] # 28/05/201915:37
sixth_trip    = data.iloc[1374:1374+179] # 03/06/201914:47
seventh_trip  = data.iloc[1553:1553+172] # 03/06/201915:12
eighth_trip   = data.iloc[1725:1725+220] # 03/06/201915:30
nineth_trip   = data.iloc[1945:1945+163] # 03/06/201915:52
tenth_trip    = data.iloc[2108:2108+159] # 03/06/201916:03
eleventh_trip = data.iloc[2267:2267+194] # 03/06/201916:14

trips = [first_trip, second_trip, third_trip, forth_trip, fifth_trip, sixth_trip, seventh_trip, eighth_trip,
        nineth_trip, tenth_trip, eleventh_trip]

In [6]:
def generate_random_date():
    #Date format: 28/05/201915:37
    return "{:02}/{:02}/{}{:02}:00".format(np.random.randint(1,29),
                                           np.random.randint(1,13),
                                           2010 + np.random.randint(0,9),
                                           np.random.randint(0,24), 
                                           np.random.randint(0,60))

def generate_random_consumption(minutes, km):
    minutes_factor = 10 + np.random.randint(0,5) + np.random.random()
    km_factor = np.random.random() / 10
    return minutes_factor * minutes + km_factor * km

In [7]:
import time
start = time.perf_counter()
num_of_sets = 200
gas_price = [2.50, 2.70, 2.60, 2.65, 2.95, 3.00, 3.30, 3.70, 4.00, 4.30]
i = 0
while i < num_of_sets:
    global trips
    global carinfo
    num_of_available_trips = len(trips)  
    date = generate_random_date()
    random_trip = np.random.randint(low=0, high=num_of_available_trips)
    total_time = carinfo.iloc[random_trip]["Minutes"]
    rate = 1
    if total_time > 20: rate = 5
    original_velocities = trips[random_trip]["Velocity"]
    original_rpm        = trips[random_trip]["RPM"]
    n = len(original_velocities)
    x = np.arange(n)
    sinoid = np.random.randint(low=1, high=5) * np.random.random() * (np.sin(2 * np.pi * 2 * x / n) - np.sin(2 * np.pi * 1 * x / n))
    noise_up = np.random.randint(low=2, high=10) * np.random.random_sample(n)
    noise_down = np.random.randint(low=2, high=10) * np.random.random_sample(n)
    new_velocities = (original_velocities + sinoid + noise_up - noise_down).astype(int).apply(lambda x: max(0,x))
    distance = rate * sum(new_velocities) - (rate/2) * (list(new_velocities)[0] + list(new_velocities)[-1])
    distance /= 3.6
    user = {"Id": 636465667, "User": "VictorOliveira", "Begin": date, 
            "End": "{}{:02}".format(date[:-2], total_time), "Minutes": total_time, 
            "Consumo": generate_random_consumption(total_time, distance), "km": round(distance) }
    user["Cost"] = round(gas_price[int(date[8:10]) - 10] * user["Consumo"] / 1000,2)
    user["kml"] = int(1000 * user["km"] / user["Consumo"])
    if user["kml"] > 15000: continue
    rpm_velocity = interp1d(original_velocities,original_rpm,fill_value="extrapolate") # RPM = f(velocity)
    rpm_noise = np.random.random_sample(n) - np.random.randint(low=1, high=7) / 10 # random noise
    new_rpm = rpm_velocity(new_velocities) + rpm_noise
    new_rpm[np.isnan(new_rpm)] = 0.875 # velocity = 0 generates NaN's
    new_rpm = np.around(new_rpm,decimals=3)
    new_rpm = list(map(lambda x: max(0.850,x), new_rpm))
    dictionary = {"Date": [date] * n ,"Velocity": list(new_velocities), "RPM": new_rpm}
    dataframe_vel_rpm = pd.DataFrame(data=dictionary,columns=["Date","Velocity", "RPM"])
    dataframe_car_info = pd.DataFrame(data=user,index=[11+i],columns=["Id","User","Begin","End","Minutes","Consumo","km","Cost","kml"])
    trips.append(dataframe_vel_rpm)
    carinfo = pd.concat([carinfo, dataframe_car_info])
    i += 1
    
end = time.perf_counter()
print("{} new sets generated in {:.2f} seconds".format(num_of_sets, end - start))

  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  y_new = slope*(x_new - x_lo)[:, None] + y_lo
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  y_new = slope*(x_new - x_lo)[:, None] + y_lo


200 new sets generated in 1.90 seconds


In [8]:
"Number of trips now = {}".format(len(trips))

'Number of trips now = 211'

In [9]:
for i in range(11,11+num_of_sets):
    global data
    data = pd.concat([data, trips[i]],ignore_index=True)

In [10]:
data.tail(10)

Unnamed: 0,Date,Velocity,RPM
43773,13/12/201701:00,11,2.325
43774,13/12/201701:00,5,0.949
43775,13/12/201701:00,11,2.592
43776,13/12/201701:00,9,3.083
43777,13/12/201701:00,9,3.113
43778,13/12/201701:00,3,1.019
43779,13/12/201701:00,0,0.875
43780,13/12/201701:00,0,0.875
43781,13/12/201701:00,7,1.262
43782,13/12/201701:00,3,1.077


In [11]:
carinfo.tail(10)

Unnamed: 0,Id,User,Begin,End,Minutes,Consumo,km,Cost,kml
201,636465667,VictorOliveira,13/05/201301:00,13/05/201301:24,24,448.92832,6503,1.19,14485
202,636465667,VictorOliveira,19/07/201509:00,19/07/201509:10,10,147.895155,699,0.44,4726
203,636465667,VictorOliveira,13/12/201302:00,13/12/201302:10,10,167.09901,767,0.44,4590
204,636465667,VictorOliveira,26/11/201608:00,26/11/201608:33,33,579.556241,6622,1.91,11425
205,636465667,VictorOliveira,26/08/201701:00,26/08/201701:09,9,124.853569,897,0.46,7184
206,636465667,VictorOliveira,23/02/201808:00,23/02/201808:25,25,866.531964,7245,3.47,8360
207,636465667,VictorOliveira,22/06/201201:00,22/06/201201:10,10,151.156316,879,0.39,5815
208,636465667,VictorOliveira,17/07/201004:00,17/07/201004:33,33,1060.31581,6556,2.65,6183
209,636465667,VictorOliveira,06/04/201110:00,06/04/201110:33,33,485.096182,6128,1.31,12632
210,636465667,VictorOliveira,13/12/201701:00,13/12/201701:10,10,180.713652,761,0.67,4211


In [12]:
# new_csv
data.to_csv("car_trip1.csv",index=False)
carinfo.to_csv("agoravai.csv",index=False)