# Data Export Notebook

Author: Jordan Perr-Sauer

This notebook contains the scripts that were used to prepare intermediate data files for public release. NREL is not able to release all of the intermediate data files to the public due to licensing agreements.

In [5]:
import pandas as pd
import numpy as np

### Shuffle vehicle ids

In [47]:
# One-time generation of the vehicle id mapping, kept here for provenance.
# It stays commented out; the mapping used by the cells below is the one
# loaded from "shuffle_vehicle_id_mapping.csv".
# NOTE(review): no random seed is set in these lines, so re-running them would
# presumably write a different mapping -- confirm before re-enabling.
#vehicles_in_study = pd.read_csv("./FleetDNAETL_CoDA_epaprime_traditional_nolimit.csv")["vdir"].str[2:]
#np.random.shuffle(vehicles_in_study.values)
#shuffle_df = pd.DataFrame(vehicles_in_study)
#shuffle_df.to_csv("shuffle_vehicle_id_mapping.csv")

# Load the persisted mapping; the first CSV column becomes the index.
shuffle_df = pd.read_csv(
    "shuffle_vehicle_id_mapping.csv",
    index_col=0,
)

### data/FleetDNAETL_CoDA_epaprime_agnostic_50klimit.csv

Shuffle vehicle IDs. Preserve only speed-based features, as the other features were not used in the analysis.

In [53]:
# Build the public release of the agnostic (50k-limit) feature file:
# keep only "vdir" and the columns whose name starts with "s", replace each
# vehicle id with its shuffled public id, sort by that id, and write the CSV.
df = pd.read_csv("./FleetDNAETL_CoDA_epaprime_agnostic_50klimit.csv")

# .copy() so the column assignment below acts on an independent frame rather
# than on a slice of the frame that was just read.
cols = [x for x in df.columns if (x.startswith("s") or x == "vdir")]
df = df[cols].copy()

# Public id = row position of the original id within shuffle_df["vdir"].
# setdefault keeps the first position when an id is listed more than once,
# which is the position the previous per-row np.argmax lookup returned.
public_id_by_original = {}
for position, original_id in enumerate(shuffle_df["vdir"]):
    public_id_by_original.setdefault(original_id, position)

# The first two characters of each "vdir" value are stripped before the
# remainder is parsed as an integer vehicle id.
original_ids = df["vdir"].str[2:].astype(int)

# np.argmax over an all-False comparison returns 0, so an id absent from the
# mapping used to be silently relabelled as public id 0. Fail loudly instead.
unknown_ids = sorted(set(original_ids) - set(public_id_by_original))
if unknown_ids:
    raise KeyError(
        f"vehicle ids missing from shuffle_vehicle_id_mapping.csv: {unknown_ids}"
    )
df["vdir"] = original_ids.map(public_id_by_original)

# sort_values returns a new frame; without the assignment the sorted result
# was discarded and the file was written in its original row order.
df = df.sort_values("vdir")

df.to_csv("./FleetDNAETL_CoDA_epaprime_agnostic_50klimit.public.csv", index=False)

### data/FleetDNAETL_CoDA_epaprime_traditional_nolimit.csv

Shuffle vehicle IDs

In [61]:
# Build the public release of the traditional (no-limit) feature file:
# replace each vehicle id with its shuffled public id, sort by that id, and
# write the CSV without the index column.
df = pd.read_csv("./FleetDNAETL_CoDA_epaprime_traditional_nolimit.csv")

# Public id = row position of the original id within shuffle_df["vdir"].
# setdefault keeps the first position when an id is listed more than once,
# which is the position the previous per-row np.argmax lookup returned.
public_id_by_original = {}
for position, original_id in enumerate(shuffle_df["vdir"]):
    public_id_by_original.setdefault(original_id, position)

# The first two characters of each "vdir" value are stripped before the
# remainder is parsed as an integer vehicle id.
original_ids = df["vdir"].str[2:].astype(int)

# np.argmax over an all-False comparison returns 0, so an id absent from the
# mapping used to be silently relabelled as public id 0. Fail loudly instead.
unknown_ids = sorted(set(original_ids) - set(public_id_by_original))
if unknown_ids:
    raise KeyError(
        f"vehicle ids missing from shuffle_vehicle_id_mapping.csv: {unknown_ids}"
    )
df["vdir"] = original_ids.map(public_id_by_original)
df = df.sort_values("vdir")

df.to_csv("./FleetDNAETL_CoDA_epaprime_traditional_nolimit.public.csv", index=False)

### data/vehicle_specifications_fdna.csv

Delete all columns except for vocation and vehicle id.
Remove vehicles that are not included in the study.

In [55]:
# Build the public release of the vehicle specification table:
# keep only "id" and "vocation", keep only vehicles that appear in the
# traditional feature file, replace ids with their shuffled public ids,
# sort by public id, upper-case the vocation, and write the CSV.
df = pd.read_csv("./vehicle_specifications_fdna.csv")

cols = ["id", "vocation"]
df = df[cols]

# Numeric ids of the vehicles in the study (first two characters of "vdir"
# are stripped). Duplicates are dropped so the inner merge below only
# filters the specification rows and can never multiply them.
vehicles_in_study = pd.read_csv("./FleetDNAETL_CoDA_epaprime_traditional_nolimit.csv")["vdir"].str[2:]
vehicles_in_study = pd.DataFrame(vehicles_in_study.astype(int).drop_duplicates())
df = df.merge(vehicles_in_study, how="inner", left_on="id", right_on="vdir")

# Drop the helper "vdir" column brought in by the merge; .copy() so the
# assignments below act on an independent frame.
df = df[["id", "vocation"]].copy()

# Public id = row position of the original id within shuffle_df["vdir"].
# setdefault keeps the first position when an id is listed more than once,
# which is the position the previous per-row np.argmax lookup returned.
public_id_by_original = {}
for position, original_id in enumerate(shuffle_df["vdir"]):
    public_id_by_original.setdefault(original_id, position)

original_ids = df["id"].astype(int)

# np.argmax over an all-False comparison returns 0, so an id absent from the
# mapping used to be silently relabelled as public id 0. Fail loudly instead.
unknown_ids = sorted(set(original_ids) - set(public_id_by_original))
if unknown_ids:
    raise KeyError(
        f"vehicle ids missing from shuffle_vehicle_id_mapping.csv: {unknown_ids}"
    )
df["id"] = original_ids.map(public_id_by_original)
df = df.sort_values("id")

df["vocation"] = df["vocation"].str.upper()

df.to_csv("./vehicle_specifications_fdna.public.csv", index=False)

### List vehicles missing from agnostic feature set

In [57]:
# Read back the two public feature files written above and keep only their
# "vdir" columns (agnostic -> ag_v, traditional -> tr_v).
ag_v = pd.read_csv(
    "./FleetDNAETL_CoDA_epaprime_agnostic_50klimit.public.csv"
).loc[:, "vdir"]
tr_v = pd.read_csv(
    "./FleetDNAETL_CoDA_epaprime_traditional_nolimit.public.csv"
).loc[:, "vdir"]

In [58]:
# Number of rows in each public file (agnostic first, then traditional).
display(len(ag_v))
display(len(tr_v))

899

912

In [59]:
# Public ids present in the agnostic file but not in the traditional file.
set(ag_v).difference(tr_v)

set()

In [60]:
# Public ids present in the traditional file but not in the agnostic file.
set(tr_v).difference(ag_v)

{24, 63, 73, 249, 335, 341, 388, 436, 521, 584, 737, 760, 887}