In [None]:
! pip install -r requirements.txt --q

In [5]:
import os

import folium
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
# Never truncate the column list when a DataFrame is rendered.
pd.options.display.max_columns = None

In [None]:
%%time
df_list = []
input_dir = "datarepo-eved-dataset-549403600cf5/data/eVED/"
for file in os.listdir(input_dir):
    if file.endswith("_week.csv"):
        df = pd.read_csv(input_dir + file, low_memory=False)
        print(f"Reading file: {file} (len = {len(df)})")
        df = df[df['Energy_Consumption'].notnull()]
        df_list.append(df)

full_df = pd.concat(df_list)
print("Final Output =>", full_df.shape)

In [None]:
# Headline counts: distinct vehicles, and distinct trips where a trip is
# identified by its (VehId, Trip) pair.
n_vehicles = full_df['VehId'].nunique()
n_trips = len(full_df[['VehId', 'Trip']].drop_duplicates())
print("Total unique vehicles:", n_vehicles)
print("Total unique trips:", n_trips)

In [None]:
# Distribution of the main signals: one 30-bin histogram panel per column.
cols = ['Energy_Consumption', 'Gradient', 'Vehicle Speed[km/h]', 'OAT[DegC]']
full_df.hist(column=cols, bins=30, figsize=(15, 8))
plt.tight_layout()
plt.show()

In [None]:
# Take the trip that the first row of full_df belongs to.
# A trip is identified by the (VehId, Trip) pair, as in the unique-trip count
# above; filtering on Trip alone could mix rows from different vehicles into
# one "profile".
first_row = full_df.iloc[0]
sample_trip = full_df[
    (full_df['VehId'] == first_row['VehId']) & (full_df['Trip'] == first_row['Trip'])
]

# Elevation along the trip, plotted against the row position within the trip.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(sample_trip['Elevation Smoothed[m]'].values)
ax.set_title("Elevation Profile of a Sample Trip")
ax.set_xlabel("Sequential Point")
ax.set_ylabel("Elevation (m)")
ax.grid(True)
plt.show()



In [None]:
%%time
fig, ax = plt.subplots(1, 2, figsize=(14, 5))

ax[0].scatter(full_df['Gradient'], full_df['Energy_Consumption'], alpha=0.3)
ax[0].set_xlabel("Gradient")
ax[0].set_ylabel("Energy Consumption")
ax[0].set_title("Gradient vs Energy Consumption")

ax[1].scatter(full_df['Vehicle Speed[km/h]'], full_df['Energy_Consumption'], alpha=0.3, color='orange')
ax[1].set_xlabel("Vehicle Speed (km/h)")
ax[1].set_ylabel("Energy Consumption")
ax[1].set_title("Speed vs Energy Consumption")

plt.tight_layout()
plt.show()

In [None]:
%%time
# Create a map centered at the mean coordinates
center_lat = full_df['Matchted Latitude[deg]'].mean()
center_lon = full_df['Matched Longitude[deg]'].mean()
m = folium.Map(location=[center_lat, center_lon], zoom_start=12)

# Add points to the map
for idx, row in tqdm(full_df.iterrows()):
    folium.CircleMarker(
        location=[row['Matchted Latitude[deg]'], row['Matched Longitude[deg]']],
        radius=1,
        color='blue',
        fill=True,
        fillOpacity=0.2
    ).add_to(m)

# Display the map
m

In [None]:
# Columns whose share of missing values exceeds this percentage are plotted.
NAN_THRESHOLD_PCT = 50

# Percentage of NaN values per column
nan_percentages = full_df.isna().mean() * 100

# Keep only the columns above the threshold
high_nan_cols = nan_percentages[nan_percentages > NAN_THRESHOLD_PCT]

# Create bar plot
plt.figure(figsize=(10, 6))
plt.bar(range(len(high_nan_cols)), high_nan_cols.values)
plt.xticks(range(len(high_nan_cols)), high_nan_cols.index, rotation=45, ha='right')
plt.ylabel('Percentage of NaN Values')
# Title is derived from the threshold so the two cannot drift apart.
plt.title(f'Columns with > {NAN_THRESHOLD_PCT}% Missing Values')

# Add percentage labels on top of bars
for i, v in enumerate(high_nan_cols.values):
    plt.text(i, v, f'{v:.1f}%', ha='center', va='bottom')

plt.tight_layout()
plt.show()

In [None]:
# List every column name available in the combined frame.
full_df.columns

In [None]:
# Frequency of each recorded fuel-rate value; rows without a reading are excluded.
fuel_rate = full_df['Fuel Rate[L/hr]']
fuel_rate.dropna().value_counts()

In [None]:
# Column names of full_df again (same expression as the earlier
# `full_df.columns` cell), kept here as a reference for the cells that follow.
full_df.columns

In [None]:
# Isolate one trip (vehicle 10, trip 1558) for closer inspection.
is_vehicle_10 = full_df['VehId'] == 10
is_trip_1558 = full_df['Trip'] == 1558
df = full_df.loc[is_vehicle_10 & is_trip_1558]
print(df.shape)
# Distinct map-matched positions recorded for that trip
df.loc[:, ['Matchted Latitude[deg]', 'Matched Longitude[deg]']].drop_duplicates()

In [1]:
# Load an OpenStreetMap extract with osmnx and split it into node and edge tables.
import osmnx as ox

# Path to the .osm extract, relative to the notebook's working directory
osm_file_path = "DATA/map.osm"

# Build a graph from the .osm file
graph = ox.graph_from_xml(osm_file_path)

# Convert the graph into two GeoDataFrames: nodes and edges
nodes, edges = ox.graph_to_gdfs(graph)

# Display the nodes GeoDataFrame
nodes

Unnamed: 0_level_0,y,x,highway,railway,ref,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
62527493,42.281797,-83.759832,,,,POINT (-83.75983 42.2818)
62527499,42.281894,-83.763230,,,,POINT (-83.76323 42.28189)
5352792088,42.277009,-83.763159,,,,POINT (-83.76316 42.27701)
5352792090,42.276958,-83.763146,,,,POINT (-83.76315 42.27696)
5352792096,42.274649,-83.762852,,,,POINT (-83.76285 42.27465)
...,...,...,...,...,...,...
4936161200,42.277986,-83.758416,,,,POINT (-83.75842 42.27799)
4936161202,42.279524,-83.758457,,,,POINT (-83.75846 42.27952)
4936161204,42.278070,-83.758417,,,,POINT (-83.75842 42.27807)
4936161206,42.278289,-83.757326,,,,POINT (-83.75733 42.27829)


In [2]:
# Display the edges table returned by ox.graph_to_gdfs (indexed by u, v, key).
edges

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,highway,lanes,maxspeed,name,oneway,reversed,length,geometry,service,landuse,access
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
62527493,7359784515,0,8728126,residential,2,15 mph,Eighth Street,False,False,114.215180,"LINESTRING (-83.75983 42.2818, -83.75985 42.28...",,,
62527499,62537396,0,8726618,residential,2,25 mph,Crest Avenue,False,False,357.880123,"LINESTRING (-83.76323 42.28189, -83.76323 42.2...",,,
5352792088,62486098,0,442013278,secondary,2,30 mph,West Liberty Street,False,True,7.180637,"LINESTRING (-83.76316 42.27701, -83.76307 42.2...",,,
5352792088,5352792090,0,554671148,footway,,,,False,False,5.832030,"LINESTRING (-83.76316 42.27701, -83.76315 42.2...",,,
5352792088,62486111,0,442013278,secondary,2,30 mph,West Liberty Street,False,False,468.191305,"LINESTRING (-83.76316 42.27701, -83.76417 42.2...",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4936161206,62484277,0,8722066,residential,2,,Mulholland Street,False,False,315.018029,"LINESTRING (-83.75733 42.27829, -83.75734 42.2...",,,
4936161207,62486090,0,420302893,tertiary,3,25 mph,South Seventh Street,False,False,9.575456,"LINESTRING (-83.75855 42.27805, -83.75854 42.2...",,,
4936161207,62544806,0,"[420302893, 441448231]",tertiary,"[3, 2]",25 mph,South Seventh Street,False,True,307.272354,"LINESTRING (-83.75855 42.27805, -83.75856 42.2...",,,
4936161207,5352898407,0,"[554685025, 503275533]",footway,,,,False,"[False, True]",7.571493,"LINESTRING (-83.75855 42.27805, -83.75859 42.2...",,,


In [3]:
# Display the nodes table returned by ox.graph_to_gdfs (indexed by osmid).
nodes

Unnamed: 0_level_0,y,x,highway,railway,ref,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
62527493,42.281797,-83.759832,,,,POINT (-83.75983 42.2818)
62527499,42.281894,-83.763230,,,,POINT (-83.76323 42.28189)
5352792088,42.277009,-83.763159,,,,POINT (-83.76316 42.27701)
5352792090,42.276958,-83.763146,,,,POINT (-83.76315 42.27696)
5352792096,42.274649,-83.762852,,,,POINT (-83.76285 42.27465)
...,...,...,...,...,...,...
4936161200,42.277986,-83.758416,,,,POINT (-83.75842 42.27799)
4936161202,42.279524,-83.758457,,,,POINT (-83.75846 42.27952)
4936161204,42.278070,-83.758417,,,,POINT (-83.75842 42.27807)
4936161206,42.278289,-83.757326,,,,POINT (-83.75733 42.27829)


In [6]:
# Plot every OSM node on an interactive map (folium is imported in the
# notebook's import cell).
# Create a map centered at the mean coordinates of the nodes
map_center = [nodes['y'].mean(), nodes['x'].mean()]
map_nodes = folium.Map(location=map_center, zoom_start=14)

# Add points to the map; in the nodes table 'y' is latitude and 'x' is longitude
for lat, lon in zip(nodes['y'], nodes['x']):
    folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color='blue',
        fill=True,
        fill_opacity=0.6,  # folium's keyword is snake_case; `fillOpacity` was silently ignored
    ).add_to(map_nodes)

# Display the map
map_nodes

In [None]:
# Bounding box of the map-matched coordinates in df.
trip_lat = df['Matchted Latitude[deg]']
trip_lon = df['Matched Longitude[deg]']

max_lat, min_lat = trip_lat.max(), trip_lat.min()
max_lon, min_lon = trip_lon.max(), trip_lon.min()

for label, value in (
    ("Max Matched Latitude:", max_lat),
    ("Min Matched Latitude:", min_lat),
    ("Max Matched Longitude:", max_lon),
    ("Min Matched Longitude:", min_lon),
):
    print(label, value)

In [None]:
# Distinct values that the Intersection column takes in df (NaN included).
df.loc[:, ['Intersection']].drop_duplicates()

In [None]:
# Rows of df (the single trip selected earlier) that have no Intersection value
no_intersection_trips = df[df['Intersection'].isnull()]

# Create a map centered at the mean of those rows' 'Latitude[deg]' /
# 'Longitude[deg]' columns (not the map-matched ones)
map_center = [no_intersection_trips['Latitude[deg]'].mean(), no_intersection_trips['Longitude[deg]'].mean()]
map_no_intersection = folium.Map(location=map_center, zoom_start=12)

# Add points to the map
for lat, lon in zip(no_intersection_trips['Latitude[deg]'], no_intersection_trips['Longitude[deg]']):
    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        color='red',
        fill=True,
        fill_opacity=0.6,  # folium's keyword is snake_case; `fillOpacity` was silently ignored
    ).add_to(map_no_intersection)

# Display the map
map_no_intersection

In [None]:
# Map the map-matched positions of the rows in df flagged as intersections
# (Intersection == 1). The filter is evaluated once and reused for both columns.
intersection_rows = df[df['Intersection'] == 1]
latitudes = intersection_rows['Matchted Latitude[deg]']
longitudes = intersection_rows['Matched Longitude[deg]']

# Create a map centered at the mean coordinates of those points
map_center = [latitudes.mean(), longitudes.mean()]
map_points = folium.Map(location=map_center, zoom_start=12)

# Add points to the map
for lat, lon in zip(latitudes, longitudes):
    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        color='blue',
        fill=True,
        fill_opacity=0.6,  # folium's keyword is snake_case; `fillOpacity` was silently ignored
    ).add_to(map_points)

# Display the map
map_points

In [None]:
# Map every map-matched position in df.
latitudes = df['Matchted Latitude[deg]']
longitudes = df['Matched Longitude[deg]']

# Create a map centered at the mean coordinates of those points
map_center = [latitudes.mean(), longitudes.mean()]
map_points = folium.Map(location=map_center, zoom_start=12)

# Add points to the map
for lat, lon in zip(latitudes, longitudes):
    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        color='blue',
        fill=True,
        fill_opacity=0.6,  # folium's keyword is snake_case; `fillOpacity` was silently ignored
    ).add_to(map_points)

# Display the map
map_points

In [None]:
# Column names of df, for reference when choosing coordinate columns.
df.columns

In [None]:
# Map every position in df using the 'Latitude[deg]' / 'Longitude[deg]'
# columns (not the map-matched ones), for comparison with the matched map.
latitudes = df['Latitude[deg]']
longitudes = df['Longitude[deg]']

# Create a map centered at the mean coordinates of those points
map_center = [latitudes.mean(), longitudes.mean()]
map_points = folium.Map(location=map_center, zoom_start=12)

# Add points to the map
for lat, lon in zip(latitudes, longitudes):
    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        color='blue',
        fill=True,
        fill_opacity=0.6,  # folium's keyword is snake_case; `fillOpacity` was silently ignored
    ).add_to(map_points)

# Display the map
map_points

In [None]:
# Number of distinct values in the Trip column alone.
# NOTE(review): the earlier "Total unique trips" figure counts distinct
# (VehId, Trip) pairs, so the two numbers differ if Trip ids repeat across
# vehicles — confirm which definition is intended.
full_df['Trip'].nunique()

In [None]:
# Preview the first 30 rows of the combined frame.
full_df.iloc[:30]