In [2]:
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
from geopy.distance import geodesic
from sklearn.cluster import KMeans
from tqdm.notebook import tqdm

USER_INPUT = 5


# Loading trianing data
df_2 = pd.read_csv("Charger_chall/lat_and_lon.csv")
df_2 = df_2.drop(columns=['Unnamed: 0'])

# Loading another dataset which contains addtional features
shapefile = pd.read_csv('Charger_chall/roads.csv')

# Removing datapoints which we don't want chargers to be placed on
cleaned_shapefile = shapefile.drop(shapefile[shapefile.type == 'residential'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'motorway'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'motorway_link'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'cycleway'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'footway'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'primary'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'pedestrian'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'living_street'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'primary_link'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'track'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'path'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'steps'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'proposed'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'raceway'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'corridor'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'bridleway'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'service'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'unclassified'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'tertiary_link'].index)
cleaned_shapefile = cleaned_shapefile.drop(cleaned_shapefile[cleaned_shapefile.type == 'secondary_link'].index)

# Cleaning the WKT column and getting all the lats and longs
cleaned_shapefile["WKT"] = cleaned_shapefile["WKT"].replace("LINESTRING \(", "", regex=True)
cleaned_shapefile["WKT"] = cleaned_shapefile["WKT"].replace("\)", "", regex=True)

list_of_clean_cords = []
for x in range(len(cleaned_shapefile['WKT'].values)):
    for y in cleaned_shapefile['WKT'].values[x].split(","):
        list_of_clean_cords.append(y)

clean_long = []
clean_lat = []
for z in list_of_clean_cords:
    clean_long.append(float(z.strip().split(" ")[0]))
    clean_lat.append(float(z.strip().split(" ")[1]))
    
clean_cords_df = pd.DataFrame()

clean_cords_df['lat'] = clean_lat
clean_cords_df['lon'] = clean_long

clean_cords_df.drop_duplicates(inplace=True)
clean_cords_df.reset_index(drop=True, inplace=True)

# Removing duplicates, outliers and detroit
clean_cords_df = clean_cords_df.drop(clean_cords_df[clean_cords_df.lat > 42.3086014].index)
clean_cords_df = clean_cords_df.drop(clean_cords_df[clean_cords_df.lon < -83.0728775].index)
clean_cords_df = clean_cords_df.drop(clean_cords_df[clean_cords_df.lon > -82.5096667].index)
cleaned_shapefile.name = cleaned_shapefile.name.fillna('Street name missing')

# Machine Learning -- K-Means Clustering
k_means_model = KMeans(n_clusters = 10)
k_means_model.fit(df_2)

pred_labels = k_means_model.labels_
centroid_list = k_means_model.cluster_centers_

centroid_lat = []
centroid_lon = []
for x in centroid_list:
    centroid_lat.append(x[0])
    centroid_lon.append(x[1])
    
# Building predicted stations and OUTPUTTING
predicted_chargers = pd.DataFrame()
predicted_chargers['lat'] = centroid_lat
predicted_chargers['lon'] = centroid_lon
#predicted_chargers.to_csv('charger_output_5.csv')

# Refining optimal results
true_lat = []
true_lon = []

for y in tqdm(range(len(centroid_lat))):
    centroid_cords = (centroid_lat[y], centroid_lon[y])
    #a = centroid_lat[y]
    #b = centroid_lon[y]
    distance = []
    for z in tqdm(range(len(clean_cords_df))):
        main_cords = (clean_lat[z], clean_long[z])
        distance.append(geodesic(main_cords, centroid_cords).meters)
        #distance.append((abs(clean_lat[z]-a) + abs(clean_long[z]-b))*1000)
        #distance.append((abs(clean_lat[z]-centroid_lat[y]) + abs(clean_long[z]-centroid_lon[y])))
        #distance.append(sqrt(((clean_lat[z]-centroid_lat[y])**2 + (clean_long[z]-centroid_lon[y])**2)))
        distance_df = pd.DataFrame()
        distance_df['Distance'] = distance
    true_lat.append(clean_cords_df.iloc[distance_df.sort_values('Distance').head(1).index[0]][0])
    true_lon.append(clean_cords_df.iloc[distance_df.sort_values('Distance').head(1).index[0]][1])

# Getting streetnames
street_name = []
for x in true_lat:
    street = cleaned_shapefile[cleaned_shapefile['WKT'].str.contains(str(x))].name.values[0]
    street_name.append(street)
    
# Making Pandas dataframe of true cords and the street location
true_cords = pd.DataFrame()
true_cords['lat'] = true_lat
true_cords['lon'] = true_lon
true_cords['street'] = street_name

# Outputs
true_cords

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

  0%|          | 0/8909 [00:00<?, ?it/s]

Unnamed: 0,lat,lon,street
0,42.275912,-83.006381,Howard Avenue
1,42.019782,-82.760801,Heritage Road
2,42.15354,-82.810599,Malden Road
3,42.241539,-82.548183,Morris Road
4,42.240454,-82.987811,Lake Trail Drive
5,42.293289,-82.717007,Notre Dame Street
6,42.299658,-82.781204,County Road 22
7,42.3051,-82.556022,Tecumseh Road
8,42.256241,-83.037992,Huron Church Road
9,42.244522,-83.023492,Talbot Road
