# Clean VSCODE Scraped Results from PlugShare
#### This notebook takes a CSV file scraped from PlugShare using VSCode to pull data from their map. We no longer use this method, as sidestepping the website's robots.txt file is unethical and we could get in trouble for further scraping their data.
#### The data is cleaned, the numeric codes in its columns are translated into plug types or network names, and the result is formatted correctly.

In [62]:
import pandas as pd
import numpy as np
from collections import Counter

In [63]:
#Load the scraped PlugShare export; change the file name to match your CSV
df = pd.read_csv(filepath_or_buffer='VSCode Scan (Plugshare).csv')

#Dictionary containing all plug types and their corresponding values.
#Each id is listed exactly once: ints and floats that compare equal share a
#hash in Python, so a float read from the CSV (e.g. 13.0) finds the same
#entry as the int key 13. Listing both forms only makes the later entry
#silently overwrite the earlier one.
id_to_type = {
    13: "CCS_SAE",
    3: "CHAdeMO",
    12: "Wall_AU_NZ",
    7: "Type2",
    6: "Tesla",
    14: "Three_Phase",
    2: "J1772",
    15: "Caravan_Mains_Socket",
}

#Dictionary containing all networks and their corresponding values
id_to_network = {
    8.0: "Supercharger",
    60.0: "Evie Networks",
    0.0: "Non-networked",
    48.0: "Chargefox",
    35.0: "Tesla Destination",
    57.0: "EVUp",
    1.0: "ChargePoint",
    1292.0: "AMPCharge",
    1371.0: "Chargebay",
    70.0: "Exploren",
    62.0: "Everty",
    63.0: "NRMA",
    47.0: "Electrify America",
    26.0: "Shell Recharge",
    1039.0: "EVGateway",
    61.0: "CalTrans",
    2.0: "Blink",
    3.0: "SemaConnect",
    19.0: "EVgo",
    1379.0: "OpenLoop",
}



In [64]:
#Extract the needed fields from one csv row, combining the per-station and per-outlet columns
def extract(row):
    """Flatten one scraped row into the fields kept for cleaning.

    The row is expected to expose columns named ``stations[i].id``,
    ``stations[i].network_id``, ``stations[i].outlets[j].connector`` and
    ``stations[i].outlets[j].kilowatts``. Stations are read from index 0
    upwards until a station id column is absent or holds a missing value;
    within each station, outlets are read from index 0 upwards until a
    connector column is absent or holds a missing value.

    Args:
        row: Mapping-like record (a DataFrame row or a dict) supporting
            ``in`` on column names and ``row[column]`` lookup.

    Returns:
        Tuple ``(name, address, longitude, latitude, phone, networks,
        plugs, kw)``. ``networks`` holds one raw network id per station
        that has a network_id column (the value may be NaN), ``plugs``
        holds the raw connector ids, and ``kw`` holds the raw kilowatt
        values (may be NaN) of every outlet that has a kilowatts column.

    Raises:
        KeyError: If one of the name/address/longitude/latitude/phone
            columns is missing from the row.
    """
    networks = []
    plugs = []
    kw = []

    station = 0
    while True:
        station_prefix = f'stations[{station}]'
        id_column = f'{station_prefix}.id'
        # pd.isna is used instead of np.isnan because it also accepts
        # strings and None, which np.isnan rejects with a TypeError.
        if id_column not in row or pd.isna(row[id_column]):
            break

        network_column = f'{station_prefix}.network_id'
        if network_column in row:
            networks.append(row[network_column])

        outlet = 0
        while True:
            outlet_prefix = f'{station_prefix}.outlets[{outlet}]'
            connector_column = f'{outlet_prefix}.connector'
            if connector_column not in row or pd.isna(row[connector_column]):
                break

            plugs.append(row[connector_column])
            kilowatts_column = f'{outlet_prefix}.kilowatts'
            if kilowatts_column in row:
                kw.append(row[kilowatts_column])
            outlet += 1

        station += 1

    return (
        row["name"],
        row["address"],
        row["longitude"],
        row["latitude"],
        row['phone'],
        networks,
        plugs,
        kw,
    )

#Convert the numbers from scraped data into their corresponding network using dictionary
def conv_network(row):
    """Translate a row's network ids into distinct network names.

    Args:
        row: Record whose ``'networks'`` entry is an iterable of network
            ids; NaN entries are treated as id 0.

    Returns:
        List of network names looked up in ``id_to_network``, with
        duplicates removed and the order of first appearance kept.

    Raises:
        KeyError: If an id has no entry in ``id_to_network``.
    """
    # NaN is the only value that is not equal to itself.
    network_ids = [0 if x != x else x for x in row['networks']]
    names = [id_to_network[number] for number in network_ids]
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is the same on every run (a set of strings has no stable order).
    return list(dict.fromkeys(names))

#Translate every scraped connector id of a row into its plug type name using the dictionary
def conv_type(row):
    """Return the plug type name for each entry of ``row['plugs']``.

    The result has one name per connector id, in the same order, with
    duplicates kept. NaN entries are looked up under key 0.

    Raises:
        KeyError: If a key has no entry in ``id_to_type``.
    """
    lookup_keys = []
    for connector_id in row['plugs']:
        # NaN is the only value that is not equal to itself.
        if connector_id != connector_id:
            lookup_keys.append(0)
        else:
            lookup_keys.append(connector_id)

    plug_names = []
    for key in lookup_keys:
        plug_names.append(id_to_type[key])
    return plug_names

#Clean the watt values into knowns and unknowns
def conv_watt(row):
    """Return the distinct power values of a row, marking missing ones.

    Args:
        row: Record whose ``'kw'`` entry is an iterable of kilowatt values.

    Returns:
        List of the distinct values in order of first appearance, with
        every NaN replaced by the string ``"unknown"``.
    """
    # NaN is the only value that is not equal to itself.
    cleaned = ["unknown" if x != x else x for x in row['kw']]
    # dict.fromkeys drops duplicates while keeping first-seen order; a set
    # mixing floats with the string "unknown" has no stable order between runs.
    return list(dict.fromkeys(cleaned))

#Run extract over every row and rebuild a dataframe from the returned tuples
df1 = df.apply(extract, axis=1)
df1 = pd.DataFrame.from_records(
    df1,
    columns=[
        'name',
        'address',
        'longitude',
        'latitude',
        'contact',
        'networks',
        'plugs',
        'kw',
    ],
)

#Replace the raw network ids with the list of networks at each location
networks = df1.apply(conv_network, axis=1)
df1['networks'] = networks

#Count the plugs per location, then add one count column per plug type
types = df1.apply(conv_type, axis=1)
df1['total_plugs'] = list(map(len, df1['plugs']))
typed = pd.DataFrame([Counter(plug_names) for plug_names in types])
typed = typed.sort_index(axis=1)
typed = typed.fillna(0).astype(int)
typed = typed.add_prefix('Plugs_')
df1 = df1.join(typed)
df1 = df1.drop(columns=['plugs'])

#Replace the raw kw lists with the distinct power outputs (or "unknown")
watts = df1.apply(conv_watt, axis=1)
df1["power_outputs_kw"] = watts
df1 = df1.drop(columns=['kw'])

#Insert empty (NaN) columns at fixed positions, in this exact order: each
#position is counted after the previous insert has already shifted the columns
for _loc, _column in (
    (4, "description"),
    (5, "parking"),
    (6, "pricing"),
    (10, "renewable_power_supply"),
    (17, "Plugs_Commando"),
    (20, "Plugs_Other"),
):
    df1.insert(loc=_loc, column=_column, value=[np.nan] * len(df1))

df1["source"] = "PlugShare_Scrape"
#The source date is hard-coded; replace it with the current date before running
df1["source_date"] = "20/08/2022"

In [65]:
#Write the cleaned data without the index column; rename the output CSV as needed
df1.to_csv(path_or_buf="Region.csv", index=False)

In [61]:
#Show the cleaned dataframe as the cell output
df1

Unnamed: 0,name,address,longitude,latitude,description,parking,pricing,contact,networks,total_plugs,...,Plugs_J1772,Plugs_Tesla,Plugs_Three_Phase,Plugs_Commando,Plugs_Type2,Plugs_Wall_AU_NZ,Plugs_Other,power_outputs_kw,source,source_date
0,Geelong Supercharger,"470-510 Princes Hwy, Geelong, VIC, Australia, ...",144.382451,-38.065192,,,,61280152834,[Supercharger],6,...,0,6,0,,0,0,,[250.0],PlugShare_Scrape,20/08/2022
1,Penguin Parade Visitor's Centre,"995 Ventnor Road Summerlands VIC 3922, Australia",145.148488,-38.505496,,,,,[Non-networked],2,...,0,0,0,,0,0,,[25.0],PlugShare_Scrape,20/08/2022
2,Mornington Supercharger,"75 Mornington-Tyabb Rd, Mornington VIC 3931, A...",145.051085,-38.234305,,,,61280152834,[Supercharger],6,...,0,6,0,,0,0,,[250.0],PlugShare_Scrape,20/08/2022
3,Kingston Village Square (DC Fast Charger),"Kingston Village Square, Grubb Rd, Ocean Grove...",144.540122,-38.246677,,,,1300518038,[Chargefox],4,...,0,0,0,,2,0,,"[50.0, unknown]",PlugShare_Scrape,20/08/2022
4,BIG4 Phillip Island Caravan Park,"24 Old Bridge Drive, Newhaven VIC 3925, Australia",145.356460,-38.516829,,,,0359567227,[Non-networked],2,...,0,0,0,,0,1,,[unknown],PlugShare_Scrape,20/08/2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,Holiday Inn Express Melbourne Southbank,"35-47 City Rd, Southbank VIC 3006, Australia",144.966652,-37.821895,,,,,[Non-networked],9,...,0,0,0,,9,0,,[unknown],PlugShare_Scrape,20/08/2022
246,Secure Parking (450 Flinders Lane),"450 Flinders Ln, Melbourne VIC 3000, Australia",144.958866,-37.818758,,,,,[Non-networked],10,...,0,0,0,,10,0,,[unknown],PlugShare_Scrape,20/08/2022
247,Rialto Car Park,"476 Flinders Ln, Melbourne VIC 3000, Australia",144.957613,-37.819100,,,,,[Non-networked],3,...,0,0,0,,2,1,,[unknown],PlugShare_Scrape,20/08/2022
248,Lorbek Luxury Cars,"327 Plummer St, Port Melbourne VIC 3207, Austr...",144.919764,-37.833209,,,,,[Everty],3,...,0,0,0,,3,0,,[22.0],PlugShare_Scrape,20/08/2022
