## EVAT Reliability Scoring & Sentiment Dashboard
This notebook performs data integration and reliability scoring for electric vehicle (EV) charging stations. It was designed for the EVAT Capstone Project.

In [1]:
!pip install pymongo


Collecting pymongo
  Downloading pymongo-4.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.8.0-py3-none-any.whl.metadata (5.7 kB)
Downloading pymongo-4.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dnspython-2.8.0-py3-none-any.whl (331 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m331.1/331.1 kB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dnspython, pymongo
Successfully installed dnspython-2.8.0 pymongo-4.15.0


In [2]:
from pymongo import MongoClient

import os

# The connection string is read from the EVAT_MONGO_URI environment variable so
# real credentials never have to be typed into (or committed with) the notebook.
# The original placeholder remains the fallback, so nothing changes when the
# variable is not set.
MONGO_URI = os.environ.get("EVAT_MONGO_URI", "your_connection_string_here")

client = MongoClient(MONGO_URI)
db = client["evat_database"]

# Collection handles used by the rest of the project
charger_logs = db["charger_logs"]
feedback = db["user_feedback"]
stations = db["charging_stations"]

###  Step 1: Load Session Logs & Charger Metadata
We begin by loading session logs and metadata for chargers from different cities such as Dundee, Perth, and Palo Alto.

In [3]:

import pandas as pd

# Raw session-log exports, one CSV per city (replace with actual paths)
dundee_df, palo_df, perth_df = (
    pd.read_csv(csv_path)
    for csv_path in ("dundee.csv", "palo_alto.csv", "perth.csv")
)

# Show the column layout of each dataset; the three sources do not share a schema
for city_frame in (dundee_df, palo_df, perth_df):
    print(city_frame.columns)

  palo_df = pd.read_csv("palo_alto.csv")


Index(['_id', 'Charging event', 'CP ID', 'Connector', 'Start Date',
       'Start Time', 'End Date', 'End Time', 'Total kWh', 'Cost', 'Site',
       'Group', 'Model'],
      dtype='object')
Index(['Station Name', 'MAC Address', 'Org Name', 'Start Date',
       'Start Time Zone', 'End Date', 'End Time Zone',
       'Transaction Date (Pacific Time)', 'Total Duration (hh:mm:ss)',
       'Charging Time (hh:mm:ss)', 'Energy (kWh)', 'GHG Savings (kg)',
       'Gasoline Savings (gallons)', 'Port Type', 'Port Number', 'Plug Type',
       'EVSE ID', 'Address 1', 'City', 'State/Province', 'Postal Code',
       'Country', 'Latitude', 'Longitude', 'Currency', 'Fee', 'Ended By',
       'Plug In Event Id', 'Driver Postal Code', 'User ID', 'County',
       'System S/N', 'Model Number'],
      dtype='object')
Index(['_id', 'CP ID', 'Connector', 'Start Date', 'Start Time', 'End Date',
       'End Time', 'Total kWh', 'Site', 'Model'],
      dtype='object')


In [4]:
import pandas as pd
import numpy as np

# Step 1: Load the Dundee CSV
df = pd.read_csv("dundee.csv")  # Replace with your actual file path

# Step 2: Combine the date and time columns into full timestamps.
# errors='coerce' turns unparsable values into NaT instead of aborting the
# cell, matching how the Perth data is parsed later in this notebook.
df['start_datetime'] = pd.to_datetime(df['Start Date'] + ' ' + df['Start Time'], errors='coerce')
df['end_datetime'] = pd.to_datetime(df['End Date'] + ' ' + df['End Time'], errors='coerce')

# Step 3: Calculate session duration in hours
df['duration_hours'] = (df['end_datetime'] - df['start_datetime']).dt.total_seconds() / 3600

# Step 3b: Keep only sessions with a valid, non-negative duration.
# A negative duration would otherwise be flagged as an "error" session and be
# subtracted from the charger's total uptime. NaN durations fail the comparison
# and are dropped as well. .copy() gives an independent frame so the column
# assignment below is not a write to a slice of the original.
df = df[df['duration_hours'] >= 0].copy()

# Step 4: Simulate error sessions – anything under 15 minutes
df['is_error'] = df['duration_hours'] < 0.25  # 0.25 hours = 15 minutes

# Step 5: Group by charger ID to calculate stats
reliability_df = df.groupby('CP ID').agg(
    total_sessions=('duration_hours', 'count'),
    total_uptime_hours=('duration_hours', 'sum'),
    error_sessions=('is_error', 'sum')
).reset_index()

# Step 6: Normalize uptime to a percentage of the busiest charger.
# This is a relative measure: the charger with the most hours is always 100 %.
max_uptime = reliability_df['total_uptime_hours'].max()
reliability_df['uptime_percent'] = (reliability_df['total_uptime_hours'] / max_uptime) * 100

# Step 7: Simulate average user rating based on uptime %.
# np.select takes the first matching condition, so the thresholds are listed
# from highest to lowest; anything at or below 30 % gets the default 1.0.
uptime_pct = reliability_df['uptime_percent']
reliability_df['avg_rating'] = np.select(
    [uptime_pct > 90, uptime_pct > 70, uptime_pct > 50, uptime_pct > 30],
    [4.5, 4.0, 3.0, 2.0],
    default=1.0,
)

# Step 8: Export to CSV (optional)
reliability_df.to_csv("reliability_scoring_sprint1.csv", index=False)

# Step 9: View the first few rows
print(reliability_df.head())


  df['start_datetime'] = pd.to_datetime(df['Start Date'] + ' ' + df['Start Time'])
  df['end_datetime'] = pd.to_datetime(df['End Date'] + ' ' + df['End Time'])


   CP ID  total_sessions  total_uptime_hours  error_sessions  uptime_percent  \
0  50230              16         3789.916667               4       93.396393   
1  50234               2           21.850000               0        0.538458   
2  50236               5          143.066667               0        3.525648   
3  50238               8          113.266667               1        2.791275   
4  50240              21          372.083333               1        9.169395   

   avg_rating  
0         4.5  
1         1.0  
2         1.0  
3         1.0  
4         1.0  


In [5]:
import pandas as pd
import numpy as np

# Step 1: Load the Palo Alto dataset
# NOTE(review): the recorded run emitted a warning on this read — presumably
# pandas' mixed-type DtypeWarning (TODO confirm). low_memory=False makes pandas
# infer each column's dtype from the whole file instead of chunk by chunk.
palo_df = pd.read_csv("palo_alto.csv", low_memory=False)  # Replace with your file path

# Step 2: Convert 'Total Duration (hh:mm:ss)' to hours
def duration_to_hours(duration_str):
    """Convert an 'hh:mm:ss' duration string to fractional hours.

    Parameters
    ----------
    duration_str : str
        Duration with exactly three colon-separated integer fields.

    Returns
    -------
    float
        Hours as a float, or NaN when the value is missing or malformed.
        NaN (rather than 0) keeps unusable rows out of the session count and
        stops them being flagged as sub-15-minute "error" sessions.
    """
    try:
        h, m, s = map(int, duration_str.split(':'))
    except (AttributeError, ValueError):
        # AttributeError: not a string (e.g. NaN/None from an empty CSV cell)
        # ValueError: wrong number of fields, or a field that is not an integer
        return float('nan')
    return h + m / 60 + s / 3600

# Session length in hours, derived from the hh:mm:ss text column
palo_df['duration_hours'] = palo_df['Total Duration (hh:mm:ss)'].map(duration_to_hours)

# Step 3: Sessions shorter than 15 minutes (0.25 h) are treated as errors
palo_df['is_error'] = palo_df['duration_hours'].lt(0.25)

# Step 4: Per-charger statistics, keyed by 'EVSE ID'
sessions_by_evse = palo_df.groupby('EVSE ID')
reliability_palo = sessions_by_evse.agg(
    total_sessions=('duration_hours', 'count'),
    total_uptime_hours=('duration_hours', 'sum'),
    error_sessions=('is_error', 'sum'),
).reset_index()

# Step 5: Uptime as a percentage of the busiest charger's total hours
max_uptime = reliability_palo['total_uptime_hours'].max()
reliability_palo['uptime_percent'] = (reliability_palo['total_uptime_hours'] / max_uptime) * 100

# Step 6: Simulated average rating, stepped by uptime %
# (first matching threshold wins; at or below 30 % falls through to 1.0)
palo_pct = reliability_palo['uptime_percent']
reliability_palo['avg_rating'] = np.select(
    [palo_pct > 90, palo_pct > 70, palo_pct > 50, palo_pct > 30],
    [4.5, 4.0, 3.0, 2.0],
    default=1.0,
)

# Step 7: Export to CSV
reliability_palo.to_csv("palo_alto_reliability.csv", index=False)

# Preview the result
print(reliability_palo.head())


  palo_df = pd.read_csv("palo_alto.csv")  # Replace with your file path


   EVSE ID  total_sessions  total_uptime_hours  error_sessions  \
0   3792.0            4369        10884.250833             159   
1   4318.0            3643         8898.387500             154   
2   6123.0            3623        10146.903333             136   
3   6626.0            3941        11304.371944             157   
4   6859.0            6040        13898.154444             235   

   uptime_percent  avg_rating  
0       33.738516         2.0  
1       27.582825         1.0  
2       31.452919         2.0  
3       35.040788         2.0  
4       43.080880         2.0  


In [6]:
import pandas as pd
import numpy as np

# Load the Perth dataset
perth_df = pd.read_csv("perth.csv")  # Change to your actual file path

# 1. Parse datetime (auto-detect format); unparsable values become NaT
perth_df['start_datetime'] = pd.to_datetime(
    perth_df['Start Date'] + ' ' + perth_df['Start Time'], errors='coerce'
)
perth_df['end_datetime'] = pd.to_datetime(
    perth_df['End Date'] + ' ' + perth_df['End Time'], errors='coerce'
)

# 2. Calculate session duration in hours
perth_df['duration_hours'] = (perth_df['end_datetime'] - perth_df['start_datetime']).dt.total_seconds() / 3600

# 3. Remove negative durations. Rows whose duration is NaN (from a NaT
#    timestamp) fail the comparison and are removed as well.
#    .copy() makes the filtered frame independent of the original, so adding
#    the 'is_error' column below is not a write to a slice.
perth_df = perth_df[perth_df['duration_hours'] >= 0].copy()

# 4. Simulate errors – sessions under 15 minutes
perth_df['is_error'] = perth_df['duration_hours'] < 0.25

# 5. Group by charger ID
reliability_perth = perth_df.groupby('CP ID').agg(
    total_sessions=('duration_hours', 'count'),
    total_uptime_hours=('duration_hours', 'sum'),
    error_sessions=('is_error', 'sum')
).reset_index()

# 6. Normalize uptime relative to the busiest charger (which becomes 100 %)
max_uptime = reliability_perth['total_uptime_hours'].max()
reliability_perth['uptime_percent'] = (reliability_perth['total_uptime_hours'] / max_uptime) * 100

# 7. Simulate reliability rating.
#    np.select takes the first matching condition, so the thresholds run from
#    highest to lowest; at or below 30 % falls through to the default 1.0.
perth_pct = reliability_perth['uptime_percent']
reliability_perth['avg_rating'] = np.select(
    [perth_pct > 90, perth_pct > 70, perth_pct > 50, perth_pct > 30],
    [4.5, 4.0, 3.0, 2.0],
    default=1.0,
)

# 8. Export to CSV
reliability_perth.to_csv("perth_reliability_cleaned.csv", index=False)

# 9. View sample
print(reliability_perth.head())


  perth_df['start_datetime'] = pd.to_datetime(
  perth_df['end_datetime'] = pd.to_datetime(


   CP ID  total_sessions  total_uptime_hours  error_sessions  uptime_percent  \
0  50245            1161          590.533333             289        2.808050   
1  50275             401        21030.016667               9      100.000000   
2  50276             596         3095.500000              25       14.719437   
3  50277             349         2571.883333              19       12.229583   
4  50278             392         2687.283333              22       12.778322   

   avg_rating  
0         1.0  
1         4.5  
2         1.0  
3         1.0  
4         1.0  


###  Step 2: Load & Clean Charger Metadata
This section loads the charger metadata which will be joined with session reliability data.

In [7]:
import pandas as pd

# Melbourne charger metadata, one row per charger
charger_info = pd.read_csv("charger_info_mel.csv")

# Assign sequential IDs by row position: MEL001, MEL002, ...
charger_info["Charger ID"] = [
    f"MEL{position:03d}" for position in range(1, len(charger_info) + 1)
]


In [8]:
import numpy as np
from datetime import datetime, timedelta

np.random.seed(42)  # fixed seed so the simulated logs are repeatable

_SIM_START = datetime(2025, 7, 1)


def _simulate_session(charger_id):
    """Draw one synthetic charging session for ``charger_id``.

    The random draws happen in a fixed order (day, hour, minute, duration,
    then status/rating) so the sequence produced under the seed is stable.
    Sessions shorter than 15 minutes are marked "Error" and given a low rating.
    """
    day_offset = np.random.randint(0, 10)
    hour_offset = np.random.randint(0, 24)
    minute_offset = np.random.randint(0, 60)
    began_at = _SIM_START + timedelta(days=day_offset, hours=hour_offset, minutes=minute_offset)

    duration = np.random.randint(5, 120)  # in minutes
    finished_at = began_at + timedelta(minutes=duration)

    if duration < 15:
        status = "Error"
        rating = round(np.random.uniform(1.0, 2.5), 1)
    else:
        status = np.random.choice(["Available", "In Use"])
        rating = round(np.random.uniform(3.5, 5.0), 1)

    return {
        "Charger ID": charger_id,
        "Start Time": began_at,
        "End Time": finished_at,
        "Duration (min)": duration,
        "Status": status,
        "User Rating": rating
    }


# Five simulated sessions per charger, in charger order
session_logs = [
    _simulate_session(charger_id)
    for charger_id in charger_info["Charger ID"]
    for _ in range(5)
]

session_df = pd.DataFrame(session_logs)


###  Step 3: Identify Error Sessions
We flag session errors based on short durations (< 15 minutes).

In [9]:
# A session counts as an error when it lasted fewer than 15 minutes
session_df["Is Error"] = session_df["Duration (min)"].lt(15)


###  Step 4: Aggregate Charger Reliability Stats
We calculate the reliability of each charger using error rates, total sessions, and uptime.

In [10]:
# Per-charger totals: session count, minutes in use, error count and mean rating
sessions_by_charger = session_df.groupby("Charger ID")
reliability_summary = sessions_by_charger.agg(
    total_sessions=("Duration (min)", "count"),
    total_uptime_minutes=("Duration (min)", "sum"),
    error_sessions=("Is Error", "sum"),
    avg_rating=("User Rating", "mean"),
).reset_index()

# Express uptime in hours
reliability_summary = reliability_summary.assign(
    total_uptime_hours=lambda stats: stats["total_uptime_minutes"] / 60
)

# Scale uptime against the busiest charger to get a percentage
max_uptime = reliability_summary["total_uptime_hours"].max()
reliability_summary = reliability_summary.assign(
    uptime_percent=(reliability_summary["total_uptime_hours"] / max_uptime) * 100
)


###  Step 5:  Charger Reliability Stats from Open Charge Map (OCM)
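We retrieve the charging stations within 50 km of central Melbourne from the Open Charge Map (OCM) API and collect each station's name, status, location, address, power rating, and connection type.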

In [11]:
import requests
import pandas as pd

import os
from getpass import getpass

# The Open Charge Map key is read from the OCM_API_KEY environment variable, or
# prompted for when it is not set, so the secret is not stored in the notebook.
API_KEY = os.environ.get("OCM_API_KEY") or getpass("Open Charge Map API key: ")

url = "https://api.openchargemap.io/v3/poi/"

params = {
    "output": "json",
    "latitude": -37.8136,
    "longitude": 144.9631,
    "distance": 50,             # 50 km radius
    "distanceunit": "KM",
    "maxresults": 1000,         # Make sure this is spelled exactly like this
    "key": API_KEY
}


def _flatten_station(station):
    """Reduce one OCM POI record to the flat fields used for scoring.

    Nested objects are read with ``or {}`` / ``or []`` fallbacks because a key
    may be present with a null value, which a plain ``.get(key, {})`` default
    does not cover. Power and connection type come from the first listed
    connection only; both are None when the station lists no connections.
    """
    address_info = station.get('AddressInfo') or {}
    status_type = station.get('StatusType') or {}
    connections = station.get('Connections') or []
    first_connection = (connections[0] or {}) if connections else {}
    connection_type = first_connection.get('ConnectionType') or {}

    return {
        "Name": address_info.get('Title', 'Unknown'),
        "Status": status_type.get('Title', 'Unknown'),
        "Latitude": address_info.get('Latitude', None),
        "Longitude": address_info.get('Longitude', None),
        "Address": address_info.get('AddressLine1', ''),
        "PowerKW": first_connection.get('PowerKW', None),
        "ConnectionType": connection_type.get('Title', None)
    }


# A timeout stops the cell hanging indefinitely if the API does not answer
response = requests.get(url, params=params, timeout=30)

if response.status_code == 200:
    data = response.json()

    station_list = [_flatten_station(station) for station in data]

    df = pd.DataFrame(station_list)
    print(f"Total stations retrieved: {df.shape[0]}")
    display(df.head(10))  # Display first 10
else:
    # NOTE: `df` is not reassigned on failure, so later cells would still see
    # whatever `df` held before this cell ran.
    print("API Error:", response.status_code)


Total stations retrieved: 115


Unnamed: 0,Name,Status,Latitude,Longitude,Address,PowerKW,ConnectionType
0,RMIT - City Campus,Operational,-37.806248,144.964016,17-21 Cardigan Street,75.0,CCS (Type 2)
1,Lorbek Luxury Cars,Operational,-37.821362,144.951481,30 Prohasky Street,11.0,Type 2 (Tethered Connector)
2,Tesla Supercharger South Melbourne,Operational,-37.83227,144.960963,Clarendon Street,250.0,CCS (Type 2)
3,Collingwood Library,Operational,-37.804466,144.993244,13 Stanton St,50.0,CCS (Type 2)
4,Abbotsford Supercharger,Operational,-37.809085,144.995885,"313 Victoria Street, Abbotsford Victoria Austr...",250.0,CCS (Type 2)
5,JET Charge Office,Operational,-37.828584,144.934439,350 Bridge Street,80.0,CHAdeMO
6,Evie Richmond Library,Operational,-37.825531,144.998745,Swan Street,50.0,CCS (Type 2)
7,650 Church Street Building 5,Operational,-37.831886,144.995867,Church Street,120.0,CCS (Type 2)
8,Woolworths Fishermans Bend,Operational,-37.831348,144.929827,Plumber Street,50.0,CCS (Type 2)
9,Middy’s Port Melbourne,Operational,-37.82819,144.925654,2 Thackray Road,25.0,CCS (Type 2)


## Step 6: Scoring stations based on Status and Power

In [12]:
# Step 1: Score stations based on Status and Power
df['status_score'] = df['Status'].apply(lambda x: 100 if x == 'Operational' else 0)

# Stations with no usable PowerKW value are scored as 0 kW; otherwise the NaN
# would carry through into reliability_score. The PowerKW column itself is not
# modified here. The max is guarded so an all-zero column cannot divide by zero.
power_kw = pd.to_numeric(df['PowerKW'], errors='coerce').fillna(0)
peak_power_kw = power_kw.max() if power_kw.max() > 0 else 1
df['power_score'] = (power_kw / peak_power_kw) * 100

# Weighted blend: 60 % operational status, 40 % relative charging power
df['reliability_score'] = (df['status_score'] * 0.6) + (df['power_score'] * 0.4)

# Step 2: Sort and show top 10 most reliable
top_stations = df.sort_values(by='reliability_score', ascending=False).head(10)

# Step 3: Display output
print("Top 10 most reliable stations (based on real API data):")
display(top_stations[['Name', 'Status', 'PowerKW', 'reliability_score']])


Top 10 most reliable stations (based on real API data):


Unnamed: 0,Name,Status,PowerKW,reliability_score
66,Shell Coles Express Taylors Lakes,Operational,350.0,100.0
87,Heatherton Road,Operational,350.0,100.0
34,Tesla Supercharger Brighton East,Operational,250.0,88.571429
4,Abbotsford Supercharger,Operational,250.0,88.571429
42,Tesla Supercharger Campbellfield,Operational,250.0,88.571429
49,Tesla Supercharger Oakleigh Repair Centre,Operational,250.0,88.571429
114,Mornington Super Charger,Operational,250.0,88.571429
45,Telsa Supercharger Box Hill,Operational,250.0,88.571429
2,Tesla Supercharger South Melbourne,Operational,250.0,88.571429
59,Ampol Derrimut,Operational,180.0,80.571429


In [13]:
# Step 1: Internal EVAT charger list (the CSV must sit next to the notebook)
df_evat = pd.read_csv("charger_info_mel.csv")  # Ensure this CSV is in the same folder

# Step 2: Lower-case and trim both name columns so the match ignores case and
# surrounding whitespace
ocm_names = df['Name'].str.lower().str.strip()
evat_names = df_evat['Charger Name'].str.lower().str.strip()

# Step 3: Flag each EVAT station whose normalised name appears in the OCM result
df_evat['Exists_in_OCM'] = evat_names.isin(ocm_names)

# Step 4: Show every EVAT station with its match flag
print(" Stations in EVAT dataset:")
display(df_evat.loc[:, ['Charger Name', 'Exists_in_OCM']])

# Optional: only the stations with no OCM match
print("\n Stations in EVAT dataset NOT found in OCM:")
not_in_ocm = ~df_evat['Exists_in_OCM']
display(df_evat.loc[not_in_ocm, ['Charger Name']])


 Stations in EVAT dataset:


Unnamed: 0,Charger Name,Exists_in_OCM
0,RMIT - City Campus,True
1,100 St Kilda Rd,False
2,11 Nicholson Street,False
3,Lorbek Luxury Cars,True
4,Tesla Supercharger South Melbourne,True
...,...,...
257,CTR Sea Lake,False
258,Evie Dartmoor,False
259,Anzac Ave Parking,False
260,Nelson St Parking/Charging,False



 Stations in EVAT dataset NOT found in OCM:


Unnamed: 0,Charger Name
1,100 St Kilda Rd
2,11 Nicholson Street
118,RACV Healsville
119,Evie Healsville
120,North Bellarine Aquatic Centre
...,...
257,CTR Sea Lake
258,Evie Dartmoor
259,Anzac Ave Parking
260,Nelson St Parking/Charging


## Step 7: Retrieving Station Data from OCM

In [14]:
import requests
import pandas as pd

import os
from getpass import getpass

#  Step 1: API config
# The key is read from the OCM_API_KEY environment variable, or prompted for
# when it is not set, so the secret is not stored in the notebook.
API_KEY = os.environ.get("OCM_API_KEY") or getpass("Open Charge Map API key: ")
url = "https://api.openchargemap.io/v3/poi/"
params = {
    "output": "json",
    "latitude": -37.8136,       # Melbourne
    "longitude": 144.9631,
    "distance": 50,
    "distanceunit": "KM",
    "maxresults": 1000,
    "key": API_KEY
}

#  Step 2: Make API call (the timeout stops the cell hanging on a dead connection)
response = requests.get(url, params=params, timeout=30)

if response.status_code == 200:
    data = response.json()

    station_list = []
    for station in data:
        # `or {}` / `or []` also cover keys that are present but null, which a
        # plain .get(key, {}) default does not.
        address_info = station.get('AddressInfo') or {}
        status_type = station.get('StatusType') or {}
        connections = station.get('Connections') or []

        # Power and connection type are taken from the first connection only
        power_kw = None
        connection_type = None
        if connections:
            first_connection = connections[0] or {}
            power_kw = first_connection.get('PowerKW')
            connection_type = (first_connection.get('ConnectionType') or {}).get('Title')

        station_list.append({
            "Name": address_info.get('Title', 'Unknown'),
            "Status": status_type.get('Title', 'Unknown'),
            "Latitude": address_info.get('Latitude'),
            "Longitude": address_info.get('Longitude'),
            "Address": address_info.get('AddressLine1', ''),
            "PowerKW": power_kw,
            "ConnectionType": connection_type
        })

    #  Step 3: Create DataFrame
    df = pd.DataFrame(station_list)

    #  Step 4: Display all stations
    print(f" Total stations retrieved from OCM: {df.shape[0]}")
    display(df)

    # Optional: Save to CSV
    df.to_csv("melbourne_ocm_stations.csv", index=False)
    print(" Saved as 'melbourne_ocm_stations.csv'")

else:
    # NOTE: `df` is left as it was on failure; nothing is written to disk.
    print(" API Error:", response.status_code)


 Total stations retrieved from OCM: 115


Unnamed: 0,Name,Status,Latitude,Longitude,Address,PowerKW,ConnectionType
0,RMIT - City Campus,Operational,-37.806248,144.964016,17-21 Cardigan Street,75.0,CCS (Type 2)
1,Lorbek Luxury Cars,Operational,-37.821362,144.951481,30 Prohasky Street,11.0,Type 2 (Tethered Connector)
2,Tesla Supercharger South Melbourne,Operational,-37.832270,144.960963,Clarendon Street,250.0,CCS (Type 2)
3,Collingwood Library,Operational,-37.804466,144.993244,13 Stanton St,50.0,CCS (Type 2)
4,Abbotsford Supercharger,Operational,-37.809085,144.995885,"313 Victoria Street, Abbotsford Victoria Austr...",250.0,CCS (Type 2)
...,...,...,...,...,...,...,...
110,Engie Cranbourne Park,Operational,-38.109075,145.281762,Cranbourne Drive,120.0,CHAdeMO
111,Wallan,Operational,-37.413967,144.979107,85 High Street,50.0,CCS (Type 2)
112,ClydeStone Square,Operational,-38.093410,145.341714,Matterhorn Drive,50.0,CCS (Type 2)
113,Evie Mornington,Operational,-38.224499,145.039643,Cromwell Street,50.0,CCS (Type 2)


 Saved as 'melbourne_ocm_stations.csv'


## Step 8: Top 10 Most Reliable Charging Stations in Melbourne (OCM):

In [15]:
import pandas as pd

#  1. Coerce PowerKW to numbers; anything unparsable or missing becomes 0
df['PowerKW'] = pd.to_numeric(df['PowerKW'], errors='coerce').fillna(0)

#  2. Missing status is recorded as 'Unknown'
df['Status'] = df['Status'].fillna('Unknown')

#  3. Status component: 100 for 'Operational', 0 for anything else
df['status_score'] = df['Status'].eq('Operational').map({True: 100, False: 0})

#  4. Power component: share of the highest PowerKW in the data
#     (falls back to a divisor of 1 when no station has positive power)
highest_power = df['PowerKW'].max()
max_power = highest_power if highest_power > 0 else 1
df['power_score'] = (df['PowerKW'] / max_power) * 100

#  5. Weighted blend: 60 % status, 40 % power
df['reliability_score'] = (df['status_score'] * 0.6) + (df['power_score'] * 0.4)

#  6. Highest score first
scored_df = df.sort_values(by='reliability_score', ascending=False)

#  7. Columns kept for display and export
report_columns = ['Name', 'Status', 'PowerKW', 'ConnectionType', 'Latitude', 'Longitude', 'reliability_score']
final_df = scored_df[report_columns]

#  8. Show the top ten, then write the full ranking to disk
print(" Top 10 Most Reliable Charging Stations in Melbourne (OCM):")
display(final_df.head(10))

final_df.to_csv("melbourne_ocm_scored.csv", index=False)
print(" Scored data saved to 'melbourne_ocm_scored.csv'")


 Top 10 Most Reliable Charging Stations in Melbourne (OCM):


Unnamed: 0,Name,Status,PowerKW,ConnectionType,Latitude,Longitude,reliability_score
66,Shell Coles Express Taylors Lakes,Operational,350.0,CCS (Type 2),-37.696742,144.782052,100.0
87,Heatherton Road,Operational,350.0,CCS (Type 2),-37.971281,145.223697,100.0
2,Tesla Supercharger South Melbourne,Operational,250.0,CCS (Type 2),-37.83227,144.960963,88.571429
42,Tesla Supercharger Campbellfield,Operational,250.0,CCS (Type 2),-37.689125,144.956625,88.571429
45,Telsa Supercharger Box Hill,Operational,250.0,CCS (Type 2),-37.837816,145.134685,88.571429
49,Tesla Supercharger Oakleigh Repair Centre,Operational,250.0,CCS (Type 2),-37.915683,145.103279,88.571429
34,Tesla Supercharger Brighton East,Operational,250.0,CCS (Type 2),-37.907528,145.011178,88.571429
4,Abbotsford Supercharger,Operational,250.0,CCS (Type 2),-37.809085,144.995885,88.571429
114,Mornington Super Charger,Operational,250.0,CCS (Type 2),-38.234664,145.050947,88.571429
43,Ampol Foodary Altona North,Operational,180.0,CCS (Type 2),-37.846404,144.808531,80.571429


 Scored data saved to 'melbourne_ocm_scored.csv'


## User Feedback collection

In [17]:
import requests
import pandas as pd
import time

import os
from getpass import getpass

# The API key is read from the OCM_API_KEY environment variable, or prompted
# for when it is not set, so the secret is not stored in the notebook.
API_KEY = os.environ.get("OCM_API_KEY") or getpass("Open Charge Map API key: ")

# In this cell the key is sent as a request header rather than a query parameter
headers = {
    "User-Agent": "EVAT-Student-Project/1.0",
    "X-API-Key": API_KEY
}

url = "https://api.openchargemap.io/v3/poi/"
params = {
    "output": "json",
    "latitude": -37.8136,
    "longitude": 144.9631,
    "distance": 50,
    "distanceunit": "KM",
    "maxresults": 1000
}

# The timeout stops the cell hanging indefinitely if the API does not answer
response = requests.get(url, headers=headers, params=params, timeout=30)

if response.status_code == 200:
    data = response.json()

    feedback_list = []
    for station in data:
        name = (station.get('AddressInfo') or {}).get('Title', 'Unknown')

        # 'UserComments' may be missing or null; both are treated as "no comments".
        # No delay is needed between stations: every comment comes from the
        # single response fetched above, so the loop makes no further requests.
        for comment in station.get('UserComments') or []:
            feedback_list.append({
                "Station Name": name,
                "Rating": comment.get("Rating", None),
                "Comment": comment.get("Comment", ""),
                "Date": comment.get("DateCreated", "")
            })

    # Save feedback to CSV
    df_feedback = pd.DataFrame(feedback_list)
    df_feedback.to_csv("melbourne_ocm_feedback.csv", index=False)
    print(" Feedback saved to 'melbourne_ocm_feedback.csv'")

else:
    print(" API error:", response.status_code)


 Feedback saved to 'melbourne_ocm_feedback.csv'


In [18]:
import requests
import pandas as pd
import time

import os
from getpass import getpass

# The API key is read from the OCM_API_KEY environment variable, or prompted
# for when it is not set, so the secret is not stored in the notebook.
API_KEY = os.environ.get("OCM_API_KEY") or getpass("Open Charge Map API key: ")
BASE_URL = "https://api.openchargemap.io/v3/poi/"

# Step 1: Get list of POIs around Melbourne
params = {
    "output": "json",
    "latitude": -37.8136,
    "longitude": 144.9631,
    "distance": 50,
    "distanceunit": "KM",
    "maxresults": 1000,
    "compact": False,
    "verbose": True,
    "key": API_KEY
}

response = requests.get(BASE_URL, params=params, timeout=30)
# Fail here with a clear HTTP error instead of trying to parse an error body
# as the station list.
response.raise_for_status()
data = response.json()

# Step 2: Collect the user comments carried on each station record.
# The comments are read from the response fetched above, so no further requests
# (and no rate-limit delay) are needed inside the loop.
feedback_list = []

print(f"Total stations to check: {len(data)}")

for station in data:
    station_name = (station.get('AddressInfo') or {}).get('Title', 'Unknown')

    # 'UserComments' may be missing or null; both are treated as "no comments"
    for comment in station.get('UserComments') or []:
        feedback_list.append({
            "Station Name": station_name,
            "Rating": comment.get('Rating', ''),
            "Comment": comment.get('Comment', ''),
            "Date": comment.get('DateCreated', '')
        })

# Step 3: Save to CSV
# NOTE: this writes the same file name as the previous feedback cell and
# therefore replaces that cell's output.
feedback_df = pd.DataFrame(feedback_list)
feedback_df.to_csv("melbourne_ocm_feedback.csv", index=False)
print(" Feedback scraped and saved as 'melbourne_ocm_feedback.csv'")


Total stations to check: 115
 Feedback scraped and saved as 'melbourne_ocm_feedback.csv'


In [34]:
import pandas as pd

# Step 1: Load the dataset
df = pd.read_csv("EVAT-Final.csv")

# Step 2: View missing values
print(" Missing values per column:\n", df.isnull().sum())

# Step 3: Drop rows with missing critical info
df = df.dropna(subset=["Charger Name", "Address"])  # These are essential

# Step 4: Clean Power column (non-numeric -> NaN, then fill with the median)
df['Power (kW)'] = pd.to_numeric(df['Power (kW)'], errors='coerce')
df['Power (kW)'] = df['Power (kW)'].fillna(df['Power (kW)'].median())

# Step 5: Fill other missing values
df['State'] = df['State'].fillna("VIC")
df['Suburb'] = df['Suburb'].fillna("Unknown")
df['Postal Code'] = df['Postal Code'].fillna("0000")
df['Usage Cost'] = df['Usage Cost'].fillna("Unknown")
df['Number of Points'] = df['Number of Points'].fillna(1)
df['Connection Types'] = df['Connection Types'].fillna("Unknown")

# Forward-fill lat/lon. .ffill() replaces the deprecated fillna(method='ffill').
# NOTE(review): forward-filling gives a station the coordinates of the row
# above it, so it will be mapped at that row's location — confirm this is
# acceptable, or leave the gaps empty and geocode from the address instead.
df['latitude'] = df['latitude'].ffill()
df['longitude'] = df['longitude'].ffill()

# Step 6: Keep reliability_score blank if not matched (no change needed)

# Step 7: Save cleaned dataset
df.to_csv("EVAT-Final-Cleaned.csv", index=False)
print(" Cleaned dataset saved as 'EVAT-Final-Cleaned.csv'")


 Missing values per column:
 Charger ID             0
Charger Name           0
Address                2
Suburb                16
State                  7
Postal Code           43
Power (kW)             4
Usage Cost            66
Number of Points       2
Connection Types       1
latitude              34
longitude             34
Normalized Name        0
reliability_score    146
dtype: int64
 Cleaned dataset saved as 'EVAT-Final-Cleaned.csv'


  df['latitude'] = df['latitude'].fillna(method='ffill')  # forward-fill lat/lon
  df['longitude'] = df['longitude'].fillna(method='ffill')


In [21]:
!pip install fuzzywuzzy[speedup]


Collecting fuzzywuzzy[speedup]
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting python-levenshtein>=0.12 (from fuzzywuzzy[speedup])
  Downloading python_levenshtein-0.27.1-py3-none-any.whl.metadata (3.7 kB)
Collecting Levenshtein==0.27.1 (from python-levenshtein>=0.12->fuzzywuzzy[speedup])
  Downloading levenshtein-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.27.1->python-levenshtein>=0.12->fuzzywuzzy[speedup])
  Downloading rapidfuzz-3.14.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading python_levenshtein-0.27.1-py3-none-any.whl (9.4 kB)
Downloading levenshtein-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (159 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.9/159.9 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
D

In [26]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Seed Python's RNG so the simulated feedback, ratings and uptime generated in
# this cell are the same on every run (Last_Checked still depends on the clock).
random.seed(42)

# Load EVAT dataset
df = pd.read_csv("EVAT-Final-Scored-Filled.csv")

# -----------------------------
# Simulate User Feedback & Rating
# -----------------------------
# Canned comments for each quality tier
feedback_map = {
    'excellent': ["Fantastic experience!", "Smooth and quick charging.", "Highly reliable station."],
    'good': ["Works most of the time.", "Decent performance.", "Rare issues, overall okay."],
    'average': ["Occasional interruptions.", "Can be unreliable.", "Slow charging at times."],
    'poor': ["Frequent problems.", "Needs maintenance.", "Wouldn't recommend."]
}

# (minimum score, tier, lowest rating, highest rating), checked top-down;
# scores below every minimum fall into the 'poor' tier.
_FEEDBACK_TIERS = (
    (90, 'excellent', 4.5, 5.0),
    (70, 'good', 3.5, 4.4),
    (50, 'average', 2.5, 3.4),
)


def generate_feedback(score):
    """Pick a simulated (comment, rating) pair for a reliability score.

    A missing score yields an empty comment and a NaN rating. Otherwise the
    score selects a tier; the comment is drawn at random from that tier's
    phrases and the rating uniformly from the tier's range, rounded to 1 dp.
    """
    if pd.isna(score):
        return "", np.nan

    tier, lowest, highest = 'poor', 1.0, 2.4
    for minimum, tier_name, tier_low, tier_high in _FEEDBACK_TIERS:
        if score >= minimum:
            tier, lowest, highest = tier_name, tier_low, tier_high
            break

    # Comment is drawn before the rating so the random sequence is stable
    comment = random.choice(feedback_map[tier])
    rating = round(random.uniform(lowest, highest), 1)
    return comment, rating

# Each score yields a (comment, rating) pair; wrapping it in a Series spreads
# the pair across the two new columns.
feedback_columns = ['User_Feedback', 'Rating']
df[feedback_columns] = df['reliability_score'].apply(
    lambda score: pd.Series(generate_feedback(score))
)

# -----------------------------
# Simulate Uptime/Downtime & Status (realistic)
# -----------------------------
# (minimum score, mean uptime, standard deviation), checked top-down;
# scores below every minimum use the last-resort values inside the function.
_UPTIME_TIERS = (
    (90, 98.5, 1),
    (75, 95, 2),
    (60, 90, 3),
    (50, 85, 5),
)


def simulate_uptime(score):
    """Draw a simulated uptime percentage for a reliability score.

    A missing score gets a uniform draw between 70 and 95. Otherwise the value
    is a Gaussian draw whose mean and spread depend on the score tier. The
    result is rounded to 2 dp and is not clipped here.
    """
    if pd.isna(score):
        return round(random.uniform(70, 95), 2)

    mean, spread = 75, 6
    for minimum, tier_mean, tier_spread in _UPTIME_TIERS:
        if score >= minimum:
            mean, spread = tier_mean, tier_spread
            break
    return round(random.gauss(mean, spread), 2)

# Simulated uptime, limited to the 60–100 % range
simulated_uptime = df['reliability_score'].apply(simulate_uptime)
df['Uptime_%'] = simulated_uptime.clip(lower=60, upper=100)

# Downtime is the complement of uptime
df['Downtime_%'] = df['Uptime_%'].rsub(100).round(2)

# Chargers at 93 % uptime or better count as online
df['Status'] = df['Uptime_%'].ge(93).map({True: "Online", False: "Needs Maintenance"})

# -----------------------------
# Simulate Last Checked Timestamp
# -----------------------------
def random_timestamp():
    """Return a 'YYYY-MM-DD HH:MM:SS' string up to 15 days and 23 hours before now."""
    days_back = random.randint(0, 15)
    hours_back = random.randint(0, 23)
    checked_at = datetime.now() - timedelta(days=days_back, hours=hours_back)
    return checked_at.strftime('%Y-%m-%d %H:%M:%S')

# One independently drawn "last checked" timestamp per row
df['Last_Checked'] = [random_timestamp() for _row_label in df.index]

# -----------------------------
# Save Final Enriched Dataset
# -----------------------------
enriched_csv = "EVAT-Final-Enriched.csv"
df.to_csv(enriched_csv, index=False)
print(" Final enriched dataset saved as 'EVAT-Final-Enriched.csv'")


 Final enriched dataset saved as 'EVAT-Final-Enriched.csv'


Step 1: Create Weighted Reliability + Sentiment-Based Scoring

## Importing the Finalised, Enriched EVAT Dataset

In [27]:
import pandas as pd

# Reload the enriched dataset from disk and list the columns it contains
df = pd.read_csv("EVAT-Final-Enriched.csv")
print(list(df.columns))


['Charger ID', 'Charger Name', 'Address', 'Suburb', 'State', 'Postal Code', 'Power (kW)', 'Usage Cost', 'Number of Points', 'Connection Types', 'latitude', 'longitude', 'Normalized Name', 'reliability_score', 'User_Feedback', 'Rating', 'Uptime_%', 'Downtime_%', 'Status', 'Last_Checked']


## Generating HTML Page for EVAT Chargers Map

In [29]:
import pandas as pd
import folium
from folium.plugins import MarkerCluster

from html import escape

# === Step 1: Load your enriched EVAT dataset ===
df = pd.read_csv("EVAT-Final-Enriched.csv")

# === Step 2: Drop rows without coordinates ===
df = df.dropna(subset=["latitude", "longitude"])

# === Step 3: Initialize a base Folium map centered around Melbourne ===
melbourne_coords = [-37.8136, 144.9631]
ev_map = folium.Map(location=melbourne_coords, zoom_start=11, tiles='CartoDB positron')

# === Step 4: Add clustering for better pin grouping ===
marker_cluster = MarkerCluster().add_to(ev_map)


def _popup_value(row, column, missing='N/A'):
    """Return a cell as HTML-safe text, or ``missing`` when it is absent or NaN.

    Values are escaped because they are inserted into the popup's HTML; a name
    or comment containing '<' or '&' would otherwise corrupt the markup.
    """
    value = row.get(column)
    if value is None or pd.isna(value):
        return missing
    return escape(str(value))


# === Step 5: Add each charger as a pin with popup info ===
for _, row in df.iterrows():
    # The simulated comment is stored in the 'User_Feedback' column; the
    # dataset has no 'Comment' column, so reading that always showed 'N/A'.
    popup_text = f"""
    <b>{_popup_value(row, 'Charger Name', 'Unknown')}</b><br>
    <b>Address:</b> {_popup_value(row, 'Address')}<br>
    <b>Suburb:</b> {_popup_value(row, 'Suburb')}<br>
    <b>Power:</b> {_popup_value(row, 'Power (kW)')} kW<br>
    <b>Uptime:</b> {_popup_value(row, 'Uptime_%')}%<br>
    <b>Downtime:</b> {_popup_value(row, 'Downtime_%')}%<br>
    <b>Rating:</b> {_popup_value(row, 'Rating')} / 5<br>
    <b>Reliability Score:</b> {_popup_value(row, 'reliability_score')}<br>
    <b>Comment:</b> {_popup_value(row, 'User_Feedback')}
    """

    folium.Marker(
        location=[row["latitude"], row["longitude"]],
        popup=folium.Popup(popup_text, max_width=350),
        icon=folium.Icon(color="blue", icon="bolt", prefix="fa")
    ).add_to(marker_cluster)

# === Step 6: Save the map to an HTML file ===
ev_map.save("EVAT_Chargers_Map.html")
print(" EVAT_Chargers_Map.html has been generated.")


 EVAT_Chargers_Map.html has been generated.
