Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove pandas dependency by decoupling csv writing from dataframes #176

Merged
merged 28 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3c3ee1b
Remove experiment app run files
machallboyd Apr 24, 2023
14cd54b
[#164] Start a boilerplate csv writer
machallboyd Apr 24, 2023
91dd683
[#164] refactor unused(?) get_events function
machallboyd Apr 24, 2023
2847878
[#164] put hive_cosim back
machallboyd Apr 24, 2023
7d9e83d
[#164] convert time_step_stats_handler to no pandas
machallboyd Apr 24, 2023
1cbde3f
[#164] remove vehicle charge pandas import
machallboyd Apr 24, 2023
d0b6a1f
[#164] strip out more reporting python
machallboyd Apr 24, 2023
a18a845
[#164] Fix return of time step data
machallboyd Apr 24, 2023
32ea944
[#164] Polymorphism for to_csv
machallboyd Apr 24, 2023
5bbb6e8
[#164] Correct docstrings to reflect lack of pandas
machallboyd Apr 24, 2023
3443d40
[#164] Correct type hints
machallboyd Apr 25, 2023
9399c00
[#164] Use Sequence instead of list for type hint
machallboyd Apr 25, 2023
1b254a6
[#164] Simplify test to satisfy mypy
machallboyd Apr 25, 2023
88a5e1d
[#164] Another stab at sorting out types
machallboyd Apr 25, 2023
b35ea19
[#164] Type hints: data for tabular format can be empty
machallboyd Apr 25, 2023
1bcf3a3
[#164] Type hints: interstitial commit
machallboyd Apr 25, 2023
9c53543
[#164] Type hints: Better express empty sequences
machallboyd Apr 25, 2023
0475d7c
Merge branch 'main' into feature/ditch-pandas
machallboyd Apr 25, 2023
4d87af2
[#164] Type hints: Another stab and fixing it
machallboyd Apr 25, 2023
6e59ed7
[#164] Drastically cut down on the specificity of type hints
machallboyd Apr 25, 2023
388c59e
[#164] Correct for Optional
machallboyd Apr 25, 2023
4e9c2dc
[#164] Back to 3.8 style for type hint
machallboyd Apr 25, 2023
a0fae6f
[#164] Remove unused imports
machallboyd Apr 25, 2023
703e77a
Black reformat
machallboyd Apr 25, 2023
7281ec4
Correct import for 3.8 hinting
machallboyd Apr 25, 2023
002d680
add run_batch back in
nreinicke Apr 25, 2023
1766148
Merge branch 'main' into feature/ditch-pandas
nreinicke Apr 25, 2023
94e1cd1
Merge branch 'main' into feature/ditch-pandas
nreinicke Apr 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 0 additions & 110 deletions nrel/hive/app/run_batch.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we want to bring this back since we supply a command line tool that uses it:

hive-batch = "nrel.hive.app.run_batch:run"

But, this doesn't use pandas so hopefully that's a trivial ask.

This file was deleted.

114 changes: 0 additions & 114 deletions nrel/hive/app/run_tune.py

This file was deleted.

38 changes: 16 additions & 22 deletions nrel/hive/reporting/handler/time_step_stats_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
import logging
import os
from collections import Counter
from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Dict, FrozenSet, List, Optional
from typing import TYPE_CHECKING, Callable, Dict, FrozenSet, Optional, List

import numpy as np
import pandas as pd
from immutables import Map
from pandas import DataFrame

from nrel.hive.reporting.handler.handler import Handler
from nrel.hive.reporting.report_type import ReportType
from nrel.hive.state.vehicle_state.vehicle_state_type import VehicleStateType
from nrel.hive.util.io import to_csv, to_csv_dicts

if TYPE_CHECKING:
from nrel.hive.config import HiveConfig
Expand Down Expand Up @@ -42,7 +42,7 @@ def __init__(

if config.global_config.log_time_step_stats:
self.log_time_step_stats = True
self.data: List[Dict[str, Any]] = []
self.data: list = []
self.time_step_stats_outpath = scenario_output_directory.joinpath(
f"{file_name}_all.csv"
)
Expand All @@ -54,7 +54,7 @@ def __init__(
self.fleets_timestep_stats_outpath = scenario_output_directory.joinpath(
"fleet_time_step_stats/"
)
self.fleets_data: Dict[str, List[Dict[str, Any]]] = {}
self.fleets_data: dict = {}
for fleet_id in fleet_ids:
if fleet_id is None:
self.fleets_data["none"] = []
Expand All @@ -63,30 +63,25 @@ def __init__(
else:
self.log_fleet_time_step_stats = False

def get_time_step_stats(self) -> Optional[DataFrame]:
def get_time_step_stats(self) -> list:
"""
return the time step level statistics.

:return: the time step stats as a list of row dicts, or None if time step stats logging is disabled
"""
if not self.log_time_step_stats:
return None

return DataFrame(self.data)
return self.data

def get_fleet_time_step_stats(
self,
) -> Map[MembershipId, DataFrame]:
) -> Map[MembershipId, Sequence]:
"""
return an immutable map of time step stat DataFrames by membership id.
return an immutable map of time step stat data by membership id.

:return: the immutable map containing time step stats DataFrames by membership id
:return: the immutable map containing time step stats data by membership id
"""
result = Map(
{
fleet_id: DataFrame(data) if len(data) > 0 else None
for fleet_id, data in self.fleets_data.items()
}
{fleet_id: data if data else None for fleet_id, data in self.fleets_data.items()}
)
return result

Expand Down Expand Up @@ -369,24 +364,23 @@ def _get_report_filter_func(

def close(self, runner_payload: RunnerPayload):
"""
saves all time step stat DataFrames as csv files to the scenario output directory.
saves all time step stat data as csv files to the scenario output directory.

:return:
"""
if self.log_time_step_stats:
pd.DataFrame.to_csv(
to_csv_dicts(
self.get_time_step_stats(),
self.time_step_stats_outpath,
index=False,
)
log.info(f"time step stats written to {self.time_step_stats_outpath}")

if self.log_fleet_time_step_stats:
os.mkdir(self.fleets_timestep_stats_outpath)
for fleet_id, fleet_df in self.get_fleet_time_step_stats().items():
if fleet_df is not None:
for fleet_id, fleet_data in self.get_fleet_time_step_stats().items():
if fleet_data is not None:
outpath = self.fleets_timestep_stats_outpath.joinpath(
f"{self.file_name}_{fleet_id}.csv"
)
pd.DataFrame.to_csv(fleet_df, outpath, index=False)
to_csv(fleet_data, outpath)
log.info(f"fleet id: {fleet_id} time step stats written to {outpath}")
7 changes: 2 additions & 5 deletions nrel/hive/reporting/handler/vehicle_charge_events_handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from typing import List, Dict

import pandas as pd

from nrel.hive.reporting.handler.handler import Handler
from nrel.hive.reporting.report_type import ReportType
from nrel.hive.reporting.reporter import Report
Expand Down Expand Up @@ -38,13 +36,12 @@ def handle(self, reports: List[Report], runner_payload: RunnerPayload):
f"unable to parse charge event from report {report}, missing entry for {e}"
)

def get_events(self):
def get_events(self) -> Dict[str, list]:
"""
grabs the recorded charge events
:return: the charge events, as a dict of lists
"""
df = pd.DataFrame(data=self.events)
return df
return self.events

def clear(self):
"""
Expand Down
9 changes: 4 additions & 5 deletions nrel/hive/reporting/reporter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Dict, NamedTuple, Optional, Tuple
from typing import TYPE_CHECKING, Dict, NamedTuple, Optional, Tuple, Any

from immutables import Map
from pandas import DataFrame

from nrel.hive.reporting.handler.stats_handler import StatsHandler
from nrel.hive.reporting.handler.time_step_stats_handler import TimeStepStatsHandler
Expand Down Expand Up @@ -73,10 +72,10 @@ def get_summary_stats(self, rp: RunnerPayload) -> Optional[Dict]:

def get_time_step_stats(
self,
) -> Tuple[Optional[DataFrame], Optional[Map[MembershipId, DataFrame]]]:
) -> Tuple[Optional[Any], Optional[Any]]:
"""
if a TimeStepStatsHandler exists, return the time step stats DataFrame and the fleet time step stats DataFrames
:return: the time step stats DataFrame and the fleet time step stats collection of DataFrames if they exist
if a TimeStepStatsHandler exists, return the time step stats and the fleet time step stats
:return: the time step stats and the fleet time step stats collection if they exist
"""
time_step_stats, fleet_time_step_stats = None, None
for handler in self.handlers:
Expand Down
16 changes: 16 additions & 0 deletions nrel/hive/util/io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import csv
import pathlib
from typing import Sequence


def to_csv(data: Sequence[Sequence[str]], path: pathlib.Path):
    """
    write tabular data to a csv file, replacing any existing file at path.

    rows that are plain sequences are written as-is, one csv line per row.
    rows that are dicts are written with a header line followed by one line
    of values per row; the header is the union of all row keys in first-seen
    order, and keys missing from a row are written as empty fields.

    :param data: the rows to write; an empty sequence produces an empty file
    :param path: the file to write to
    """
    # newline="" hands line-ending control to the csv module; without it,
    # platforms that translate "\n" end up with "\r\r\n" line endings.
    with open(path, "w", newline="") as f:
        if len(data) > 0 and isinstance(data[0], dict):
            # iterating a dict yields its keys, so a plain csv.writer would
            # emit the keys on every line instead of the values.
            fieldnames = list(dict.fromkeys(key for row in data for key in row))
            dict_writer = csv.DictWriter(f, fieldnames=fieldnames)
            dict_writer.writeheader()
            dict_writer.writerows(data)
        else:
            csv.writer(f).writerows(data)


def to_csv_dicts(data: Sequence[dict], path: pathlib.Path):
    """
    write a sequence of dicts to a csv file, replacing any existing file at path.

    the header is the union of all row keys in first-seen order, so rows do
    not have to share exactly the same keys; a key missing from a row is
    written as an empty field.

    :param data: the rows to write, one dict per csv line; an empty sequence
        produces an empty file (no header can be derived)
    :param path: the file to write to
    """
    # dict.fromkeys de-duplicates while preserving first-seen order.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))

    # newline="" hands line-ending control to the csv module; without it,
    # platforms that translate "\n" end up with "\r\r\n" line endings.
    with open(path, "w", newline="") as f:
        if not fieldnames:
            # nothing to write; leave the (truncated) file empty instead of
            # failing on data[0].
            return
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)