<a href="https://colab.research.google.com/github/DPaletti/mida_acv/blob/main/mida_acv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sustainable Mobility: Classification of Electric Scooter Rides

## Get Data

In [1]:
!git clone https://github.com/DPaletti/mida_acv
!mv mida_acv/data .
!yes|rm -r mida_acv

Cloning into 'mida_acv'...
remote: Enumerating objects: 246, done.[K
remote: Counting objects: 100% (51/51), done.[K
remote: Compressing objects: 100% (46/46), done.[K
remote: Total 246 (delta 4), reused 45 (delta 2), pack-reused 195[K
Receiving objects: 100% (246/246), 250.21 MiB | 21.90 MiB/s, done.
Resolving deltas: 100% (21/21), done.
Checking out files: 100% (162/162), done.


## Install dependencies

In [1]:
!pip install tsfresh
!pip install rdp

Collecting rdp
  Downloading https://files.pythonhosted.org/packages/67/42/80a54cc4387256335c32b48bd42db80967ab5f40d6ffcd8167b3dd988c11/rdp-0.8.tar.gz
Building wheels for collected packages: rdp
  Building wheel for rdp (setup.py) ... [?25l[?25hdone
  Created wheel for rdp: filename=rdp-0.8-cp37-none-any.whl size=4569 sha256=af1711568cd70c009f368fd9845fed2bd8ac12a85f85fbff8afd90d2e1bcd1e4
  Stored in directory: /root/.cache/pip/wheels/76/e4/02/c738593caece49c63180d093651bec3cd3b02ea3248f076f07
Successfully built rdp
Installing collected packages: rdp
Successfully installed rdp-0.8


In [5]:
# The runtime needs to be restarted after tsfresh has been installed.
# NOTE(review): exit() ends the current kernel session, presumably so that the
# hosted runtime relaunches with the new packages - confirm. Execution has to
# be resumed manually from the "Imports" section afterwards.
exit()

## Imports

In [2]:
from pathlib import Path
from typing import Tuple, List, Dict, Optional

import numpy as np
import pandas as pd
import rdp
import sklearn as sk
import sklearn.pipeline
import sklearn.preprocessing
import tsfresh as ts

  import pandas.util.testing as tm


## Read data

In [9]:
def read_data(data_path: str) -> pd.DataFrame:
    """Load every ride CSV below ``data_path`` into one long DataFrame.

    The directory is walked as ``<data_path>/<placement>/<driver_number>/*``
    with ``placement`` in ("deck", "stem") and ``driver_number`` in
    ("single", "double"). Every file found is read with ``pd.read_csv`` and
    tagged with three extra columns:

    * ``id`` - running integer, unique per file (ride).
    * ``IsDoublePassenger`` - 1 for rides from a "double" folder, else 0.
    * ``IsDeck`` - 1 for rides from a "deck" folder, else 0.

    The columns "Unnamed: 0", "Driver" and "Placement" are dropped and missing
    values are replaced by 0.

    Args:
        data_path: Root directory of the data set.

    Returns:
        The concatenation of all rides (original per-file row index kept), or
        an empty DataFrame when no file is found.

    Raises:
        FileNotFoundError: If one of the four expected sub-directories is missing.
        KeyError: If a file lacks one of the columns that are dropped.
    """
    rides: List[pd.DataFrame] = []
    ride_id = 0
    # Tuples and a sorted listing (instead of sets and raw iterdir order) keep
    # the id assigned to each ride stable from one run to the next.
    for placement in ("deck", "stem"):
        for driver_number in ("single", "double"):
            ride_dir = Path(data_path).joinpath(placement, driver_number)
            for ride_file in sorted(ride_dir.iterdir()):
                ride = pd.read_csv(str(ride_file))
                ride = ride.assign(
                    id=ride_id,
                    IsDoublePassenger=int(driver_number == "double"),
                    IsDeck=int(placement == "deck"),
                )
                ride = ride.drop(["Unnamed: 0", "Driver", "Placement"], axis=1)
                rides.append(ride)
                ride_id += 1
    if not rides:
        return pd.DataFrame()
    # One concat at the end instead of growing the frame inside the loop.
    return pd.concat(rides).fillna(0)


## Path simplification

In [4]:
def get_path(X: pd.DataFrame) -> np.ndarray:
    """Return the GPS track of ``X`` as an array of (latitude, longitude) rows.

    Args:
        X: DataFrame with "Latitude" and "Longitude" columns.

    Returns:
        Float array of shape ``(len(X), 2)``; column 0 is the latitude and
        column 1 the longitude, in the row order of ``X``.

    Raises:
        KeyError: If either coordinate column is missing.
    """
    # Column selection is vectorised and keeps the (n, 2) shape for empty input.
    return X[["Latitude", "Longitude"]].to_numpy(dtype=float)

def simplify_path(X: pd.DataFrame, epsilon: float = 1e-6) -> pd.DataFrame:
    """Thin out every ride's GPS track with the Ramer-Douglas-Peucker algorithm.

    Rides are the groups of rows sharing the same "id". For each ride only the
    rows selected by the mask that ``rdp.rdp`` returns for its
    (latitude, longitude) path are kept.

    Args:
        X: Samples with "id", "Latitude" and "Longitude" columns.
        epsilon: Tolerance forwarded to ``rdp.rdp``.

    Returns:
        The retained rows of all rides, concatenated in group order, or an
        empty DataFrame when ``X`` contains no ride.
    """
    simplified: List[pd.DataFrame] = []
    for _, ride in X.groupby("id"):
        keep = rdp.rdp(get_path(ride), epsilon=epsilon, return_mask=True)
        simplified.append(ride[keep])
    if not simplified:
        return pd.DataFrame()
    # One concat at the end instead of growing the frame inside the loop.
    return pd.concat(simplified)

## Windowing

In [6]:
def window(X: pd.DataFrame,
           min_timeshift: int = 0,
           max_timeshift: Optional[int] = None) -> pd.DataFrame:
    """Cut the rides in ``X`` into rolling windows with tsfresh.

    Thin wrapper around
    ``tsfresh.utilities.dataframe_functions.roll_time_series`` that uses "id"
    as the id column and "Timestamp" as the sort column.

    Args:
        X: Samples with "id" and "Timestamp" columns.
        min_timeshift: Forwarded to ``roll_time_series``.
        max_timeshift: Forwarded to ``roll_time_series``.

    Returns:
        The rolled DataFrame produced by tsfresh.
    """
    return ts.utilities.dataframe_functions.roll_time_series(
        X,  # the argument itself, not whatever global `df` happens to exist
        column_id="id",
        column_sort="Timestamp",
        column_kind=None,
        min_timeshift=min_timeshift,
        max_timeshift=max_timeshift,
    )


## Feature Extraction

In [8]:
def extract_features(X: pd.DataFrame) -> pd.DataFrame:
    """Compute tsfresh features per id and attach the two target columns.

    "Weight" and "IsDoublePassenger" are removed before extraction so they are
    not treated as signals, then added back to the feature matrix with one
    value per id (the first one seen for that id in ``X``).

    Args:
        X: Samples with "id", "Timestamp", "Weight" and "IsDoublePassenger"
            columns plus the signal columns to extract features from.

    Returns:
        The tsfresh feature matrix with "Weight" and "IsDoublePassenger"
        columns appended.
    """
    target_columns = ["Weight", "IsDoublePassenger"]
    features = ts.extract_features(
        X.drop(columns=target_columns),
        column_id="id",
        column_sort="Timestamp",
    )
    # The feature matrix has one row per id while X has one row per sample, so
    # the targets are looked up per id instead of being copied row by row.
    first_rows = X.drop_duplicates(subset="id")
    for column in target_columns:
        value_by_id = dict(zip(first_rows["id"], first_rows[column]))
        features[column] = [value_by_id[ride_id] for ride_id in features.index]
    return features

## Feature Selection

In [None]:
def select_features(X: pd.DataFrame) -> pd.DataFrame:
    """Keep the features tsfresh finds relevant for either target.

    Feature selection is run twice, once against "Weight" and once against
    "IsDoublePassenger". The selected columns are prefixed with "weight_" and
    "passengers_" respectively and joined on the shared row index.

    Args:
        X: Feature matrix with one row per id that also carries the "Weight"
            and "IsDoublePassenger" target columns.

    Returns:
        DataFrame indexed like ``X`` containing both prefixed feature sets.
    """
    # NOTE(review): tsfresh's selection presumably requires a matrix without
    # NaN values - confirm whether the features need imputing beforehand.
    features = X.drop(columns=["Weight", "IsDoublePassenger"])
    # The targets share X's index, which is what ts.select_features aligns on.
    weight_features = ts.select_features(features, X["Weight"])
    passenger_features = ts.select_features(features, X["IsDoublePassenger"])
    weight_features = weight_features.add_prefix("weight_")
    passenger_features = passenger_features.add_prefix("passengers_")
    return weight_features.join(passenger_features)

## Analysis Pipeline

In [None]:
# Full analysis pipeline: simplify GPS paths -> roll windows -> extract
# features -> select features. Extra arguments of the wrapped functions have to
# be passed through `kw_args`; FunctionTransformer takes no arbitrary keywords.
pipeline = sk.pipeline.Pipeline(
    [
        (
            "simplify_paths",
            sk.preprocessing.FunctionTransformer(
                simplify_path, kw_args={"epsilon": 1e-6}
            ),
        ),
        (
            "window",
            sk.preprocessing.FunctionTransformer(
                window, kw_args={"min_timeshift": 0, "max_timeshift": 10}
            ),
        ),
        ("extract_features", sk.preprocessing.FunctionTransformer(extract_features)),
        ("select_features", sk.preprocessing.FunctionTransformer(select_features)),
    ]
)