In [None]:
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateFormatter, AutoDateLocator

from os.path import exists

import gdown
import zipfile

### Data Loading

In [None]:
# # Download the data if it doesn't exist locally already
# def download_device_data(url:str, filename:str):
#     file_exists = exists('../data/' + filename) 
#     if file_exists==False:
#         output = '../data/' + filename + '.zip'
#         gdown.download(url, output, quiet=False, fuzzy=True)

#         # Unzip the data 
#         with zipfile.ZipFile(output, 'r') as zip_ref:
#             zip_ref.extractall('../data/')
#     return None

In [None]:
# urls = ['https://drive.google.com/file/d/1QC6afqmWSHNpgsoe7j7g4E3YaYoHqeIt/view?usp=sharing', 
#         'https://drive.google.com/file/d/1Y_m2Awl9161Rs-7xiXbQKV7NOsDI6DI5/view?usp=sharing', 
#         'https://drive.google.com/file/d/1-mayhsGwpSnSHQYDRUDT2bY9EFSrrpOZ/view?usp=sharing',
#         'https://drive.google.com/file/d/10USDOONYDPo8BAY46qhQOAPdpwnC1S4D/view?usp=sharing'
#        ]
# filenames = ['devices1.csv',
#             'devices2.csv',
#             'devices3.csv',
#             'devices4.csv',
#            ]
# for url, filename in zip(urls, filenames):
#     download_device_data(url, filename)

In [None]:
# Plot a specified column of the dataframe for a specific battery_id
def plot_battery_param(battery_id, column, df):
    """Line-plot one column of ``df`` for a single battery.

    Parameters
    ----------
    battery_id : value matched against the ``battery_id`` column to select rows.
    column : name of the column drawn on the y-axis.
    df : dataframe containing a ``battery_id`` column and ``column``.

    The x-axis uses the index labels of the selected rows; the line is drawn
    on the current pyplot axes.
    """
    is_selected = df["battery_id"] == battery_id
    selected_rows = df.loc[is_selected]
    plt.plot(selected_rows.index, selected_rows[column])
    plt.xlabel('datapoint')
    plt.ylabel(column)
    plt.title('battery_id = %s'%battery_id)

In [None]:
# For any specific battery (battery_id) calculate a statistic (such as mean, max, min) of a parameter (such as 'temperature') at 10 SoC bins.
from scipy.stats import binned_statistic
def binned_by_SoC (battery_id, param, df, statistic = 'mean'):
    """Compute a statistic of one column within ten state-of-charge bins.

    Parameters
    ----------
    battery_id : value matched against the ``battery_id`` column to select rows.
    param : name of the column on which the statistic is computed
        (e.g. ``"temperature"``).
    df : dataframe with ``battery_id``, ``state_of_charge_percent`` and
        ``param`` columns.
    statistic : statistic forwarded to ``scipy.stats.binned_statistic``
        (e.g. ``'mean'``, ``'min'``, ``'max'``).

    Returns
    -------
    binned_values : array with one value per SoC bin (10 entries).
    bin_edges : array with the 11 bin edges, 0 to 100 in steps of 10.
    """
    battery_rows = df["battery_id"] == battery_id
    soc = df.loc[battery_rows, "state_of_charge_percent"]  # Data to be binned
    # Use the requested column; previously "temperature" was hard-coded and
    # `param` was silently ignored.
    values = df.loc[battery_rows, param]  # Data on which the statistic is computed
    soc_bin_edges = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    binned_values, bin_edges, _ = binned_statistic(soc, values, statistic = statistic, bins = soc_bin_edges)
    return binned_values, bin_edges

#### Data Frame Columns

 `current_out`: current withdrawn from the box by the consumer <br>
 `current_in`: current supplied to the box from the solar panel <br>
 `current`: `current_out` - `current_in` --> net current that goes in or out of the battery <br>
 `temperature`: temperature in deg C <br>
 `timestamp`: YYYY-MM-DD HH:MM:SS <br>
 `battery_voltage`: voltage of the battery in V <br>
 `state_of_charge_percent`: state of charge of the battery in percent <br>
 `battery_id`: unique identifier of the battery <br>

In [None]:
# Load time series csv files
# Each file contains data from approximately 50 devices
# Forward slashes are used throughout: '..\data' contains the invalid escape
# sequence '\d' and only resolves as a directory separator on Windows.
ts1_df = pd.read_csv('../data/devices1.csv')
ts2_df = pd.read_csv('../data/devices2.csv')
ts3_df = pd.read_csv('../data/devices3.csv')
ts4_df = pd.read_csv('../data/devices4.csv')
ts_df = pd.concat([ts1_df, ts2_df, ts3_df, ts4_df], axis = 0, ignore_index = True) # Complete dataframe

In [None]:
# View the first ten rows of the complete dataframe ts_df
ts_df.head(10)

In [None]:
# Rows of battery 0, sliced by index label up to and including 1000
is_battery0 = ts_df["battery_id"] == 0
ts_df.loc[is_battery0].loc[:1000, :]

In [None]:
# Examination - print some parameters.
all_voltage = ts_df["battery_voltage"]
all_current = ts_df["current"]
# Note that the convention here is that charging currents are negative.
charging_current = all_current[all_current < 0]
discharging_current = all_current[all_current >= 0]

print('ts_df.shape: ', ts_df.shape)
print('max_voltage: ', all_voltage.max())
print('min_voltage: ', all_voltage.min())
# The largest charging current is the most negative value, hence min() / max() are swapped.
print('max charging current: ', charging_current.min())
print('min charging current: ', charging_current.max())
print('max discharging current: ', discharging_current.max())
print('min discharging current: ', discharging_current.min())

Downsample the ts_df dataframe by removing rows that contain NaN values.

In [None]:
# Keep only the rows in which every column has a value (drop a row if any entry is NaN).
ts_df_lean = ts_df.dropna(axis=0, how='any')

In [None]:
# Compare row counts before and after dropping rows that contain NaN values.
print('Shape of initial dataframe ts_df: ', ts_df.shape)
# Label corrected: ts_df_lean is the NaN-free frame, not the initial one.
print('Shape of lean dataframe ts_df_lean: ', ts_df_lean.shape)

Create two new time-related columns and add them to the dataframe.<br>
Column 'time_battery_sec' contains values which start at 0 for every battery, subsequently counting the operating time of the battery in seconds. <br>
Column 'dt_sec' gives the sampling time step in seconds. It will be useful later to calculate time-weighted averages. At the first row of every battery, 'dt_sec' is zero.

In [None]:
from datetime import datetime  # no longer used in this cell; kept in case other cells rely on the name

# Parse all timestamps in one vectorised call instead of a per-row strptime loop.
timestamps = pd.to_datetime(ts_df_lean["timestamp"], format="%Y-%m-%d %H:%M:%S")

# Group by battery without hard-coding the number of batteries. The results
# below keep the row order of ts_df_lean, so they line up with the dataframe
# even if its rows are not stored in contiguous, ascending battery_id blocks.
timestamps_by_battery = timestamps.groupby(ts_df_lean["battery_id"], sort=False)

# Seconds elapsed since the first recorded row of each battery (time zero).
first_timestamp = timestamps_by_battery.transform("first")
time_battery_sec = (timestamps - first_timestamp).dt.total_seconds().to_numpy()

# Sampling step: difference to the previous row of the same battery.
# The first row of every battery has no predecessor, so its step is set to 0.
previous_timestamp = timestamps_by_battery.shift(1)
dt_sec = (timestamps - previous_timestamp).dt.total_seconds().fillna(0.0).to_numpy()

In [None]:
#Sanity check of dimensions: the two new arrays must have one entry per dataframe row.
shape_report = (
    ('ts_df_lean.shape: ', ts_df_lean),
    ('time_battery_seconds.shape: ', time_battery_sec),
    ('dt.shape: ', dt_sec),
)
for label, obj in shape_report:
    print(label, obj.shape)

In [None]:
# Add the new time columns to the lean (i.e. NaN-free) dataframe.
# assign() builds a new frame rather than writing into the result of dropna(),
# which avoids pandas' SettingWithCopyWarning and keeps the cell safe to re-run.
ts_df_lean = ts_df_lean.assign(time_battery_sec=time_battery_sec, dt_sec=dt_sec)

In [None]:
# Extract temperature features at SoC bins. Specifically extract max, min, and mean values.
# Initialise numpy arrays (one row per battery, one column per SoC bin) to store the features.
# np.full(..., np.nan) replaces np.empty + np.NaN: the np.NaN alias was removed in NumPy 2.0.
n_batteries, n_soc_bins = 200, 10
temp_features_min = np.full((n_batteries, n_soc_bins), np.nan)
temp_features_max = np.full((n_batteries, n_soc_bins), np.nan)
temp_features_mean = np.full((n_batteries, n_soc_bins), np.nan)

for battery in range(n_batteries):
    # binned_by_SoC returns (binned_values, bin_edges); only the values are stored.
    temp_features_min[battery, :], _ = binned_by_SoC (battery, "temperature", df = ts_df, statistic = 'min')
    temp_features_max[battery, :], _ = binned_by_SoC (battery, "temperature", df = ts_df, statistic = 'max')
    temp_features_mean[battery, :], _ = binned_by_SoC (battery, "temperature", df = ts_df, statistic = 'mean')
# cols = ['battery_id','temp_mean_0to10SoC', 'temp_mean_10to20SoC', 'temp_mean_20to30SoC', 'temp_mean_30to40SoC', 'temp_mean_40to50SoC', 'temp_mean_50to60SoC', 'temp_mean_60to70SoC',
#     'temp_mean_70to80SoC', 'temp_mean_80to90SoC', 'temp_mean_90to100SoC']
# df_features = pd.Dataframe(columns = cols)
# xticks = ['0','10','20','30','40','50','60','70','80','90']
# plt.bar(temp_bin_edges[:-1], temp_bin_means, align = 'edge', width = 9.5)#, tick_label=xticks)

In [None]:
# Wrap the temperature feature arrays in pandas dataframes for easier visualisation.
# Column labels run '0to10SoC', '10to20SoC', ..., '90to100SoC' (one per SoC bin).
cols = ['%dto%dSoC' % (lower, lower + 10) for lower in range(0, 100, 10)]
df_temp_features_min, df_temp_features_max, df_temp_features_mean = (
    pd.DataFrame(features, columns = cols)
    for features in (temp_features_min, temp_features_max, temp_features_mean)
)

In [None]:
# Mean temperature per SoC bin for the battery stored in row 1
df_temp_features_mean.iloc[1, :]

In [None]:
def SoC_binned_bar_chart(battery_id, df, param=''):
    """Draw a bar chart of one row of a SoC-binned feature dataframe.

    Parameters
    ----------
    battery_id : integer position of the row in ``df`` to plot.
    df : dataframe whose columns are the SoC bins and whose rows are batteries.
    param : description of the plotted quantity (e.g. 'Mean Temp'), used in the title.
    """
    bin_labels = df.columns
    bar_heights = df.iloc[battery_id]
    fig, ax = plt.subplots()
    ax.bar(bin_labels, bar_heights, align = 'center')
    ax.set_title('battery_id = %s. Plot of %s'%(battery_id, param))
    # Tilt the bin labels so neighbouring labels do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=30)

In [None]:
# Mean temperature per SoC bin for battery 0
SoC_binned_bar_chart(battery_id=0, df=df_temp_features_mean, param='Mean Temp')

In [None]:
# Mean temperature of battery 0 in each SoC bin, drawn with bars anchored at the left bin edge.
# temp_bin_means was never defined in the notebook (NameError on a fresh kernel),
# so it is computed here explicitly.
temp_bin_means, _ = binned_by_SoC(0, "temperature", df = ts_df, statistic = 'mean')
temp_bin_edges = [0,10,20,30,40,50,60,70,80,90]
xticks = ['0','10','20','30','40','50','60','70','80','90']
plt.bar(temp_bin_edges, temp_bin_means, align = 'edge', width = 9.5)#, tick_label=xticks)

In [None]:
# State of charge of battery 0 over its datapoints
plot_battery_param(battery_id=0, column='state_of_charge_percent', df=ts_df)

In [None]:
# Temperature of battery 0 over its datapoints
plot_battery_param(battery_id=0, column='temperature', df=ts_df)

In [None]:
# Rows of battery 0, sliced by index label up to and including 1000
is_battery0 = ts_df["battery_id"] == 0
ts_df.loc[is_battery0].loc[:1000, :]

### Plotting

In [None]:
# Basic plot of the data: current and voltage of battery 0 against time on twin y-axes.

xtick_locator = AutoDateLocator()
xtick_formatter = AutoDateFormatter(xtick_locator)

start_pidx = 0
end_pidx = 6000

# Select battery 0 once, then slice by index label (.loc slicing is label-based
# and includes both end points).
battery0_window = ts_df.loc[ts_df["battery_id"]==0].loc[start_pidx:end_pidx]

# Parse the timestamp strings explicitly so the conversion to matplotlib date
# numbers does not rely on implicit string-to-datetime coercion.
dates = matplotlib.dates.date2num(pd.to_datetime(battery0_window["timestamp"]))
current = battery0_window["current"]
voltage = battery0_window["battery_voltage"]

fig, host = plt.subplots()
par1 = host.twinx()  # second y-axis sharing the same x-axis

p1, = host.plot(dates, current, 'b', label='Current')
p2, = par1.plot(dates, voltage, 'r', label='Voltage')

host.set_ylabel('Current (A)')
par1.set_ylabel('Voltage (V)')
host.set_xlabel('Date')

host.xaxis.set_major_locator(xtick_locator)
host.xaxis.set_major_formatter(xtick_formatter)
fig.autofmt_xdate()

# One combined legend for the lines of both axes.
lines = [p1, p2]
host.legend(lines, [l.get_label() for l in lines], loc=1, bbox_to_anchor=(0.995,0.22))
# plt.show() instead of fig.show(): Figure.show() warns on non-GUI backends
# such as the notebook inline backend.
plt.show()