In [10]:
from contextlib import redirect_stdout # For printing our files
import os # OS File system utilization in printing files
import pandas as pd #Manipulating our data frames
import numpy as np #Utilizing numpy commands

In [12]:
#Code made by Paul Williamson/KB5MU with very small edits by me - much thanks!
uba_folder = '' #format for your github repo where needed
uba_extension = ".uba"
output_extension = "_summmary.txt"

for filename in os.listdir(uba_folder):
    if filename.endswith(uba_extension):
        with open(os.path.join(uba_folder, filename), 'r', encoding='cp1252') as f:
            with open(filename+output_extension, 'w') as outf:
                print(filename)
                at_messages = False
                for line in f:
                    if line.strip() == "<Messages>":
                        at_messages = True
                    if at_messages:
                        print(line.strip(), file=outf)

FileNotFoundError: [WinError 3] The system cannot find the path specified: ''

In [14]:
# As a conscious choice, we have chosen not to attempt to automate this process because of the potential difficulty costing more time than sorting each .uba summary.txt manually. 
# However, if wanted, you can attempt to automate this process. In this experiment however, we have sorted each file into their corresponding file types manually. 
# In doing so, here are our results.
# We found that each file contains two identifiable markers that signify the start of the discharge and the start of the long 3 hour equalization charge. 
# We are generally able to tell which file has what amount of cycles because of this. 
# IN CASE We want to utilize later cycles of the batteries, we can only use sixgroup. However, all other files have at least one cycle to use, so we can utilize all of the files for the first cycle analysis.
# 1HDC = 1 Hour Dis-Charge
# 3HEC = 3 Hour Equalization Charge
#1HDC, 3HEC, 1HDC
threegroup = [19, 20, 83, 84]

#1HDC, 3HEC, 1HDC, 3HEC
fourgroup = [6, 4, 3, 2, 1]

#1HDC, 3HEC, 1HDC, 3HEC, 1HDC
fivegroup = [95, 96, 94, 93, 65, 54, 53, 31, 30, 29, 15]

#1HDC, 3HEC, 1HDC, 3HEC, 1HDC, 3HEC
sixgroup = [91, 89, 88, 87, 86, 85, 80, 79, 78, 77, 74, 73, 72, 71, 66, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 52, 51, 50, 49, 46, 45, 42, 41, 40, 39, 38, 37, 36, 35, 34, 32, 28, 26, 25, 21, 22, 21, 18, 17, 16,  14, 13]


In [16]:
trurange = list(range(1,69))+list(range(83,99)) # After iterating through which files exist, we have created a range of all possible useable files.
file = {} # Create a dictionary to store our formatted and read files
dischargestart = 'Finish=MaxChargeC' # Start of the discharge cycle, graphed as a plateau in the data
chargestart = 'Finish=CutoffV' # End of the discharge cycle, start of the charge cycle, graphed as a trough in the data
split = 'Finish=MaxDischargeTime' # Split halfway through the charge cycle as current increases, appears as the point where the charge cycle changes.
end = 'Finish=UserRequest:Stop' # End of the data file

In [18]:
for x in trurange:
    try:
        file[x] =(
             pd.read_csv(
                f'NiCd - Key Lime #{x}(data).csv', 
                skip_blank_lines=False, #We want all index numbers to stay the same
                keep_default_na=True, #We want to keep all of our N/A values for the same reason
                names=['Time', 'Volt', 'M/A', 'C', 'Sig'],
            )
        )
    except:
        print(f'fail in {x}')

In [20]:
locsigdc = {} #first true discharge positional location
locsigch = {} #first true charge positional location
locsigsp = {} #first true split positional location
locsigend = {} #end of the the first charge cycle
for x in file.keys(): 
    if len(file[x].loc[file[x]['Sig'] == dischargestart].index.tolist()) >=3:
        if len(file[x].loc[file[x]['Sig'] == chargestart].index.tolist()) >=3:
            if len(file[x].loc[file[x]['Sig'] == split].index.tolist()) >=3:
                if file[x].loc[file[x]['Sig'] == dischargestart].index.tolist()[0]\
                < file[x].loc[file[x]['Sig'] == chargestart].index.tolist()[1]\
                < file[x].loc[file[x]['Sig'] == split].index.tolist()[2]:
                        locsigdc[x] = (file[x].loc[file[x]['Sig'] == dischargestart].index.tolist()[0]) 
                        locsigch[x] = (file[x].loc[file[x]['Sig'] == chargestart].index.tolist()[1])
                        locsigsp[x] = (file[x].loc[file[x]['Sig'] == split].index.tolist()[2])
                        locsigend[x] = (file[x].loc[file[x]['Sig'] == split].index.tolist()[2] + 500)
                


In [22]:
dischargecycle = {} #first discharge
splitchargecycle = {} #first half of the charge cycle
postsplit = {} # last half of the charge cycle
for x in locsigdc.keys(): #We want to see which ones fail this cell, so we will run it through the same keys again.
        try:
            if locsigsp[x] < 3000:
                dischargecycle[x] = file[x]['Volt'].truncate(before=locsigdc[x],
                                                             after=locsigch[x])
                splitchargecycle[x] = file[x]['Volt'].truncate(before=locsigch[x], 
                                                               after=locsigsp[x])
                postsplit[x] = file[x]['Volt'].truncate(before=locsigsp[x],
                                                        after=locsigend[x])
        except:
            print(f'fail in {x}')

In [24]:
with open('DirectComparison.csv', 'w') as f:
    with redirect_stdout(f):
        for x in file.keys():
            if x in dischargecycle.keys():
                for y in dischargecycle.keys(): 
                    print(abs(((dischargecycle[x].reset_index(drop=True)).sub(dischargecycle[y].reset_index(drop=True))).sum()), f', file {x} - {y}')


In [30]:
gofish = pd.read_csv('DirectComparison.csv', 
                     names=['Diff', 'CoreFiles'])
tetris = gofish.drop_duplicates(subset=['Diff'])
jenga = gofish.drop_duplicates(subset=['Diff']).sort_values(by=['Diff'], 
                                                          axis=0, 
                                                          ascending=True, 
                                                          inplace=False)

In [32]:
tetris.to_csv('UnsortedFile.csv', index=False)
jenga.to_csv('SortedFile.csv', index=False)