In [1]:
#Author: Elyana Crowder Melnick
#Last updated: 2/14/2025
import numpy as np
import pandas as pd
from string import Template
import csv
import re 
import matplotlib.pyplot as plt
# Remove the column-width limit so long integrand strings are shown in full
# (not truncated) whenever a DataFrame is displayed or printed.
pd.set_option('display.max_colwidth', None)

# Description: 
This code takes a single-column version of the `TCL/Spin_Boson/Reduced_Nonzero_Terms.xlsx` spreadsheet and converts it into a form better suited for typesetting. That is:
The terms were initially grouped into single rows by their initial correlation terms; this code
1) "splits" them into multiple lines

2) Also considers direct additive cancellations between terms of different "groups" 

3) Translates from the shorthand notation used in the spreadsheet to LaTeX symbols

The input for this is a CSV file, and the output is a `.txt` file containing an aligned equation that can be copied and pasted into a `.tex` file. 


### Functions

In [2]:
def ShortHand_toTex(s):
    r"""Translate one shorthand integrand string into LaTeX markup.

    Shorthand handled here:
        G         -> \Gamma
        GTat      -> \Gamma^T(t_a-t)
        Gat-Gtb   -> \Delta\Gamma(t_a-t,t-t_b)
        a, b, 0   -> \hat{a}, \hat{b}, \hat{0}   (system operators)
        p         -> \hat{\varrho}
        *         -> \circ

    Each correlation term is expected to be enclosed by exactly one delimiter
    character on each side (presumably parentheses -- TODO confirm against the
    spreadsheet): the slicing below drops the character immediately before
    the term and the character immediately after it.  A term with a leading
    "-" (e.g. "-Gab") is re-wrapped explicitly as "(-...)".

    Parameters
    ----------
    s : str
        Shorthand integrand.

    Returns
    -------
    str
        The integrand with correlation functions, time arguments and operator
        symbols rewritten as LaTeX.

    Notes
    -----
    Relies on ``decode_times`` for the two-character time arguments.  Indexing
    past the end of ``s`` raises ``IndexError`` when a "G" is not followed by
    the expected characters.
    """
    # Correlation functions are first emitted with a lower-case "\gamma" so
    # that no capital "G" is re-introduced; this is what lets the loop end.
    while "G" in s:
        i_start = s.find("G")
        T_offset = 0   # 1 when the term is transposed ("GTab" is one char longer than "Gab")
        T_sub = ""
        sign_offset = 0  # 1 when a leading "-" has to be swallowed as well
        if s[i_start+1] == "T":
            T_offset = 1
            T_sub = "^T"
        t_s = decode_times(s[i_start+T_offset+1:i_start+T_offset+3])
        D_offset = 0   # extra characters consumed by the second half of a difference
        if s[i_start+T_offset+3] == "-":
            # Difference of two correlation functions, e.g. "Gat-Gtb" or
            # "GTat-GTtb"; the second term is assumed to carry the same
            # transpose marker as the first (hence 2*T_offset).
            t_s2 = decode_times(s[i_start+(2*T_offset)+5:i_start+(2*T_offset)+7])
            D_offset = 4+T_offset  # length grows from Gab (3) to Gab-Gab (7), or GTab (4) to GTab-GTab (9)
            new_sub = r"\Delta\gamma"+T_sub+"("+t_s+","+t_s2+")"
        else:
            new_sub = r"\gamma"+T_sub+"("+t_s+")"
            if s[i_start - 1] == "-":
                # Keep the leading sign attached to the term inside parentheses.
                new_sub = "(-" + new_sub+")"
                sign_offset = 1

        # part before (minus the opening delimiter and optional sign)
        # + replacement + part after (minus the closing delimiter)
        s = s[0:i_start-(1+sign_offset)]+new_sub+s[i_start+4+T_offset+D_offset:]

    # System operators: an "a"/"b" is an operator unless it directly follows
    # one of "_", "g", "m", "t", "h", "{" -- i.e. unless it is a time
    # subscript (t_a) or a letter inside \gamma, \Delta, \hat{...}.
    aops = re.compile("(?<![_gmth{])a")
    bops = re.compile("(?<![_gmth{])b")
    s = re.sub(aops, r"\\hat{a}", s)
    s = re.sub(bops, r"\\hat{b}", s)
    s = s.replace("0", r"\hat{0}")
    s = s.replace("p", r"\hat{\varrho}")
    s = s.replace("*", r"\circ")
    # Last step: switch the placeholder gammas to capital Gammas.
    s = s.replace(r"\gamma", r"\Gamma")
    return s


def decode_times(s):
    """Turn a two-character time code into a LaTeX time difference.

    Character meanings: "a" -> t_a, "b" -> t_b, any other character (e.g.
    "t" or "0") is kept as-is.  A zero is never written out explicitly:
    "a0" gives "t_a" (not "t_a-0") and "0b" gives "-t_b".

    Parameters
    ----------
    s : str
        String whose first two characters are the time codes.

    Returns
    -------
    str
        "<first>-<second>" with the zero-omission rule above applied.
    """
    def _as_time(code):
        # Only "a" and "b" are subscripted times; everything else passes through.
        return "t_" + code if code in ("a", "b") else code

    first, second = s[0], s[1]
    if first == "0":
        return "-" + _as_time(second)
    if second == "0":
        return _as_time(first)
    return _as_time(first) + "-" + _as_time(second)

def bounds_to_tex(s,TCL4=False):
    r"""Convert a shorthand integration-bounds string into LaTeX integrals.

    The input looks like "+(0ta)(0ab)": a sign at index 0 followed by
    parenthesised triples (lower bound, upper bound, integration variable).
    Characters 2-4 give the first integral and characters 7-9 the second.

    Parameters
    ----------
    s : str
        Shorthand bounds, including the leading sign character.
    TCL4 : bool, optional
        When True only the first integral is emitted; otherwise both are.

    Returns
    -------
    str
        The leading character of ``s`` followed by the LaTeX integral(s),
        with "t_0" written as "0" and "t_t" written as "t".
    """
    first_integral = r"\int_{{t_{0[2]}}}^{{t_{0[3]}}}dt_{0[4]}"
    second_integral = r"\int_{{t_{0[7]}}}^{{t_{0[8]}}}dt_{0[9]}"
    template = first_integral if TCL4 else first_integral + second_integral
    # Every bound is first written as t_<char>; the literal 0 and the bare t
    # are then restored.
    tex = template.format(s).replace("t_0","0").replace("t_t","t")
    return s[0]+tex

def merger_integral(b,s):
    r"""Merge a bounds string and an integrand into one aligned LaTeX row.

    Outdated, do not use (kept for reference only).

    The original spreadsheet entries omit an overall ``-[0, ...]``, so the
    emitted sign is the opposite of the product of the two leading signs:
    "&+" when exactly one of ``b``/``s`` starts with "-", otherwise "&-".

    Parameters
    ----------
    b : str
        Bounds text; its first character is treated as a sign and dropped.
    s : str
        Integrand; a leading "+" or "-" is dropped, anything else is kept.

    Returns
    -------
    str
        ``<sign><bounds>[\hat{0},<integrand>]\\``
    """
    bounds_negative = b[0] == "-"
    integrand_negative = s[0] == "-"
    sign = "&+" if bounds_negative != integrand_negative else "&-"
    # Skip the integrand's own sign character only when it has one.
    skip = 1 if s[0] in ("+", "-") else 0
    return sign + b[1:] + r"[\hat{0}," + s[skip:] + r"]\\"

#Instead, we want to factor out the overall integral, e.g. +(0ta)(0ab), so make sure each integrand carries its own sign

def factor_signs(b,i,update=True):
    """Move the sign of the bounds onto the integrand (single, already-split row).

    This is NOT distributive: the integrand is treated as one term, so rows
    must have been split beforehand.

    Parameters
    ----------
    b : str
        Bounds string whose first character is its sign.
    i : str
        Integrand, with or without an explicit leading "+"/"-".
    update : bool, optional
        True  -> return the integrand with an explicit sign, flipped when the
                 bounds were negative.
        False -> return the bounds with the first character replaced by "+".

    Returns
    -------
    str
        The updated integrand or the updated bounds, depending on ``update``.
    """
    opposite = {"-": "+", "+": "-"}
    if b[0] == "-":
        # A negative bound sign is absorbed by the integrand.
        lead = i[0]
        if lead in opposite:
            i = opposite[lead] + i[1:]
        else:
            i = "-" + i
    if i[0] not in ("+", "-"):
        # No explicit sign means implicitly positive.
        i = "+" + i
    b = "+" + b[1:]
    return i if update else b
#How to determine integrands that can be split (without expanding commutators):
# print(s)
# print("number of terms with rho:", s.count("p"))
#Define:
#for an independent term, all of the left square brackets are closed:
#basically, for each "+" or "-", check whether the brackets before it are all closed and whether there is a p factor (there should be)

def valid_Parens(s):
    """Check that the brackets in ``s`` are balanced and properly nested.

    All characters other than ``[](){}`` are ignored.  A string containing
    no brackets at all is reported as NOT valid.

    Parameters
    ----------
    s : str
        Arbitrary text.

    Returns
    -------
    bool
        True only when at least one bracket is present and every bracket is
        closed by the matching type in the right order.
    """
    brackets = re.sub(re.compile("[^][{}()]"), "", s)
    if not brackets:
        return False

    opener_for = {"]": "[", ")": "(", "}": "{"}

    # Quick rejection: each bracket type must open as often as it closes.
    for closer, opener in opener_for.items():
        if brackets.count(opener) != brackets.count(closer):
            return False

    pending = []  # openers still waiting for their closer
    for ch in brackets:
        if ch not in opener_for:
            pending.append(ch)
        elif not pending or pending.pop() != opener_for[ch]:
            # A closer with nothing open, or closing the wrong bracket type.
            return False
    return True
 
def row_splitter(row,flag=False): #debugged
    """Split one integrand row into independent additive terms.

    The row is cut at a "+" or "-" only when the text accumulated since the
    previous cut has fully balanced brackets (see ``valid_Parens``) and
    contains a "p" factor.  Sums nested inside brackets are therefore left
    intact; this does not expand products such as A(B + C).

    Parameters
    ----------
    row : str or list
        Integrand text.  A list is taken to be already split and is returned
        unchanged.
    flag : bool, optional
        When True, print "found" each time a split point is accepted.

    Returns
    -------
    list
        The pieces of ``row``; every piece after the first starts with the
        sign character it was cut at.
    """
    if isinstance(row, list):
        return row
    s_split = []
    temp_ind = 0  # start index of the term currently being accumulated
    # Raw-string pattern: the non-raw "\+" is an invalid escape sequence.
    for m in re.finditer(r"[-+]", row):
        sub = row[temp_ind:m.start(0)]
        # m.start(0) is the index of the addition/subtraction sign
        if valid_Parens(sub) and "p" in sub:
            if flag:
                print("found")
            temp_ind = m.start(0)
            s_split.append(sub)
    # Whatever follows the last accepted cut is the final term.
    s_split.append(row[temp_ind:])
    return s_split

def check_cancel(I):
    """Tell whether a group of signed terms is an exactly cancelling pair.

    Parameters
    ----------
    I : sequence of str
        Terms whose first character is "+" or "-".

    Returns
    -------
    bool
        True only when the group has exactly two members and their leading
        signs are opposite; False for any other group size.
    """
    if len(I) != 2:
        return False
    # "+"/"-" followed by "1" parses to +1/-1; opposite signs multiply to -1.
    first_sign, second_sign = [int(term[0] + "1") for term in I]
    return first_sign * second_sign < 0

### Automated Row splitting (v2): 

In [3]:
# Load the single-column export of the spreadsheet; the file's own header row
# is replaced by the three column names used throughout this notebook.
excel = pd.read_csv(
    "./Reduced_Nonzero_Terms_CSV.csv",
    header=0,
    names=["C", "bounds", "integrand"],
)

# Character count per integrand, as a sanity check: the maximum needs to end
# up below roughly 60 for typesetting.
line_lens = excel["integrand"].apply(len)
print("number of lines:",excel.shape[0]," | maximum line length: ", max(line_lens))

# Split the unprocessed rows by identifying completely closed bracket groups.
#   E.g. each row is like [A] = [B + C] => [B, C]
#   This does not foil and split terms like [A(B + C)] => [AB, AC]
excel["integrand"] = excel["integrand"].apply(row_splitter)
# One new row per split term.
excel = excel.explode("integrand", ignore_index=True)

line_lens = excel["integrand"].apply(len)
print("number of lines after automated splitting:",excel.shape[0], " | maximum line length: ", max(line_lens))

# Move negative bound signs onto the integrand.  This must happen AFTER
# splitting (outer parentheses apply in some cases), and the integrand must be
# updated BEFORE the bounds are reset to "+".
excel["integrand"] = excel.apply(
    lambda row: factor_signs(row.bounds, row.integrand, True), axis=1
)
excel["bounds"] = excel.apply(
    lambda row: factor_signs(row.bounds, row.integrand, False), axis=1
)

# Intermediate snapshot for inspection in a spreadsheet.
excel.to_excel("./Automated_splits_TCL6.xlsx")


number of lines: 80  | maximum line length:  287
number of lines after automated splitting: 201  | maximum line length:  82


## Extra Hardcoded factorization splits for TCL6

In [4]:

# There are a few terms written as A(B + C) that still need to be separated,
# at indices 4, 7, 115, 118, 186, 189.
# These are identifiable since they have more than one "p" (\rho), and are
# very noticeable in the spreadsheet.
# The splits are hardcoded here (based on string searches, but not generalized).
# Each entry is [ind, k, m]: ind is the row label; k and m are the character
# positions removed from the integrand (per the original note: for A(B+C),
# k is the index of "(" + 1 and m is the index of "+" + 1).
# NOTE: these labels refer to the frame as it is right after the automated
# split; re-running this cell on an already-exploded frame would hit the
# wrong rows.
manual_splits = [[4,20,50],
                 [7,20,50],
                 [115,20,51],
                 [118,20,49],
                 [186,32,54],
                 [189,34,54]] #hardcoded, instead of opening up the spreadsheet [ind,k,m]
for ind, k, m in manual_splits:
    row_i = excel.loc[ind, "integrand"]
    # Sign of the second piece: same as the row's sign, flipped when the
    # character at position m is "-".
    s_C = row_i[0]
    if row_i[m] == "-":
        s_C = "-" if row_i[0] == "+" else "+"
    # First piece:  row_i[:k] + row_i[k+1:m]           (character k dropped)
    # Second piece: s_C + row_i[1:k] + row_i[m+1:-1]   (character m and the last character dropped)
    rows_i = [row_i[0:k]+row_i[k+1:m], s_C+row_i[1:k]+row_i[m+1:-1]]
    # Write through the frame itself: the chained form excel.integrand[ind] = ...
    # is not guaranteed to update `excel` (SettingWithCopy / Copy-on-Write).
    excel.at[ind, "integrand"] = rows_i
excel = excel.explode("integrand",ignore_index=True) #making new rows with split terms, this DOES shift indices
excel.to_excel("./Automated_splits_andHardcode_TCL6.xlsx")
line_lens = excel.integrand.apply(len) #number of characters in each line, just to check.
# The maximum needs to be less than approximately 60.
print("number of lines after extra splits: ", excel.shape[0]," | new maximum line length: ", max(line_lens))

number of lines after extra splits:  207  | new maximum line length:  55


## Row cancellations
Removing rows +A such that -A is also a row.  

In [5]:


# Key every row by its integrand with the leading sign character removed, so
# that +A and -A fall into the same group.
excel["unsigned"] = excel["integrand"].apply(lambda term: term[1:])

by_unsigned = excel.groupby("unsigned")
# Rows belonging to a cancelling pair are kept in a separate frame for the record ...
cancelled = by_unsigned.filter(lambda grp: check_cancel(grp["integrand"]))
# ... and removed from the working frame.
excel = by_unsigned.filter(lambda grp: not check_cancel(grp["integrand"]))

# Intermediate snapshot of what was removed.
cancelled.to_excel("./ListofCanceledRows_aftersplits_TCL6.xlsx")




### Full processing after row cancellations (auto and manual) (v3): 

In [6]:

# Convert integrands and integration bounds to LaTeX.
excel.integrand = excel.integrand.apply(ShortHand_toTex)
excel.bounds = excel.bounds.apply(bounds_to_tex)

# Intermediate snapshot for inspection in a spreadsheet.
excel.to_excel("./Cancellations_splits_TCL6.xlsx")

# Finally, group by original correlation function: each group is introduced by
# a LaTeX comment naming the correlation function, followed by its integrands.
filename = "Grouped_Split_ReducedEqns_v4.txt"
# Use the first REMAINING row positionally: the cancellation step filters rows
# without resetting the index, so label 0 is not guaranteed to exist.
# The leading sign character of the bounds is dropped.
head = r"\begin{align}\text{TCL6} &= -\Big{[}\hat{0},"+excel.bounds.iloc[0][1:]+r"\\"
with open(filename, "w") as f:
    f.write(head)
    f.write('\n')

# Add the alignment marker and the LaTeX line break to every integrand.
excel.integrand = excel.integrand.apply(lambda x: "&"+x+r"\\")

for CorrFuns, sub_table in excel.groupby("C"):
    # The handle is closed before pandas appends to the same file by name.
    with open(filename, "a") as f:
        f.write(r"%"+CorrFuns)
        f.write('\n')
    # One integrand per line, no header/index, and no quoting so the LaTeX is
    # written verbatim.
    sub_table.to_csv(filename,sep="\n",header=None, columns = ["integrand"],index=False,quoting=csv.QUOTE_NONE, mode = 'a')

# Close the commutator bracket and the align environment.
with open(filename, "a") as f:
    f.write(r"\Big{]} + h.c. \end{align} ")
    f.write('\n')