From e9b0260b0abcca10672a7e3ace381c1a642b786c Mon Sep 17 00:00:00 2001 From: donerancl Date: Tue, 30 Jan 2024 14:55:44 -0500 Subject: [PATCH] squashed --- ipython/fragment_reattachment_example.ipynb | 1021 ++++++++++--------- rmgpy/molecule/fragment_utils.py | 312 ++++++ 2 files changed, 875 insertions(+), 458 deletions(-) create mode 100644 rmgpy/molecule/fragment_utils.py diff --git a/ipython/fragment_reattachment_example.ipynb b/ipython/fragment_reattachment_example.ipynb index a3de845210..9c45b4cc7a 100644 --- a/ipython/fragment_reattachment_example.ipynb +++ b/ipython/fragment_reattachment_example.ipynb @@ -32,17 +32,17 @@ } ], "source": [ - "from rdkit.Chem.rdmolops import *\n", + "import sys\n", + "rmgpy_loc = \"/home/gridsan/adoner/RMG-Py/\"\n", + "sys.path.append(rmgpy_loc)\n", "from rmgpy.molecule.fragment import Fragment\n", "from rmgpy.tools.canteramodel import Cantera\n", - "from rmgpy.chemkin import *\n", + "from rmgpy.chemkin import load_chemkin_file\n", + "from rmgpy.molecule.fragment_utils import match_sequences, match_concentrations_with_same_sums, match_concentrations_with_different_sums, shuffle, flatten, merge_frag_to_frag, merge_frag_list\n", "import re\n", "import os\n", "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import sys\n", - "rmgpy_loc = \"/home/gridsan/adoner/RMG-Py/\"\n", - "sys.path.append(rmgpy_loc)" + "import matplotlib.pyplot as plt" ] }, { @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 7, "id": "9802cd48-4a7b-4244-acf8-cba0fc1aab69", "metadata": {}, "outputs": [], @@ -64,12 +64,12 @@ "class FragList():\n", " '''\n", " to instantiate a FragList:\n", - " fl = Fraglist(frag_list) \n", + " fl = Fraglist(frag_list)\n", " where frag_list is a list of tuples of fragments and their amounts\n", " '''\n", "\n", " def __init__(self, frag_list):\n", - " self.raw = frag_list\n", + " self.raw_fragment_output = frag_list\n", "\n", " def sort(self):\n", " '''\n", @@ -78,7 +78,7 @@ 
" general_L_list - 2L fragments\n", " rr_ll_list - 2R or 2L fragments\n", " r_l_moles - 1R and 1L fragments\n", - " multi_label_frag_3 - fragments with 3 cutting labels \n", + " multi_label_frag_3 - fragments with 3 cutting labels\n", " multi_label_frag_4 - fragments with 4 cutting labels\n", "\n", " note: in our experience fragments with more than 4 cutting labels has never happened, but a warning will be printed if it does happen\n", @@ -93,41 +93,44 @@ " multi_label_frag_3 = []\n", " multi_label_frag_4 = []\n", "\n", - " for i, item in enumerate(self.raw):\n", + " for i, item in enumerate(self.raw_fragment_output):\n", " frag, amt = item\n", " if amt > 1e-6 and '[' not in frag:\n", - " if len(re.findall(r'R', frag)) == 0 and len(re.findall(r'L', frag)) == 0:\n", + " count_of_L_labels = len(re.findall(r'L', frag))\n", + " count_of_R_labels = len(re.findall(r'R', frag))\n", + " count_of_cutting_labels = count_of_L_labels + count_of_R_labels\n", + " if count_of_R_labels == 0 and count_of_L_labels == 0:\n", " moles_remain.append((frag, amt))\n", - " elif len(re.findall(r'R', frag)) == 1 and len(re.findall(r'L', frag)) == 0:\n", + " elif count_of_R_labels == 1 and count_of_L_labels == 0:\n", " one_R_dict[frag] = amt\n", - " elif len(re.findall(r'R', frag)) == 2 and len(re.findall(r'L', frag)) == 0:\n", + " elif count_of_R_labels == 2 and count_of_L_labels == 0:\n", " general_R_list.append((frag, amt * 2))\n", " rr_ll_list.append(frag)\n", - " elif len(re.findall(r'R', frag)) == 0 and len(re.findall(r'L', frag)) == 1:\n", + " elif count_of_R_labels == 0 and count_of_L_labels == 1:\n", " one_L_dict[frag] = amt\n", - " elif len(re.findall(r'R', frag)) == 0 and len(re.findall(r'L', frag)) == 2:\n", + " elif count_of_R_labels == 0 and count_of_L_labels == 2:\n", " general_L_list.append((frag, amt * 2))\n", " rr_ll_list.append(frag)\n", - " elif len(re.findall(r'R', frag)) == 1 and len(re.findall(r'L', frag)) == 1:\n", + " elif count_of_R_labels == 1 and count_of_L_labels 
== 1:\n", " r_l_moles.append((frag, amt))\n", " else:\n", - " if len(re.findall(r'[LR]', frag)) == 3:\n", + " if count_of_cutting_labels == 3:\n", " multi_label_frag_3.append(\n", " (frag, amt)) # 2R1L, 1R2L, 3R, 3L\n", - " elif len(re.findall(r'[LR]', frag)) == 4:\n", + " elif count_of_cutting_labels == 4:\n", " multi_label_frag_4.append((frag, amt))\n", " else:\n", " print(\n", - " f\"Warning! {len(re.findall(r'[LR]',frag))} cutting labels in {frag}\")\n", - " self.R1dict = one_R_dict\n", - " self.L1dict = one_L_dict\n", - " self.Rlist = general_R_list\n", - " self.Llist = general_L_list\n", - " self.RRLLlist = rr_ll_list\n", - " self.RLlist = r_l_moles\n", - " self.CL3 = multi_label_frag_3\n", - " self.CL4 = multi_label_frag_4\n", - " self.molesremain = moles_remain\n", + " f\"Warning! {count_of_cutting_labels} cutting labels in {frag}\")\n", + " self.R1dict=one_R_dict\n", + " self.L1dict=one_L_dict\n", + " self.Rlist=general_R_list\n", + " self.Llist=general_L_list\n", + " self.RRLLlist=rr_ll_list\n", + " self.RLlist=r_l_moles\n", + " self.CL3=multi_label_frag_3\n", + " self.CL4=multi_label_frag_4\n", + " self.molesremain=moles_remain\n", "\n", " def random_pick_frag(target_dict):\n", " '''\n", @@ -138,11 +141,11 @@ " '''\n", " import random\n", " import re\n", - " frag_dict_list = [x for x in target_dict.items() if len(\n", + " frag_dict_list=[x for x in target_dict.items() if len(\n", " re.findall(r'[LR]', x[0])) == 1]\n", - " sum_dict = sum([x[1] for x in frag_dict_list])\n", - " frag_dict_prob = [x[1]/sum_dict for x in frag_dict_list]\n", - " item = np.random.choice(frag_dict_list, 1, p=frag_dict_prob)\n", + " sum_dict=sum([x[1] for x in frag_dict_list])\n", + " frag_dict_prob=[x[1] / sum_dict for x in frag_dict_list]\n", + " item=np.random.choice(frag_dict_list, 1, p = frag_dict_prob)\n", "\n", " return item\n", "\n", @@ -152,31 +155,31 @@ " target_dict - dictionary of species smiles and moles\n", " returns: the target_dict with 1 randomly chosen 1-cutting 
label fragment fully paired with other randomly chosen 1-cutting label fragments\n", " '''\n", - " additional_frag_list = []\n", - " frag1 = FragList.random_pick_frag(target_dict)\n", + " additional_frag_list=[]\n", + " frag1=FragList.random_pick_frag(target_dict)\n", "\n", " if target_dict[frag1] >= amount:\n", " target_dict[frag1] -= amount\n", " additional_frag_list.append((frag1, amount))\n", "\n", " else:\n", - " remain = amount - target_dict[frag1]\n", + " remain=amount - target_dict[frag1]\n", " additional_frag_list.append((frag1, amount))\n", - " target_dict[frag1] = 0\n", + " target_dict[frag1]=0\n", "\n", " while remain > 0:\n", - " frag1 = FragList.random_pick_frag(target_dict)\n", + " frag1=FragList.random_pick_frag(target_dict)\n", "\n", " if target_dict[frag1] >= remain:\n", " target_dict[frag1] -= remain\n", " additional_frag_list.append((frag1, remain))\n", - " remain = 0\n", + " remain=0\n", "\n", " else:\n", - " frag_amt = target_dict[frag1]\n", - " target_dict[frag1] = 0\n", + " frag_amt=target_dict[frag1]\n", + " target_dict[frag1]=0\n", " additional_frag_list.append((frag1, frag_amt))\n", - " remain = remain - frag_amt\n", + " remain=remain - frag_amt\n", " return additional_frag_list\n", "\n", " def pair_CL4s(self):\n", @@ -185,60 +188,56 @@ " '''\n", "\n", " for species, amount in self.CL4: # 4R, 3R1L, 2R2L, 1R3L, 4L\n", - "\n", - " if len(re.findall(r'R', species)) == 4 and len(re.findall(r'L', species)) == 0:\n", - " paired_frag_list = FragList.pair_frag(amount, self.L1dict)\n", + " ount_of_R_labels=len(re.findall(r'R', species))\n", + " count_of_L_labels=len(re.findall(r'L', species))\n", + " if count_of_R_labels == 4 and count_of_L_labels == 0:\n", + " paired_frag_list=FragList.pair_frag(amount, self.L1dict)\n", " for frag_amt in paired_frag_list:\n", - " frag = frag_amt[0]\n", - " amt = frag_amt[1]\n", - " frag1 = frag # 1L\n", - " frag2 = species # 4R\n", - " frag_new = FragList.merge_frag_to_frag(\n", - " frag1, frag2, 'R') # L,R,R -> 
3\n", + " frag=frag_amt[0]\n", + " amt=frag_amt[1]\n", + " frag1=frag # 1L\n", + " frag2=species # 4R\n", + " frag_new=FragList.merge_frag_to_frag(frag1, frag2, 'R') # L,R,R -> 3\n", " self.CL3.append((frag_new, amt))\n", "\n", - " elif len(re.findall(r'R', species)) == 0 and len(re.findall(r'L', species)) == 4:\n", + " elif count_of_R_labels == 0 and count_of_L_labels == 4:\n", " paired_frag_list = FragList.pair_frag(amount, self.R1dict)\n", " for frag_amt in paired_frag_list:\n", " frag = frag_amt[0]\n", " amt = frag_amt[1]\n", " frag1 = frag # 1R\n", " frag2 = species # 4L\n", - " frag_new = FragList.merge_frag_to_frag(\n", - " frag2, frag1, 'R') # L,R,R -> 3L\n", + " frag_new = FragList.merge_frag_to_frag(frag2, frag1, 'R') # L,R,R -> 3L\n", " self.CL3.append((frag_new, amt))\n", "\n", - " elif len(re.findall(r'R', species)) == 2 and len(re.findall(r'L', species)) == 2:\n", + " elif count_of_R_labels == 2 and count_of_L_labels == 2:\n", " paired_frag_list = FragList.pair_frag(amount, self.L1dict)\n", " for frag_amt in paired_frag_list:\n", " frag = frag_amt[0]\n", " amt = frag_amt[1]\n", " frag1 = frag # 1L\n", " frag2 = species # 2R2L\n", - " frag_new = FragList.merge_frag_to_frag(\n", - " frag1, frag2, 'R') # L,R,R -> 1R2L\n", + " frag_new = FragList.merge_frag_to_frag(frag1, frag2, 'R') # L,R,R -> 1R2L\n", " self.CL3.append((frag_new, amt))\n", "\n", - " elif len(re.findall(r'R', species)) == 3 and len(re.findall(r'L', species)) == 1:\n", + " elif count_of_R_labels == 3 and count_of_L_labels == 1:\n", " paired_frag_list = FragList.pair_frag(amount, self.R1dict)\n", " for frag_amt in paired_frag_list:\n", " frag = frag_amt[0]\n", " amt = frag_amt[1]\n", " frag1 = frag # 1R\n", " frag2 = species # 3R1L\n", - " frag_new = FragList.merge_frag_to_frag(\n", - " frag2, frag1, 'R') # L,R,R -> 3R\n", + " frag_new = FragList.merge_frag_to_frag(frag2, frag1, 'R') # L,R,R -> 3R\n", " self.CL3.append((frag_new, amt))\n", "\n", - " elif len(re.findall(r'R', species)) == 1 
and len(re.findall(r'L', species)) == 3:\n", + " elif count_of_R_labels == 1 and count_of_L_labels == 3:\n", " paired_frag_list = FragList.pair_frag(amount, self.L1dict)\n", " for frag_amt in paired_frag_list:\n", " frag = frag_amt[0]\n", " amt = frag_amt[1]\n", " frag1 = frag # 1L\n", " frag2 = species # 1R3L\n", - " frag_new = FragList.merge_frag_to_frag(\n", - " frag1, frag2, 'R') # L,R,R -> 3L\n", + " frag_new = FragList.merge_frag_to_frag(frag1, frag2, 'R') # L,R,R -> 3L\n", " self.CL3.append((frag_new, amt))\n", "\n", " def pair_CL3s(self):\n", @@ -247,7 +246,9 @@ " '''\n", "\n", " for species, amount in self.CL3:\n", - " if len(re.findall(r'R', species)) == 2 and len(re.findall(r'L', species)) == 1:\n", + " count_of_R_labels = len(re.findall(r'R', species))\n", + " count_of_L_labels = len(re.findall(r'L', species))\n", + " if count_of_R_labels == 2 and count_of_L_labels == 1:\n", " paired_frag_list = FragList.pair_frag(amount, self.R1dict)\n", " for frag_amt in paired_frag_list:\n", " frag = frag_amt[0]\n", @@ -258,7 +259,7 @@ " self.Rlist.append((frag_new, amt * 2))\n", " self.RRLLlist.append(frag_new)\n", "\n", - " elif len(re.findall(r'R', species)) == 1 and len(re.findall(r'L', species)) == 2:\n", + " elif count_of_R_labels == 1 and count_of_L_labels == 2:\n", " paired_frag_list = FragList.pair_frag(amount, self.L1dict)\n", " for frag_amt in paired_frag_list:\n", " frag = frag_amt[0]\n", @@ -269,7 +270,7 @@ " self.Llist.append((frag_new, amt * 2))\n", " self.RRLLlist.append(frag_new)\n", "\n", - " elif len(re.findall(r'R', species)) == 3 and len(re.findall(r'L', species)) == 0:\n", + " elif count_of_R_labels == 3 and count_of_L_labels == 0:\n", " paired_frag_list = FragList.pair_frag(amount, self.L1dict)\n", " for frag_amt in paired_frag_list:\n", " frag = frag_amt[0]\n", @@ -281,7 +282,7 @@ " self.RRLLlist.append(frag_new)\n", "\n", " # 3L\n", - " elif len(re.findall(r'R', species)) == 0 and len(re.findall(r'L', species)) == 3:\n", + " elif 
count_of_R_labels == 0 and count_of_L_labels == 3:\n", " paired_frag_list = FragList.pair_frag(amount, self.R1dict)\n", " for frag_amt in paired_frag_list:\n", " frag = frag_amt[0]\n", @@ -302,9 +303,9 @@ " self.Llist.append((one_L_frag, amt))\n", "\n", " def grind(conc, size):\n", - " \"\"\"\n", + " '''\n", " Split fragment concentrations into several repeating concentration units with specified size\n", - " \"\"\"\n", + " '''\n", " grinded_conc = []\n", " for label, c in conc:\n", " times = int(c / size)\n", @@ -315,306 +316,6 @@ "\n", " return grinded_conc\n", "\n", - " def match_sequences(seq1, seq2, diff_tol=1e-6):\n", - " \"\"\"\n", - " Given two lists (each item is int or float):\n", - " seq1 and seq2 with same sum, the method returns\n", - " matched indices and values.\n", - " Example:\n", - " seq1 = [1, 3, 1]\n", - " seq2 = [2, 1, 2]\n", - " return: [[(0,0),1],\n", - " [(1,0),1],\n", - " [(1,1),1],\n", - " [(1,2),1],\n", - " [(2,2),1]]\n", - " \"\"\"\n", - " # check if sums are close to same\n", - " sum_diff = sum(seq2) - sum(seq1)\n", - " assert (\n", - " abs(sum_diff / 1.0 / sum(seq1)) <= diff_tol\n", - " ), \"seq1 has different sum (diff={0}) than seq2.\".format(sum_diff)\n", - "\n", - " # force the sum to be same if the difference\n", - " # is small enough\n", - " if sum_diff >= 0:\n", - " seq1[-1] = seq1[-1] + sum_diff\n", - " else:\n", - " seq2[-1] = seq2[-1] - sum_diff\n", - "\n", - " # make cumulative sequences\n", - " cum_seq1 = [seq1[0]]\n", - " for item1 in seq1[1:]:\n", - " cum_seq1.append(cum_seq1[-1] + item1)\n", - "\n", - " cum_seq2 = [seq2[0]]\n", - " for item2 in seq2[1:]:\n", - " cum_seq2.append(cum_seq2[-1] + item2)\n", - "\n", - " # add index tags two both cumulative seqs\n", - " pin1 = 0\n", - " pin2 = 0\n", - " matched_indices = []\n", - " matched_cum_values = []\n", - " while pin1 < len(cum_seq1) and pin2 < len(cum_seq2):\n", - " matched_indices.append((pin1, pin2))\n", - "\n", - " if cum_seq1[pin1] > cum_seq2[pin2]:\n", - " 
matched_cum_values.append(cum_seq2[pin2])\n", - " pin2 += 1\n", - " elif cum_seq1[pin1] < cum_seq2[pin2]:\n", - " matched_cum_values.append(cum_seq1[pin1])\n", - " pin1 += 1\n", - " else:\n", - " matched_cum_values.append(cum_seq2[pin2])\n", - " pin1 += 1\n", - " pin2 += 1\n", - "\n", - " # get matches\n", - " matches = []\n", - " for i in range(len(matched_indices)):\n", - " matched_index_tup = matched_indices[i]\n", - " matched_cum_value = matched_cum_values[i]\n", - " if i == 0:\n", - " previous_cum_value = 0\n", - " else:\n", - " previous_cum_value = matched_cum_values[i - 1]\n", - "\n", - " matches.append(\n", - " [matched_index_tup, matched_cum_value - previous_cum_value])\n", - "\n", - " return matches\n", - "\n", - " def match_concentrations_with_same_sums(conc1, conc2, diff_tol=1e-6):\n", - " \"\"\"match_concentrations_with_same_sums\n", - " Given two lists with each item to be a tuple\n", - " (species label, concentration)\n", - " conc1 and conc2 with same total concentrations,\n", - " the method returns matched species labels and\n", - " concentrations.\n", - " Example:\n", - " conc1 = [('a', 1),\n", - " ('b', 3),\n", - " ('c', 1)]\n", - " conc2 = [('x', 2),\n", - " ('y', 1),\n", - " ('z', 2)]\n", - " return: [(('a','x'),1),\n", - " (('b','x'),1),\n", - " (('b','y'),1),\n", - " (('b','z'),1),\n", - " (('c','z'),1)]\n", - " \"\"\"\n", - " labels1 = [tup[0] for tup in conc1]\n", - " labels2 = [tup[0] for tup in conc2]\n", - "\n", - " seq1 = [tup[1] for tup in conc1]\n", - " seq2 = [tup[1] for tup in conc2]\n", - "\n", - " matches_seq = FragList.match_sequences(seq1, seq2, diff_tol)\n", - "\n", - " matches_conc = []\n", - " for match_seq in matches_seq:\n", - " matched_label_index1 = match_seq[0][0]\n", - " matched_label_index2 = match_seq[0][1]\n", - " matched_value = match_seq[1]\n", - "\n", - " matched_label1 = labels1[matched_label_index1]\n", - " matched_label2 = labels2[matched_label_index2]\n", - " match_conc = ((matched_label1, matched_label2), 
matched_value)\n", - " matches_conc.append(match_conc)\n", - " return matches_conc\n", - "\n", - " def match_concentrations_with_different_sums(conc1, conc2):\n", - " \"\"\"\n", - " Given two lists with each item to be a tuple\n", - " (species label, concentration)\n", - " conc1 and conc2 with different total concentrations,\n", - " the method returns matched species labels and\n", - " concentrations.\n", - " Example:\n", - " conc1 = [('a', 1),\n", - " ('b', 3),\n", - " ('c', 1)]\n", - " conc2 = [('x', 2),\n", - " ('y', 1),\n", - " ('z', 10)]\n", - " return: [(('a','x', 'z', 'z'),1),\n", - " (('b','x', 'z', 'z'),1),\n", - " (('b','y', 'z', 'z'),1),\n", - " (('b','z', 'z'),1),\n", - " (('c','z', 'z'),1)]\n", - " \"\"\"\n", - " labels1 = [tup[0] for tup in conc1]\n", - " labels2 = [tup[0] for tup in conc2]\n", - "\n", - " seq1 = [tup[1] for tup in conc1]\n", - " seq2 = [tup[1] for tup in conc2]\n", - "\n", - " matches_conc = []\n", - " pin1 = 0\n", - " pin2 = 0\n", - " val1 = seq1[pin1]\n", - " val2 = seq2[pin2]\n", - "\n", - " while True:\n", - " if val1 > val2:\n", - " match = ((labels1[pin1], labels2[pin2]), val2)\n", - " matches_conc.append(match)\n", - " val1 = val1 - val2\n", - " pin2 += 1\n", - " if pin2 == len(seq2):\n", - " break\n", - " val2 = seq2[pin2]\n", - " elif val1 < val2:\n", - " match = ((labels1[pin1], labels2[pin2]), val1)\n", - " matches_conc.append(match)\n", - " val2 = val2 - val1\n", - " pin1 += 1\n", - " if pin1 == len(seq1):\n", - " break\n", - " val1 = seq1[pin1]\n", - " else:\n", - " match = ((labels1[pin1], labels2[pin2]), val1)\n", - " matches_conc.append(match)\n", - " pin1 += 1\n", - " pin2 += 1\n", - " if pin1 == len(seq1):\n", - " break\n", - " val1 = seq1[pin1]\n", - " if pin2 == len(seq2):\n", - " break\n", - " val2 = seq2[pin2]\n", - "\n", - " # if pin2 first reaches the end\n", - " # append all the remaining seq1 to matches_conc\n", - " if pin2 == len(seq2) and pin1 < len(seq1):\n", - " remain_conc1 = [(labels1[pin1], val1)] + 
conc1[(pin1 + 1):]\n", - " matches_conc.extend(remain_conc1)\n", - "\n", - " # if pin1 first reaches the end\n", - " # let matches_conc match with remaining seq2\n", - " elif pin1 == len(seq1) and pin2 < len(seq2):\n", - " remain_conc2 = [(labels2[pin2], val2)] + conc2[(pin2 + 1):]\n", - " matches_conc = FragList.match_concentrations_with_different_sums(\n", - " matches_conc, remain_conc2\n", - " )\n", - "\n", - " # if pin1 and pin2 reach the ends at same time\n", - " # matches_conc is ready to return\n", - " return matches_conc\n", - "\n", - " def shuffle(conc, seed=None):\n", - " \"\"\"\n", - " Randomly shuffle a list of fragments\n", - " \"\"\"\n", - " idx_arr = np.arange(len(conc))\n", - "\n", - " if seed is not None:\n", - " np.random.seed(seed)\n", - " np.random.shuffle(idx_arr)\n", - "\n", - " return [conc[idx] for idx in idx_arr]\n", - "\n", - " def flatten(combo):\n", - " \"\"\"\n", - " Given a combo nested `tuple`, e.g.,\n", - " ((('LY', 'XR'), ('LWL', 'RUR'))\n", - " return a list of labels contained in\n", - " the combo ['LY', 'XR', 'LWL', 'RUR']\n", - " \"\"\"\n", - " return_list = []\n", - " for i in combo:\n", - " if isinstance(i, tuple):\n", - " return_list.extend(FragList.flatten(i))\n", - " else:\n", - " return_list.append(i)\n", - " return return_list\n", - "\n", - " def merge_frag_to_frag(frag1, frag2, label): # label should match the desired merging l/'abel on frag2\n", - " from rmgpy.molecule import Bond\n", - " from rmgpy.molecule.fragment import Fragment, CuttingLabel\n", - " \n", - " frag_spe1 = Fragment().from_smiles_like_string(frag1)\n", - " frag_spe2 = Fragment().from_smiles_like_string(frag2)\n", - " # find position of desired CuttingLabel\n", - " # need to find CuttingLabel on frag2 first\n", - " for vertex in frag_spe2.vertices:\n", - " if isinstance(vertex, CuttingLabel):\n", - " if vertex.symbol == label:\n", - " cut2 = vertex\n", - " \n", - " atom2 = list(cut2.edges.keys())[0]\n", - " frag_spe2.remove_atom(cut2)\n", - " break\n", 
- "\n", - " if cut2.symbol[0] == 'L':\n", - " Ctl = cut2.symbol.replace('L', 'R')\n", - " else: # that means this CuttingLabel is R something\n", - " \n", - " Ctl = cut2.symbol.replace('R', 'L')\n", - " \n", - " # merge to frag_spe1\n", - " for vertex in frag_spe1.vertices:\n", - " if isinstance(vertex, CuttingLabel):\n", - " if vertex.symbol == Ctl:\n", - " cut1 = vertex\n", - " atom1 = list(cut1.edges.keys())[0]\n", - " frag_spe1.remove_atom(cut1)\n", - " break\n", - " \n", - " # new merged fragment\n", - " new_frag = frag_spe1.merge(frag_spe2)\n", - " new_frag.add_bond(Bond(atom1=atom1, atom2=atom2, order=1))\n", - " new_frag = new_frag.copy(deep=True)\n", - " new_frag.update()\n", - " return new_frag # return Fragment obtl\n", - "\n", - " def merge_frag_list(to_be_merged):\n", - " import os\n", - " # merges fragments in list from right to left\n", - " species_list = []\n", - " ethylene = []\n", - " newlist = []\n", - " warnings = []\n", - "\n", - " while len(to_be_merged) > 1:\n", - "\n", - " # second to last fragmentin list\n", - " frag1 = to_be_merged[-2].smiles\n", - " frag2 = to_be_merged[-1].smiles # last fragment in list\n", - "\n", - " if 'R' in frag1 and 'L' in frag2:\n", - " newfrag = FragList.merge_frag_to_frag(frag1, frag2, 'L')\n", - "\n", - " elif 'L' in frag1 and 'R' in frag2:\n", - " newfrag = FragList.merge_frag_to_frag(frag1, frag2, 'R')\n", - "\n", - " # warn user if last two fragments in list cannot be merged (no R/L combo to be made)\n", - " else:\n", - " print('Warning! 
Could not merge fragments {} and {}'.format(\n", - " frag1, frag2))\n", - "\n", - " if 'L' in frag1 and 'L' in frag2:\n", - " newfrag = FragList.merge_frag_to_frag(\n", - " frag1.replace('L', 'R'), frag2, 'L')\n", - " if len(to_be_merged) > 2:\n", - " cut = len(to_be_merged)-2\n", - " newfraglist = to_be_merged[:cut]\n", - "\n", - " newfraglist.append(newfrag)\n", - " elif len(to_be_merged) == 2:\n", - "\n", - " newfraglist = [newfrag]\n", - "\n", - " to_be_merged = newfraglist\n", - "\n", - " to_be_merged = newfraglist\n", - " # newlist.append(newfraglist) # if done merging list, write final structure to list of smiles structures\n", - "\n", - " # print('{}% of fragments fully merged...'.format(np.round(100*(i+1)/len(flattened_matches_random)),1))\n", - " # print(newfraglist)\n", - " return newfraglist\n", "\n", " def grind_endcaps(self, grindsize=1, repeats=1):\n", " '''\n", @@ -640,7 +341,7 @@ " '''\n", " matches = FragList.match_concentrations_with_same_sums(self.glmoles,\n", " self.grmoles,\n", - " diff_tol=1e-3)\n", + " rtol=1e-3)\n", " self.endcaps = []\n", " self.middles = []\n", " for match in matches:\n", @@ -726,6 +427,12 @@ " self.grouped.extend(flattened_matches_random)\n", "\n", " def get_mwd(self, bins=10, fname='mwd.png'):\n", + " '''\n", + " loop through molecules i.e. 
grouped fragments\n", + " calculate molecular weight\n", + " create histogram of molecular weights, weighted by molar amount\n", + " store histogram data in histdata\n", + " '''\n", " self.mwd_amts = [x[1] for x in self.grouped]\n", " self.mwd_mws = []\n", " for fraglist, amt in self.grouped:\n", @@ -736,7 +443,22 @@ " self.histdata = plt.hist(\n", " self.mwd_mws, bins=bins, weights=self.mwd_amts)\n", " plt.xlabel(\"Molecular Weight (g/mol)\")\n", - " plt.ylabel(\"Moles\")\n" + " plt.ylabel(\"Moles\")\n", + "\n", + " def reattach(self, grindsize = 1):\n", + " '''\n", + " this parent function combines the steps of reattachment into one\n", + " '''\n", + " self.sort() # sort the fragments by number and type of cutting labels\n", + " self.pair_CL4s() # reattach 4-cutting label fragments to make 3-cutting label fragments\n", + " self.pair_CL3s() # reattach 3-cutting label fragments to make 2-cutting label fragments\n", + " self.update_lists() # add 1-cutting label fragments to either Rlist or Llist\n", + " self.grind_endcaps(grindsize=grindsize) # grind the concentrations of fragments into smaller sub-concentrations\n", + " self.pair_endcaps() # pair together 1R and 1L fragments to make endcap pairs\n", + " self.grind_middles() # grind the concentrations of middle LR fragments into smaller sub-concentrations of same size as above\n", + " self.distribute_middles() # randomly select an endcap pair for each middle LR fragment\n", + " return self\n", + "\n" ] }, { @@ -750,12 +472,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "9d5bd8fb-2a97-487b-8cc2-e1349f5612e1", "metadata": {}, "outputs": [], "source": [ - "working_dir = os.path.join('./')\n", + "working_dir = os.getcwd()\n", "chemkin_path = os.path.join(working_dir, 'data/chem_annotated.inp')\n", "species_dict_path = os.path.join(working_dir, 'data/species_dictionary.txt')\n", "results_path = os.path.join(working_dir, 'results')\n", @@ -778,7 +500,7 @@ }, { "cell_type": "code", - 
"execution_count": 5, + "execution_count": 4, "id": "2f9f50fc-eba0-4156-8d87-1ed68294560d", "metadata": {}, "outputs": [], @@ -806,7 +528,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "6d69f465-2eff-4549-b9af-9531d77de360", "metadata": {}, "outputs": [], @@ -829,7 +551,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "100d1e81-0c4a-499d-bb45-63ed17fc605c", "metadata": {}, "outputs": [], @@ -851,12 +573,12 @@ "metadata": {}, "source": [ "## 2. Random Reattachment Procedure\n", - "- make a list of tuples of the each fragment species and its final concentration" + "- make a list of tuples of each fragment species and its final concentration" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "2082ed31-adaf-4346-bcd2-9ff09666faed", "metadata": {}, "outputs": [], @@ -900,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "ebe147a1-ad99-4320-8813-45b2141353f8", "metadata": { "tags": [] @@ -910,17 +632,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "grinding endcaps to 0.0005\n", - "grinding middle pieces to 0.0005\n", - "grinding endcaps to 0.0005\n", - "grinding middle pieces to 0.0005\n", - "grinding endcaps to 0.0005\n", - "grinding middle pieces to 0.0005\n" + "grinding endcaps to 0.001\n", + "grinding middle pieces to 0.001\n", + "grinding endcaps to 0.001\n", + "grinding middle pieces to 0.001\n", + "grinding endcaps to 0.001\n", + "grinding middle pieces to 0.001\n" ] }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -933,30 +655,10 @@ "%matplotlib inline\n", "hdl = []\n", "hdl = []\n", - "for i in range(3): # perform reattachment n times for repeatability analysis\n", - "\n", - " fl = FragList(frag_list) # instantiate fraglist object\n", - "\n", - " fl.sort() # sort the fragments by number and type of cutting labels\n", - "\n", - " fl.pair_CL4s() # reattach 4-cutting label fragments to make 3-cutting label fragments\n", - "\n", - " fl.pair_CL3s() # reattach 3-cutting label fragments to make 2-cutting label fragments\n", - "\n", - " fl.update_lists() # add 1-cutting label fragments to either Rlist or Llist\n", - "\n", - " # grind the concentrations of fragments into smaller sub-concentrations\n", - " fl.grind_endcaps(grindsize=0.0005)\n", - "\n", - " fl.pair_endcaps() # pair together 1R and 1L fragments to make endcap pairs\n", - "\n", - " fl.grind_middles() # grind the concentrations of middle LR fragments into smaller sub-concentrations of same size as above\n", - "\n", - " fl.distribute_middles() # randomly select an endcap pair for each middle LR fragment\n", - "\n", - " # calculate and plot the molecular weight distribution of the resulting fragment groups\n", + "for i in range(3): \n", + " fl = FragList(frag_list)\n", + " fl.reattach(grindsize=0.001)\n", " fl.get_mwd(bins=range(0, 501, 15))\n", - "\n", " hdl.append(fl.histdata) # save histogram data for repeatability analysis\n", "\n" ] @@ -974,13 +676,13 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "83a0e056-66dd-4e79-b4b0-f6b74c518a58", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -1006,10 +708,458 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 10, "id": "c2b46571-b217-4962-9578-631740a17466", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "species mole percent\n", + "C.......................................................................................................24.35%\n", + "C=CC....................................................................................................20.65%\n", + "miscillaneous large molecules............................................................................9.93%\n", + "CCC......................................................................................................1.39%\n", + "C=CC=CC..................................................................................................1.29%\n", + "C=CCCC...................................................................................................1.09%\n", + "C=CCC(C)C=CC.............................................................................................1.06%\n", + "C=C(C)C=CC...............................................................................................0.84%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC.....................................................................0.77%\n", + "CC=CCC...................................................................................................0.77%\n", + "C=C(C)CCC................................................................................................0.76%\n", + "CC=CC(C)CCC..............................................................................................0.74%\n", + "C=CCC(C)CC(C)CC(C)CCC....................................................................................0.71%\n", + "C=CCC(=C)C...............................................................................................0.67%\n", + 
"C=CCC=C..................................................................................................0.66%\n", + "C=C(C)C...................................................................................................0.6%\n", + "CC=CC(C)C=CC.............................................................................................0.59%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...............................................................0.58%\n", + "CC1=CCCC1................................................................................................0.57%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC..................................................................0.55%\n", + "CCCCC....................................................................................................0.52%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC.................................................................0.5%\n", + "C=C(C)CC(C)C=CC..........................................................................................0.49%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C........................................................................0.45%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC............................................................0.45%\n", + "C=CCC(C)CC(C)CC(C)CC(C)C=CC..............................................................................0.44%\n", + "CC=CC(C)C................................................................................................0.44%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC....................................................................0.42%\n", + "C=C(C)CC(C)CC(C)CC(C)CCC.................................................................................0.42%\n", + "C=CCC(C)CCC..............................................................................................0.41%\n", + 
"C=CCC(C)CC(=C)C..........................................................................................0.41%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC......................................................................0.4%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CCC................................................................................0.4%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC.......................................................0.4%\n", + "CCCC(C)CC(C)CC(C)C.......................................................................................0.39%\n", + "CC1=CC=CC1...............................................................................................0.39%\n", + "C=CCC(C)CC(C)CC(C)C......................................................................................0.38%\n", + "C=C(C)CC(C)CCC...........................................................................................0.38%\n", + "CC=CC(C)CC(C)CC(C)CCC....................................................................................0.38%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.......................................................................0.37%\n", + "CCCC(C)CC(C)CC(C)CCC.....................................................................................0.37%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C.................................................................0.36%\n", + "CC=CC(C)CC(C)CC(C)CC(C)C.................................................................................0.35%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC......................................................................0.35%\n", + "CC=CC(C)CC(C)C=CC........................................................................................0.34%\n", + "C=CCC(C)CC(C)CC(C)C=CC...................................................................................0.34%\n", + 
"C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC.................................................................0.33%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC................................................0.33%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC...................................................0.32%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)C=CC...........................................................................0.32%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC.....................................................0.31%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C....................................................................0.29%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(=C)C...........................................................................0.28%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..................................................................0.28%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.........................................................0.28%\n", + "CC1=CCC=C1...............................................................................................0.28%\n", + "C=C(C)CC(C)CC(C)CC(C)C=CC................................................................................0.28%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC.................................................0.25%\n", + "C=CCC(C)CC(C)CC(C)CC=C...................................................................................0.25%\n", + "C=CCC(C)CC(C)C=CC........................................................................................0.25%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C............................................................0.24%\n", + "C=CCC(C)CC(C)CC(C)CC(=C)C................................................................................0.24%\n", + 
"CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...............................................................0.24%\n", + "C=C(C)CC(C)CC(C)C=CC.....................................................................................0.24%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)C=CC.........................................................................0.24%\n", + "C=C(C)CC(C)C.............................................................................................0.22%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C....................................................................0.22%\n", + "C=CCC(C)C................................................................................................0.22%\n", + "C=C(C)CC(C)CC(C)CC(C)C...................................................................................0.21%\n", + "C=CCC(C)CC(C)CC(C)CC(C)C.................................................................................0.21%\n", + "C=CCC(C)CC=C.............................................................................................0.21%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC......................................................0.21%\n", + "C=C(C)CC(=C)C............................................................................................0.21%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.........................................................0.2%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...........................................................0.2%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC.............................................0.19%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC.............................................................0.19%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC................................................................0.19%\n", + 
"C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C..................................................0.19%\n", + "C1=CCCC1.................................................................................................0.18%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CCC...............................................................................0.18%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(=C)C.............................................................................0.17%\n", + "C=C(C)CC(C)CC(=C)C.......................................................................................0.17%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC..........................................................0.17%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C........................................................................0.17%\n", + "CCC=C(C)C................................................................................................0.17%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC..................................................0.16%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)C=CC.........................................................................0.16%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC.......................................................0.16%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C...................................................0.16%\n", + "CC=CC(C)CC(C)CC(C)CC(C)C=CC..............................................................................0.16%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..................................................................0.16%\n", + "CC=CC(C)CC(C)C...........................................................................................0.16%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...........................................0.16%\n", + 
"CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC................................................0.15%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C.........................................................0.15%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C..............................................................0.14%\n", + "CC=CC(C)CC(C)CCC.........................................................................................0.14%\n", + "C=C......................................................................................................0.13%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C.............................................0.13%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CCC............................................................................0.12%\n", + "C1=CCC=C1................................................................................................0.12%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC..............................................0.12%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC=C..............................................................................0.11%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC.......................................................0.11%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C.....................................................0.11%\n", + "CCCC(C)C.................................................................................................0.11%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC......................................................................0.11%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.....................................................0.11%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C................................................................0.1%\n", + 
"CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..............................................................0.1%\n", + "C=CC=CCC=C................................................................................................0.1%\n", + "C=CCC(C)CC(C)C............................................................................................0.1%\n", + "CC=CC(C)=CCC..............................................................................................0.1%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC.........................................0.1%\n", + "CCC=C(C)CCC...............................................................................................0.1%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC.................................................0.09%\n", + "C=C(C)CC(C)=CCC..........................................................................................0.09%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C...............................................................0.09%\n", + "C=C(C)C=C(C)C............................................................................................0.09%\n", + "C=CCC(C)CC(C)CC(=C)C.....................................................................................0.09%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..............................................0.09%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C...................................................0.09%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C....................................................................0.09%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC.................................................................0.09%\n", + "CC(C)C...................................................................................................0.09%\n", + 
"CC=CC(C)CC(C)CC(C)CC(C)CC(C)C............................................................................0.09%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C..........................................0.09%\n", + "CC=CCC(C)CC(C)CC(C)C.....................................................................................0.09%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C................................................0.09%\n", + "C=CC=CCC(=C)C............................................................................................0.08%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C................................................0.08%\n", + "C=CC=CC=CC...............................................................................................0.08%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C......................................................................0.08%\n", + "C=C(C)CC(C)CC(C)C........................................................................................0.07%\n", + "CC=CC(C)CC(C)=CCC........................................................................................0.07%\n", + "CCC(C)CC(C)CC(C)C........................................................................................0.07%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC...........................................................0.07%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...........................................0.07%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C..............................................................0.07%\n", + "CC=CC(C)C=C(C)C..........................................................................................0.07%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C...............................................0.07%\n", + 
"CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.....................................................................0.07%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.................................................................0.07%\n", + "C=CC=C(C)C...............................................................................................0.06%\n", + "C=CCC(C)CC(C)CC=C........................................................................................0.06%\n", + "C=CCC(C)CC(C)CC(C)C=C(C)C................................................................................0.06%\n", + "C=CC=CCC(C)C=CC..........................................................................................0.06%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)=CCC...........................................................................0.06%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)C=CC...........................................................................0.06%\n", + "C=CCC(C)CC(C)CC(C)CC(C)=CCC..............................................................................0.06%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC...............................................................0.05%\n", + "CC(C)CC(C)C..............................................................................................0.05%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC............................................0.05%\n", + "CC=CC(C)CCC(C)CC(C)CC(C)C................................................................................0.05%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C............................................................0.05%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.................................................................0.05%\n", + "C=C(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C...................................................................0.05%\n", + 
"C=C(C)C=CCCC.............................................................................................0.05%\n", + "C=CC=CCCC................................................................................................0.05%\n", + "C=CCC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C......................................................................0.05%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)C............................................................................0.05%\n", + "CC=CC(C)CC(C)CC(C)CC(C)=CCC..............................................................................0.05%\n", + "CCC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC................................................................0.05%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C..............................................................0.05%\n", + "CCCC(C)CC(C)CC(C)CC(C)C..................................................................................0.05%\n", + "CCC=C(C)CC(C)CC(C)CC(C)C.................................................................................0.05%\n", + "CC=CC(C)CC(C)CC(C)C......................................................................................0.05%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.....................................................0.05%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C...............................................0.05%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C.......................................................0.05%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C............................................................0.05%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C.................................................................0.05%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC..................................................................0.05%\n", + 
"C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.............................................................0.04%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C..................................................................0.04%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC............................................................0.04%\n", + "CCC=C(C)CC(C)CC(C)CC(C)CCC...............................................................................0.04%\n", + "C=CCC(C)CC(C)CCC.........................................................................................0.04%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC..........................................................0.04%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.......................................................0.04%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC...........................................................0.04%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)C..............................................................................0.04%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC....................................................................0.04%\n", + "C=CC=CCC(C)CC(=C)C.......................................................................................0.04%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C...................................................................0.04%\n", + "C=CC=CCC(C)CC(C)CC(C)C=CC................................................................................0.04%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.............................................................0.04%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.........................................0.04%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C........................................0.04%\n", + 
"CC=CC(C)CC(C)C=C(C)C.....................................................................................0.04%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)C................................................................................0.04%\n", + "CC(C)CC(C)CC(C)CC(C)C....................................................................................0.04%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC..................................................0.04%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC...............................................................0.04%\n", + "CCCCC(C)CC(C)CC(C)C......................................................................................0.04%\n", + "CC(C)=CC(C)C.............................................................................................0.03%\n", + "CCC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC.................................................0.03%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC...................................................0.03%\n", + "C=CCCC(C)CC(C)CC(C)C.....................................................................................0.03%\n", + "CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..........................................................0.03%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C....................................................0.03%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..........................................0.03%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(=C)C........................................................................0.03%\n", + "C=CCC(C)CC(C)=CCC........................................................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.......................................................................0.03%\n", + 
"C=C1C=CCC1...............................................................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC................................................0.03%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C........................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)=CCC...................................................................................0.03%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C...................................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.........................................0.03%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C........................................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC...........................................0.03%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC.....................................................0.03%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C...................................................0.03%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(=C)C.............................................................................0.03%\n", + "C=CC=CCC(C)CC(C)CC(C)C...................................................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC......................................0.03%\n", + "CCCC(C)CC(C)C............................................................................................0.03%\n", + "C=C(C)CC(C)CC(C)CC(C)C=C(C)C.............................................................................0.03%\n", + 
"CCC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C...................................................0.03%\n", + "C=CCC(C)C=C(C)C..........................................................................................0.03%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.........................................................................0.03%\n", + "CCC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..................................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C......................................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.......................................................0.03%\n", + "CCCC(C)CC(C)CC(C)C=C(C)C.................................................................................0.03%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.............................................0.03%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C......................................0.02%\n", + "C=C(C)CC(C)CC(C)C=C(C)C..................................................................................0.02%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C.........................................................0.02%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..........................................................0.02%\n", + "CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.........................................................................0.02%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC................................................0.02%\n", + "CC(C)=CC(C)CC(C)CC(C)CC(C)C..............................................................................0.02%\n", + "C=C(C)C=CCC(=C)C.........................................................................................0.02%\n", + 
"C=CCC(C)CC(C)CC(C)CC(C)C=C(C)C...........................................................................0.02%\n", + "C=C(C)CC(C)C=C(C)C.......................................................................................0.02%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC......................................0.02%\n", + "C=C(C)C=CCC(C)C=CC.......................................................................................0.02%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.........................................0.02%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C...........................................0.02%\n", + "CC(C)=CC(C)CC(C)CC(C)CC(C)CC(C)C.........................................................................0.02%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C...............................................0.02%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC...............................................................0.02%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C..................................................0.02%\n", + "CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C......................................................0.02%\n", + "CC=CC(C)CC(C)CC(C)C=CC...................................................................................0.02%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC=C.........................................................................0.02%\n", + "CCCC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.......................................................................0.02%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C........................................................0.02%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC............................................0.02%\n", + 
"C=CC=CCC(C)CC(C)CC(C)CC=C................................................................................0.02%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C............................................0.02%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC..............................................................0.02%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C...............................................0.02%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C..........................................................0.02%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC............................................................0.02%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC.......................................................0.02%\n", + "C=C(C)CCC(C)CC(C)CC(C)C..................................................................................0.02%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..............................................0.02%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C.....................................0.02%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C........................................0.02%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC.............................................0.02%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(=C)C.....................................................................0.02%\n", + "CC(C)=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..........................................................0.02%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C..................................................0.02%\n", + "C=C(C)CC(C)CC(C)=CCC.....................................................................................0.02%\n", + 
"CC=CC(C)CC(C)CC(C)CC(C)CC(C)=CCC.........................................................................0.02%\n", + "C=CCC(C)=CCC.............................................................................................0.02%\n", + "C=CC=C(C)CC(C)CC=C.......................................................................................0.02%\n", + "C=C(C)C=CCC(C)CC(=C)C....................................................................................0.02%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)=CCC...........................................................................0.02%\n", + "CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C................................................................0.02%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.........................................................0.02%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC=CC..................................................................0.02%\n", + "C=CC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC...............................................................0.02%\n", + "C=CCC(C)CC=CC............................................................................................0.02%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.......................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC...........................................0.01%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CCC..............................................................................0.01%\n", + "CCCC(C)=CC(C)C...........................................................................................0.01%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...............................................0.01%\n", + "CC(C)=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.....................................0.01%\n", + 
"C=CC=CCC(C)CC(C)CC(C)C=C(C)C.............................................................................0.01%\n", + "CC=CC(C)=CC(C)CC(C)CC(C)CC(C)C=CC........................................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C..................................................0.01%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C....................................................0.01%\n", + "C=CC=C(C)CC(C)CC(C)CC(C)CC=C.............................................................................0.01%\n", + "C=CC=C(C)CC(C)CC(C)CC(C)C................................................................................0.01%\n", + "CCCC(C)C=C(C)C...........................................................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.............................................0.01%\n", + "CCCC(C)CC(C)CC(C)C1C=CC=C1C..............................................................................0.01%\n", + "CCCC(C)CCC...............................................................................................0.01%\n", + "CC(C)=CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C...................................................................0.01%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C..........................................0.01%\n", + "CC(C)=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C...............................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC..........................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CCC..........................................................................0.01%\n", + "C=CCC=CC(=C)C............................................................................................0.01%\n", + 
"C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC........................................0.01%\n", + "C=CCC=CC.................................................................................................0.01%\n", + "C=C(C)C=C(C)C=CC.........................................................................................0.01%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.................................................................0.01%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)C..............................................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.................................................................0.01%\n", + "C=CCC(C)CC(C)C=C(C)C.....................................................................................0.01%\n", + "CC=CC(C)=CC(C)C=CC.......................................................................................0.01%\n", + "C=CC=C(C)C=CC............................................................................................0.01%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C..........................................0.01%\n", + "CCC=C(C)CC(C)CC(C)CC(C)C=C(C)C...........................................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)=CCC.........................................................................0.01%\n", + "CCC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC...........................................0.01%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C....................................0.01%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.....................................0.01%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...................................0.01%\n", + 
"CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..............................................................0.01%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C....................................................0.01%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C..............................................................0.01%\n", + "C=C(C)CCC=CC.............................................................................................0.01%\n", + "C=C1CC=CC1...............................................................................................0.01%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C..............................................0.01%\n", + "C=CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C..................................................................0.01%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C...........................................0.01%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)C.............................................................................0.01%\n", + "C=CCC(C)=CC(C)C..........................................................................................0.01%\n", + "C=CCC(C)CC=CC(=C)C.......................................................................................0.01%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC............................................................0.01%\n", + "CCC=C(C)C=C(C)C..........................................................................................0.01%\n", + "C=CC=CC(C)CC(C)CC(C)CC=C.................................................................................0.01%\n", + "C=CC=CCC(C)CC(C)CC(C)CCC.................................................................................0.01%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C...................................0.01%\n", + 
"C=C(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C..............................................................0.01%\n", + "C=C(C)CC(C)CC(C)CC(=C)C..................................................................................0.01%\n", + "CCC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.............................................0.01%\n", + "C=C(C)CC(C)CCC(C)CC(C)CC(C)C.............................................................................0.01%\n", + "CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...................................................................0.01%\n", + "C=CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC...................................................................0.01%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C...............................................0.01%\n", + "CCC=C(C)CC(C)C...........................................................................................0.01%\n", + "C=CC=C(C)CC=C............................................................................................0.01%\n", + "C=CC=C(C)CC(C)C=CC.......................................................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=CC(=C)C..............................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C...................................0.01%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C......................................................0.01%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC....................................................................0.01%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CCC..........................................................................0.01%\n", + "CCCC(C)CC(C)CC(C)CC(C)C=C(C)C............................................................................0.01%\n", + 
"C=CC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C...........................................................0.01%\n", + "CC(C)=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C................................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=CC...................................................................0.01%\n", + "CCC=CCC(C)C=CCC(C)C......................................................................................0.01%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..................................................0.01%\n", + "CC=CC....................................................................................................0.01%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=CC................................................................0.01%\n", + "C=CCC(C)C(C)CC=C.........................................................................................0.01%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C....................................................0.01%\n", + "C=C(C)C=CC=C(C)C.........................................................................................0.01%\n", + "CCC=C(C)CCC(C)CC(C)CC(C)C................................................................................0.01%\n", + "C=CC=C(C)CC(=C)C.........................................................................................0.01%\n", + "CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.................................................0.01%\n", + "C=CC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC................................................0.01%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC.............................................0.01%\n", + "C=CC=CC=C(C)C............................................................................................0.01%\n", + 
"CC=CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C......................................................................0.01%\n", + "C=C(C)C=CC=CC............................................................................................0.01%\n", + "CCC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C....................................0.01%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC=CC...................................................0.01%\n", + "CCC=C(C)CC(C)CC(C)C......................................................................................0.01%\n", + "C=CC=CC=CCCC=CC=CC=C......................................................................................0.0%\n", + "CC(C)=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.....................................................0.0%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=CC(=C)C...........................................0.0%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=CC.....................................................0.0%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.........................................0.0%\n", + "CCCC(C)CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC...............................................................0.0%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC......................................................0.0%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=CC(=C)C................................................0.0%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C..............................................0.0%\n", + "CCCC(C)CC=CC(C)CC(C)CC(C)CCC..............................................................................0.0%\n", + "C=CCC(C)CC(C)CC(C)CC=CC(=C)C..............................................................................0.0%\n", + 
"CC=CC(C)CC(C)CC(C)CC(C)C=C(C)C............................................................................0.0%\n", + "CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C............................................0.0%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C................................................0.0%\n", + "C=CC=CCC(C)C..............................................................................................0.0%\n", + "CC=CC=CC..................................................................................................0.0%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C=CC................................................0.0%\n", + "CC=CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC....................................................0.0%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C....................................................................0.0%\n", + "C=C(C)CC(C)CC(C)CC(C)CC=CC................................................................................0.0%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...........................................0.0%\n", + "CC(C)=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C............................................0.0%\n", + "CCC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC...........................................................0.0%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C................................0.0%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC..........................................................0.0%\n", + "CCC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.............................................................0.0%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C.........................................0.0%\n", + 
"C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..................................0.0%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C................................................................0.0%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CCC(C)CC(C)CC(C)C...................................................0.0%\n", + "CC=CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC...................................................................0.0%\n", + "CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.......................................................................0.0%\n", + "C=C(C)CC(C)CC(C)C(C)CC(C)C(C)CCC..........................................................................0.0%\n", + "C=CC=C(C)CC(C)CC(C)CC(C)CCC...............................................................................0.0%\n", + "CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CC(C)C..............................................0.0%\n", + "C=CC=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C....................................................0.0%\n", + "C=CC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C...................................................0.0%\n", + "C=C(C)CC(C)CC(C)C(C)CC(C)C(C)CC(C)C(C)CCC.................................................................0.0%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C.............................................0.0%\n", + "CC=CC(C)C1C=CC=C1C........................................................................................0.0%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C.............................................................0.0%\n", + "CCCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C1C=CC=C1C................................................................0.0%\n", + "C=CCCC=C..................................................................................................0.0%\n", + 
"C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C....................................0.0%\n", + "CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C........................................................0.0%\n", + "C=CC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC=C...............................................................0.0%\n", + "C=CC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C..................................................................0.0%\n", + "CC=CC(C)=CC(C)CC(C)C=CC...................................................................................0.0%\n", + "C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C.........................................0.0%\n", + "CC=CC(C)=CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=CC..........................................................0.0%\n", + "CCC=CCC(C)C=CCC(C)C=CCC(C)C...............................................................................0.0%\n", + "C=C1C=CC=C1...............................................................................................0.0%\n", + "C=CCC(C)CC(C)CC(C)CCC=CC..................................................................................0.0%\n", + "CC=CCC(C)CC(C)CC(C)C=CC...................................................................................0.0%\n", + "C=CC=CC(C)CC(C)CC(C)CCC...................................................................................0.0%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)=CCC.......................................................................0.0%\n", + "C=C(C)C=CCC(C)CC(C)CC(C)C=CC..............................................................................0.0%\n", + "C=CC=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C.............................................0.0%\n", + "C=CC=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C.......................................0.0%\n", + 
"C=C(C)C=CCC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(=C)C........................................0.0%\n", + "C=CCC(C)CCC(=C)C..........................................................................................0.0%\n", + "C=C(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)CC(C)C=C(C)C=CC........................................0.0%\n", + "C=CCC(C=C)CC..............................................................................................0.0%\n", + "C=CC=C(C)CCC(C)CC(C)=CC=C.................................................................................0.0%\n", + "C=CC=CCCC=CC=C............................................................................................0.0%\n", + "CC(C)CCC(C)CC(C)CC(C)C....................................................................................0.0%\n", + "CC1=CC(C2C=C(C)CC2)CC1....................................................................................0.0%\n", + "C=CC=CCCC=C...............................................................................................0.0%\n", + "C=CCCC1=CC=CC1............................................................................................0.0%\n" + ] + } + ], "source": [ "results_dictionary = {}\n", "for fraglist, amt in fl.grouped:\n", @@ -1031,51 +1181,6 @@ " print(str(x).ljust(80, '.') +\n", " str(str(np.round(y/total*100, 2))+'%').rjust(30, '.'))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "57e2ce0d-875f-4dcf-9927-4dc0b4d6481f", - "metadata": {}, - "outputs": [], - "source": [ - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ebff6236-0ccf-442d-965f-03b555db725a", - "metadata": { - "scrolled": true, - "tags": [] - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "19573d77-561a-4863-8fba-918da68b652b", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"""Helpers for pairing fragment concentrations and re-attaching fragments.

The numeric helpers (``match_*``, ``shuffle``, ``flatten``) only need NumPy.
The rmgpy classes used for the actual merging are imported inside
``merge_frag_to_frag`` so the rest of the module stays importable on its own.
"""
import os
import re

import numpy as np


def match_sequences(seq1, seq2, rtol=1e-6):
    """
    Split two sequences with the same sum into matching pieces.

    Both sequences are turned into cumulative sums and walked with two
    pointers; every step yields the pair of indices currently overlapping
    and the amount they share.

    Args:
        seq1, seq2: sequences of int/float. They are copied, never modified.
        rtol: relative tolerance handed to ``np.isclose`` for the sum check.

    Returns:
        A list of ``[(index1, index2), amount]`` entries (amounts are NumPy
        scalars). An empty list is returned when either sequence is empty.

    Raises:
        AssertionError: if the sums differ by more than the tolerance
            (kept as AssertionError for backward compatibility).

    Example:
        seq1 = [1, 3, 1]
        seq2 = [2, 1, 2]
        return: [[(0,0),1],
                 [(1,0),1],
                 [(1,1),1],
                 [(1,2),1],
                 [(2,2),1]]
    """
    # Work on copies: the sum correction below must not leak to the caller.
    seq1 = list(seq1)
    seq2 = list(seq2)

    total1 = sum(seq1)
    total2 = sum(seq2)
    sum_diff = total2 - total1
    # Explicit raise instead of `assert` so the check survives `python -O`.
    if not np.isclose(total1, total2, rtol=rtol):
        raise AssertionError(
            "seq1 has different sum (diff={0}) than seq2.".format(sum_diff)
        )

    if not seq1 or not seq2:
        return []

    # Force the sums to be exactly equal when the difference is within
    # tolerance, by topping up the last entry of the smaller sequence.
    if sum_diff >= 0:
        seq1[-1] = seq1[-1] + sum_diff
    else:
        seq2[-1] = seq2[-1] - sum_diff

    cum_seq1 = np.cumsum(seq1)
    cum_seq2 = np.cumsum(seq2)

    matches = []
    pin1 = 0
    pin2 = 0
    previous_cum_value = 0
    while pin1 < len(cum_seq1) and pin2 < len(cum_seq2):
        index_pair = (pin1, pin2)

        # The smaller cumulative value closes the current piece; advance the
        # pointer(s) whose entry has been fully consumed.
        if cum_seq1[pin1] > cum_seq2[pin2]:
            cum_value = cum_seq2[pin2]
            pin2 += 1
        elif cum_seq1[pin1] < cum_seq2[pin2]:
            cum_value = cum_seq1[pin1]
            pin1 += 1
        else:
            cum_value = cum_seq2[pin2]
            pin1 += 1
            pin2 += 1

        matches.append([index_pair, cum_value - previous_cum_value])
        previous_cum_value = cum_value

    return matches


def match_concentrations_with_same_sums(conc1, conc2, rtol=1e-6):
    """
    Match two concentration lists that have the same total.

    Args:
        conc1, conc2: lists of ``(species label, concentration)`` tuples.
        rtol: relative tolerance for the equal-sum check
            (see ``match_sequences``).

    Returns:
        A list of ``((label1, label2), amount)`` tuples.

    Raises:
        AssertionError: if the totals differ by more than the tolerance.

    Example:
        conc1 = [('a', 1),
                 ('b', 3),
                 ('c', 1)]
        conc2 = [('x', 2),
                 ('y', 1),
                 ('z', 2)]
        return: [(('a','x'),1),
                 (('b','x'),1),
                 (('b','y'),1),
                 (('b','z'),1),
                 (('c','z'),1)]
    """
    labels1 = [tup[0] for tup in conc1]
    labels2 = [tup[0] for tup in conc2]

    seq1 = [tup[1] for tup in conc1]
    seq2 = [tup[1] for tup in conc2]

    return [
        ((labels1[index1], labels2[index2]), amount)
        for (index1, index2), amount in match_sequences(seq1, seq2, rtol)
    ]


def _total_concentration(conc):
    """Return the sum of the concentrations in a list of (label, value, ...)."""
    return sum(item[1] for item in conc)


def _match_concentrations_once(conc1, conc2):
    """
    Pair conc1 against conc2 in a single left-to-right pass.

    Returns ``(matches, leftover2)``: ``matches`` holds the
    ``((label1, label2), amount)`` pairs followed by any unmatched tail of
    conc1 (entries kept as ``(label, amount)``); ``leftover2`` is the part of
    conc2 that was not consumed. Both inputs must be non-empty lists.
    """
    matches = []
    pin1 = 0
    pin2 = 0
    label1, val1 = conc1[0][0], conc1[0][1]
    label2, val2 = conc2[0][0], conc2[0][1]

    while True:
        pair = (label1, label2)
        if val1 > val2:
            matches.append((pair, val2))
            val1 = val1 - val2
            pin2 += 1
        elif val1 < val2:
            matches.append((pair, val1))
            val2 = val2 - val1
            pin1 += 1
        else:
            matches.append((pair, val1))
            pin1 += 1
            pin2 += 1

        # Refresh BOTH cursors before deciding whether to stop, so a leftover
        # entry never carries the amount of the entry consumed before it.
        if pin1 < len(conc1) and conc1[pin1][0] is not label1:
            label1, val1 = conc1[pin1][0], conc1[pin1][1]
        elif pin1 < len(conc1) and matches[-1][0] == pair and val1 is None:
            label1, val1 = conc1[pin1][0], conc1[pin1][1]
        if pin1 == len(conc1) or pin2 == len(conc2):
            break
        # Reload whichever side advanced in this step.
        label1, val1 = _reload(conc1, pin1, label1, val1, matches)
        label2, val2 = _reload(conc2, pin2, label2, val2, matches)

    leftover2 = []
    if pin1 < len(conc1):
        # conc2 ran out first: keep what is left of conc1 unmatched.
        val1 = _pending_amount(conc1, pin1, val1, matches, side=0)
        matches.append((conc1[pin1][0], val1))
        matches.extend(conc1[pin1 + 1:])
    elif pin2 < len(conc2):
        # conc1 ran out first: hand the rest of conc2 back to the caller.
        val2 = _pending_amount(conc2, pin2, val2, matches, side=1)
        leftover2 = [(conc2[pin2][0], val2)] + list(conc2[pin2 + 1:])
    return matches, leftover2


def _reload(conc, pin, label, value, matches):
    """Return (label, value) for position `pin`, reloading after an advance."""
    state = _reload.state
    key = id(conc)
    if state.get(key, 0) != pin:
        state[key] = pin
        return conc[pin][0], conc[pin][1]
    return label, value


_reload.state = {}


def _pending_amount(conc, pin, value, matches, side):
    """Return the still-unmatched amount of the entry at position `pin`."""
    if _reload.state.get(id(conc), 0) != pin:
        # The cursor advanced in the final step and was not reloaded yet.
        return conc[pin][1]
    return value


def match_concentrations_with_different_sums(conc1, conc2):
    """
    Match two concentration lists whose totals may differ.

    conc1 is paired with conc2 from left to right. If conc2 is exhausted
    first, the unmatched remainder of conc1 is appended as plain
    ``(label, amount)`` entries. If conc1 is exhausted first, the matches
    produced so far are matched again against the remainder of conc2, which
    nests the labels one level deeper each time (use ``flatten`` to get a
    flat label list).

    Args:
        conc1, conc2: lists of ``(species label, concentration)`` tuples.
            They are not modified.

    Returns:
        A list of ``(label combination, amount)`` tuples. An empty conc1
        gives ``[]``; an empty conc2 gives a copy of conc1.

    Raises:
        ValueError: if conc1 cannot consume anything from conc2 (for
            example when all its concentrations are zero), which would
            otherwise never terminate.

    Example:
        conc1 = [('a', 1),
                 ('b', 3),
                 ('c', 1)]
        conc2 = [('x', 2),
                 ('y', 1),
                 ('z', 10)]
        return: [(((('a','x'),'z'),'z'),1),
                 (((('b','x'),'z'),'z'),1),
                 (((('b','y'),'z'),'z'),1),
                 ((('b','z'),'z'),1),
                 ((('c','z'),'z'),1)]
    """
    matches = list(conc1)
    remaining = list(conc2)
    if not matches:
        return []

    # Iterate instead of recursing: the number of passes grows with the ratio
    # of the two totals and could exceed the recursion limit.
    while remaining:
        _reload.state.clear()
        matches, leftover = _match_concentrations_once(matches, remaining)
        if leftover and not (
            _total_concentration(leftover) < _total_concentration(remaining)
        ):
            raise ValueError(
                "conc1 has no concentration left to match against conc2."
            )
        remaining = leftover

    return matches


def shuffle(conc, seed=None):
    """
    Return the items of `conc` in random order, leaving `conc` untouched.

    Args:
        conc: an indexable sequence (e.g. a list of fragments).
        seed: optional seed. With a seed, a private ``RandomState`` is used,
            so the result is reproducible and the global NumPy random state
            is not reseeded. Without a seed the global state is used.

    Returns:
        A new list with the shuffled items.
    """
    idx_arr = np.arange(len(conc))

    if seed is None:
        np.random.shuffle(idx_arr)
    else:
        np.random.RandomState(seed).shuffle(idx_arr)

    return [conc[idx] for idx in idx_arr]


def flatten(combo):
    """
    Return the labels contained in a (possibly nested) tuple, left to right.

    Example:
        (('LY', 'XR'), ('LWL', 'RUR')) -> ['LY', 'XR', 'LWL', 'RUR']

    A bare string is treated as a single label and returned as ``[combo]``
    rather than being split into characters.
    """
    if isinstance(combo, str):
        return [combo]

    return_list = []
    # Explicit stack of iterators: label nesting can get deep after repeated
    # matching, so recursion is avoided.
    stack = [iter(combo)]
    while stack:
        for item in stack[-1]:
            if isinstance(item, tuple):
                stack.append(iter(item))
                break
            return_list.append(item)
        else:
            stack.pop()
    return return_list


def _detach_cutting_label(fragment, symbol):
    """
    Remove the first CuttingLabel named `symbol` from `fragment`.

    Returns the first vertex the label was connected to, or None when the
    fragment has no such label.
    """
    from rmgpy.molecule.fragment import CuttingLabel

    for vertex in fragment.vertices:
        if isinstance(vertex, CuttingLabel) and vertex.symbol == symbol:
            neighbor = list(vertex.edges.keys())[0]
            fragment.remove_atom(vertex)
            return neighbor
    return None


def merge_frag_to_frag(frag1, frag2, label):
    """
    Join two fragments at a pair of complementary cutting labels.

    Args:
        frag1, frag2: strings parsed with
            ``Fragment().from_smiles_like_string``.
        label: symbol of the cutting label to use on frag2. The matching
            label looked up on frag1 is `label` with 'L' and 'R' swapped.

    Returns:
        The merged ``Fragment``: both cutting labels are removed and a
        single bond (order 1) connects the atoms they were attached to.

    Raises:
        ValueError: if frag2 has no cutting label `label`, or frag1 has no
            complementary label.
    """
    from rmgpy.molecule import Bond
    from rmgpy.molecule.fragment import Fragment

    frag_spe1 = Fragment().from_smiles_like_string(frag1)
    frag_spe2 = Fragment().from_smiles_like_string(frag2)

    # The requested label has to be located on frag2 first.
    atom2 = _detach_cutting_label(frag_spe2, label)
    if atom2 is None:
        raise ValueError(
            "Cutting label {0!r} not found in fragment {1!r}.".format(
                label, frag2)
        )

    # An 'L...' label pairs with 'R...' on the other fragment and vice versa.
    if label[0] == 'L':
        counterpart = label.replace('L', 'R')
    else:
        counterpart = label.replace('R', 'L')

    atom1 = _detach_cutting_label(frag_spe1, counterpart)
    if atom1 is None:
        raise ValueError(
            "Cutting label {0!r} not found in fragment {1!r}.".format(
                counterpart, frag1)
        )

    new_frag = frag_spe1.merge(frag_spe2)
    new_frag.add_bond(Bond(atom1=atom1, atom2=atom2, order=1))
    new_frag = new_frag.copy(deep=True)
    new_frag.update()
    return new_frag


def merge_frag_list(to_be_merged):
    """
    Merge a list of fragments pairwise, from right to left.

    The last two entries are merged repeatedly until at most one is left.
    The kind of merge is chosen from the ``.smiles`` strings: an 'R' on the
    left with an 'L' on the right (or the reverse) is merged directly. Any
    other pair triggers a printed warning; if both sides contain 'L', the
    left one has its 'L' replaced by 'R' and is merged anyway, otherwise the
    left fragment is dropped and the right one is carried forward unchanged.

    Args:
        to_be_merged: list of objects exposing a ``.smiles`` string.
            The list itself is not modified.

    Returns:
        A list with the single resulting fragment, or a copy of the input
        when it has fewer than two entries.
    """
    remaining = list(to_be_merged)

    while len(remaining) > 1:
        frag1 = remaining[-2].smiles  # second to last fragment in list
        frag2 = remaining[-1].smiles  # last fragment in list

        if 'R' in frag1 and 'L' in frag2:
            newfrag = merge_frag_to_frag(frag1, frag2, 'L')
        elif 'L' in frag1 and 'R' in frag2:
            newfrag = merge_frag_to_frag(frag1, frag2, 'R')
        else:
            # No R/L combination can be made from the last two fragments.
            print('Warning! Could not merge fragments {} and {}'.format(
                frag1, frag2))
            if 'L' in frag1 and 'L' in frag2:
                newfrag = merge_frag_to_frag(
                    frag1.replace('L', 'R'), frag2, 'L')
            else:
                newfrag = remaining[-1]

        # Replace the pair that was just processed with its result.
        remaining[-2:] = [newfrag]

    return remaining