# Module 1 - Problem 1: Structured Data From CSV File

Build different data structures from the information in an external file:

1) Dictionary of unstructured data
2) Dictionary of dictionaries
3) Dictionary of lists or tuples
4) List of lists or tuples

In [2]:
import csv
from sys import path
from typing import Union

path.append("../../src/")

### Read information in CSV file

In [3]:
# Open the CSV file. newline="" is what the csv module documents for files
# passed to csv.reader, so newlines embedded in quoted fields are parsed correctly.
with open("../../src/amino_acids.csv", "r", newline="") as f:
    # Materialise every row now, because the file is closed when this block exits.
    # csv.reader yields each row as a list of strings; numeric columns are
    # converted with float() at the point of use.
    reader: list[list[str]] = list(csv.reader(f))

### 1) Dictionary of unstructured data
    * {name of amino acid: molar mass}
    * {name of amino acid: molecular formula}
    * {molar mass: molecular formula}

In [10]:
# Map each amino-acid name (column 1) to its molar mass (column 3, parsed as float).
# reader[2:] skips the first two rows of the file.
# NOTE(review): the recorded output starts at Arginine and has no Alanine entry;
# confirm the file really has two non-data rows and the first record is not being dropped.
amino_acids_molar_mass: dict[str, float] = {
    row[1].strip(): float(row[3]) for row in reader[2:]
}
print(amino_acids_molar_mass)

{'Arginine': 174.2, 'Asparagine': 132.12, 'Aspartic Acid': 133.1, 'Cysteine': 121.16, 'Glutamic Acid': 147.13, 'Glutamine': 146.14, 'Glycine': 75.07, 'Histidine': 155.15, 'Isoleucine': 131.17, 'Leucine': 131.18, 'Lysine': 146.19, 'Methionine': 149.21, 'Phenylalanine': 165.19, 'Proline': 115.13, 'Serine': 105.09, 'Threonine': 119.12, 'Tryptophan': 204.23, 'Tyrosine': 181.19, 'Valine': 117.15}


In [8]:
# Map each amino-acid name (column 1) to its molecular formula (second-to-last column).
# reader[2:] skips the first two rows of the file.
# NOTE(review): confirm that two rows really need skipping; the recorded output
# has no Alanine entry, so the first record may be dropped unintentionally.
amino_acids_molecular_formule: dict[str, str] = {
    row[1].strip(): row[-2].strip() for row in reader[2:]
}
print(amino_acids_molecular_formule)

{'Arginine': 'C6H14N4O2', 'Asparagine': 'C4H8N2O3', 'Aspartic Acid': 'C4H7NO4', 'Cysteine': 'C3H7NO2S', 'Glutamic Acid': 'C5H9NO4', 'Glutamine': 'C5H10N2O3', 'Glycine': 'C2H5NO2', 'Histidine': 'C6H9N3O2', 'Isoleucine': 'C6H13NO2', 'Leucine': 'C6H13NO2', 'Lysine': 'C6H14N2O2', 'Methionine': 'C5H11NO2S', 'Phenylalanine': 'C9H11NO2', 'Proline': 'C5H9NO2', 'Serine': 'C3H7NO3', 'Threonine': 'C4H9NO3', 'Tryptophan': 'C11H12NO3', 'Tyrosine': 'C9H11NO3', 'Valine': 'C5H11NO2'}


In [11]:
# Map each molar mass (column 3, parsed as float) to the molecular formula
# (second-to-last column). Because the molar mass is the key, two rows with the
# same molar mass would collapse into one entry, the later row winning.
# reader[2:] skips the first two rows of the file.
# NOTE(review): confirm that two rows really need skipping; the recorded output
# has no Alanine entry, so the first record may be dropped unintentionally.
molar_mass_molecular_formule: dict[float, str] = {
    float(row[3]): row[-2].strip() for row in reader[2:]
}
print(molar_mass_molecular_formule)

{174.2: 'C6H14N4O2', 132.12: 'C4H8N2O3', 133.1: 'C4H7NO4', 121.16: 'C3H7NO2S', 147.13: 'C5H9NO4', 146.14: 'C5H10N2O3', 75.07: 'C2H5NO2', 155.15: 'C6H9N3O2', 131.17: 'C6H13NO2', 131.18: 'C6H13NO2', 146.19: 'C6H14N2O2', 149.21: 'C5H11NO2S', 165.19: 'C9H11NO2', 115.13: 'C5H9NO2', 105.09: 'C3H7NO3', 119.12: 'C4H9NO3', 204.23: 'C11H12NO3', 181.19: 'C9H11NO3', 117.15: 'C5H11NO2'}


### 2) Dictionary of dictionaries
    * {name of amino acid: {molecular formula: molar mass}}

In [13]:
# Nested mapping: amino-acid name -> {molecular formula: molar mass}.
# Each inner dictionary holds exactly one formula/mass pair taken from the same row.
# reader[2:] skips the first two rows of the file.
# NOTE(review): confirm that two rows really need skipping; the recorded output
# has no Alanine entry, so the first record may be dropped unintentionally.
aa_mf_mm: dict[str, dict[str, float]] = {
    row[1].strip(): {row[-2].strip(): float(row[3])}
    for row in reader[2:]
}
print(aa_mf_mm)

{'Arginine': {'C6H14N4O2': 174.2}, 'Asparagine': {'C4H8N2O3': 132.12}, 'Aspartic Acid': {'C4H7NO4': 133.1}, 'Cysteine': {'C3H7NO2S': 121.16}, 'Glutamic Acid': {'C5H9NO4': 147.13}, 'Glutamine': {'C5H10N2O3': 146.14}, 'Glycine': {'C2H5NO2': 75.07}, 'Histidine': {'C6H9N3O2': 155.15}, 'Isoleucine': {'C6H13NO2': 131.17}, 'Leucine': {'C6H13NO2': 131.18}, 'Lysine': {'C6H14N2O2': 146.19}, 'Methionine': {'C5H11NO2S': 149.21}, 'Phenylalanine': {'C9H11NO2': 165.19}, 'Proline': {'C5H9NO2': 115.13}, 'Serine': {'C3H7NO3': 105.09}, 'Threonine': {'C4H9NO3': 119.12}, 'Tryptophan': {'C11H12NO3': 204.23}, 'Tyrosine': {'C9H11NO3': 181.19}, 'Valine': {'C5H11NO2': 117.15}}


### 3) Dictionary of lists or tuples
    * {name of amino acid: [molecular formula, Rf, molar mass]}
    * {name of amino acid: (molecular formula, Rf, molar mass)}

In [14]:
# Mapping: amino-acid name -> [molecular formula, Rf, molar mass].
# A list takes a single element type, so the mixed str/float content is
# expressed as a Union.
# NOTE(review): Rf is the constant 2.0 for every entry (the original wrote
# float(2)); presumably it should be read from a CSV column — confirm which one.
# NOTE(review): reader[2:] skips two rows and the recorded output has no Alanine
# entry; confirm the first record is not being dropped.
aa_mf_rf_mm: dict[str, list[Union[str, float]]] = {
    row[1].strip(): [row[-2].strip(), 2.0, float(row[3])]
    for row in reader[2:]
}
print(aa_mf_rf_mm)

{'Arginine': ['C6H14N4O2', 2.0, 174.2], 'Asparagine': ['C4H8N2O3', 2.0, 132.12], 'Aspartic Acid': ['C4H7NO4', 2.0, 133.1], 'Cysteine': ['C3H7NO2S', 2.0, 121.16], 'Glutamic Acid': ['C5H9NO4', 2.0, 147.13], 'Glutamine': ['C5H10N2O3', 2.0, 146.14], 'Glycine': ['C2H5NO2', 2.0, 75.07], 'Histidine': ['C6H9N3O2', 2.0, 155.15], 'Isoleucine': ['C6H13NO2', 2.0, 131.17], 'Leucine': ['C6H13NO2', 2.0, 131.18], 'Lysine': ['C6H14N2O2', 2.0, 146.19], 'Methionine': ['C5H11NO2S', 2.0, 149.21], 'Phenylalanine': ['C9H11NO2', 2.0, 165.19], 'Proline': ['C5H9NO2', 2.0, 115.13], 'Serine': ['C3H7NO3', 2.0, 105.09], 'Threonine': ['C4H9NO3', 2.0, 119.12], 'Tryptophan': ['C11H12NO3', 2.0, 204.23], 'Tyrosine': ['C9H11NO3', 2.0, 181.19], 'Valine': ['C5H11NO2', 2.0, 117.15]}


In [15]:
# Mapping: amino-acid name -> (molecular formula, Rf, molar mass) as an immutable tuple.
# NOTE(review): Rf is the constant 2.0 for every entry (the original wrote
# float(2)); presumably it should be read from a CSV column — confirm which one.
# NOTE(review): reader[2:] skips two rows and the recorded output has no Alanine
# entry; confirm the first record is not being dropped.
aa_mf_rf_mm: dict[str, tuple[str, float, float]] = {
    row[1].strip(): (row[-2].strip(), 2.0, float(row[3]))
    for row in reader[2:]
}
print(aa_mf_rf_mm)

{'Arginine': ('C6H14N4O2', 2.0, 174.2), 'Asparagine': ('C4H8N2O3', 2.0, 132.12), 'Aspartic Acid': ('C4H7NO4', 2.0, 133.1), 'Cysteine': ('C3H7NO2S', 2.0, 121.16), 'Glutamic Acid': ('C5H9NO4', 2.0, 147.13), 'Glutamine': ('C5H10N2O3', 2.0, 146.14), 'Glycine': ('C2H5NO2', 2.0, 75.07), 'Histidine': ('C6H9N3O2', 2.0, 155.15), 'Isoleucine': ('C6H13NO2', 2.0, 131.17), 'Leucine': ('C6H13NO2', 2.0, 131.18), 'Lysine': ('C6H14N2O2', 2.0, 146.19), 'Methionine': ('C5H11NO2S', 2.0, 149.21), 'Phenylalanine': ('C9H11NO2', 2.0, 165.19), 'Proline': ('C5H9NO2', 2.0, 115.13), 'Serine': ('C3H7NO3', 2.0, 105.09), 'Threonine': ('C4H9NO3', 2.0, 119.12), 'Tryptophan': ('C11H12NO3', 2.0, 204.23), 'Tyrosine': ('C9H11NO3', 2.0, 181.19), 'Valine': ('C5H11NO2', 2.0, 117.15)}


### 4) List of lists or tuples
    * [[name of amino acid, molecular formula, Rf, molar mass]]
    * [(name of amino acid, molecular formula, Rf, molar mass)]

In [16]:
# One inner list per amino acid: [name, molecular formula, Rf, molar mass].
# A list takes a single element type, so the mixed str/float content is
# expressed as a Union.
# NOTE(review): Rf is the constant 2.0 for every entry (the original wrote
# float(2)); presumably it should be read from a CSV column — confirm which one.
# NOTE(review): reader[2:] skips two rows and the recorded output has no Alanine
# entry; confirm the first record is not being dropped.
aa_mf_rf_mm: list[list[Union[str, float]]] = [
    [row[1].strip(), row[-2].strip(), 2.0, float(row[3])]
    for row in reader[2:]
]
print(aa_mf_rf_mm)

[['Arginine', 'C6H14N4O2', 2.0, 174.2], ['Asparagine', 'C4H8N2O3', 2.0, 132.12], ['Aspartic Acid', 'C4H7NO4', 2.0, 133.1], ['Cysteine', 'C3H7NO2S', 2.0, 121.16], ['Glutamic Acid', 'C5H9NO4', 2.0, 147.13], ['Glutamine', 'C5H10N2O3', 2.0, 146.14], ['Glycine', 'C2H5NO2', 2.0, 75.07], ['Histidine', 'C6H9N3O2', 2.0, 155.15], ['Isoleucine', 'C6H13NO2', 2.0, 131.17], ['Leucine', 'C6H13NO2', 2.0, 131.18], ['Lysine', 'C6H14N2O2', 2.0, 146.19], ['Methionine', 'C5H11NO2S', 2.0, 149.21], ['Phenylalanine', 'C9H11NO2', 2.0, 165.19], ['Proline', 'C5H9NO2', 2.0, 115.13], ['Serine', 'C3H7NO3', 2.0, 105.09], ['Threonine', 'C4H9NO3', 2.0, 119.12], ['Tryptophan', 'C11H12NO3', 2.0, 204.23], ['Tyrosine', 'C9H11NO3', 2.0, 181.19], ['Valine', 'C5H11NO2', 2.0, 117.15]]


In [17]:
# One tuple per amino acid: (name, molecular formula, Rf, molar mass).
# The first two rows of the file are skipped, exactly as before.
# NOTE(review): Rf is the constant 2.0 for every entry; presumably it should be
# read from a CSV column — confirm which one.
aa_mf_rf_mm: list[tuple[str, str, float, float]] = [
    (row[1].strip(), row[-2].strip(), 2.0, float(row[3]))
    for row in reader[2:]
]
print(aa_mf_rf_mm)

[('Arginine', 'C6H14N4O2', 2.0, 174.2), ('Asparagine', 'C4H8N2O3', 2.0, 132.12), ('Aspartic Acid', 'C4H7NO4', 2.0, 133.1), ('Cysteine', 'C3H7NO2S', 2.0, 121.16), ('Glutamic Acid', 'C5H9NO4', 2.0, 147.13), ('Glutamine', 'C5H10N2O3', 2.0, 146.14), ('Glycine', 'C2H5NO2', 2.0, 75.07), ('Histidine', 'C6H9N3O2', 2.0, 155.15), ('Isoleucine', 'C6H13NO2', 2.0, 131.17), ('Leucine', 'C6H13NO2', 2.0, 131.18), ('Lysine', 'C6H14N2O2', 2.0, 146.19), ('Methionine', 'C5H11NO2S', 2.0, 149.21), ('Phenylalanine', 'C9H11NO2', 2.0, 165.19), ('Proline', 'C5H9NO2', 2.0, 115.13), ('Serine', 'C3H7NO3', 2.0, 105.09), ('Threonine', 'C4H9NO3', 2.0, 119.12), ('Tryptophan', 'C11H12NO3', 2.0, 204.23), ('Tyrosine', 'C9H11NO3', 2.0, 181.19), ('Valine', 'C5H11NO2', 2.0, 117.15)]
