In [26]:
class DNA:
    """A nucleotide sequence paired with the ID of the sample it belongs to.

    A class is the template; each instance built from it is an object.
    Because ``__eq__`` is defined without ``__hash__``, instances are not
    hashable.
    """

    def __init__(self, seq: str, sample_id: str):
        """Store ``seq`` and ``sample_id`` unchanged (``self`` is the object itself)."""
        self.seq = seq
        self.sample_id = sample_id

    def get_length(self) -> int:
        """Return the number of characters in ``seq``."""
        return len(self.seq)

    def get_gc_content(self) -> float:
        """Return the percentage (0-100) of characters in ``seq`` that are "C" or "G".

        Only uppercase "C" and "G" are counted. An empty sequence yields
        0.0 rather than raising ZeroDivisionError.
        """
        length = self.get_length()
        if length == 0:
            return 0.0
        num_c = self.seq.count("C")
        num_g = self.seq.count("G")
        return (num_c + num_g) / length * 100

    def is_palindrome(self) -> bool:
        """Return True when ``seq`` reads the same forwards and backwards.

        This is a plain string comparison; no reverse complement is taken.
        """
        return self.seq[::-1] == self.seq

    def get_transcribed(self) -> str:
        """Return ``seq`` with every "T" replaced by "U"."""
        return self.seq.replace("T", "U")

    def __eq__(self, other) -> bool:
        """Compare by ``seq`` only; ``sample_id`` is ignored.

        Operands without a ``seq`` attribute are not comparable, so
        ``NotImplemented`` is returned and Python falls back to its default
        (``dna == 5`` is False instead of raising AttributeError).
        """
        if not hasattr(other, "seq"):
            return NotImplemented
        return self.seq == other.seq

    def __str__(self) -> str:
        """Return ">" + ``sample_id`` on one line and ``seq`` on the next (used by print())."""
        return f">{self.sample_id}\n{self.seq}"

    def __add__(self, other) -> str:
        """Return the two sequences concatenated as a plain string (the ``+`` operator).

        Operands without a ``seq`` attribute yield ``NotImplemented``, so
        Python raises the conventional TypeError for unsupported operands.
        """
        if not hasattr(other, "seq"):
            return NotImplemented
        return self.seq + other.seq

# Two objects (instances of DNA) with the same sequence but different sample IDs.
dna1 = DNA(seq="ACGT", sample_id="test_sample")
dna2 = DNA(seq="ACGT", sample_id="test2_sample")

# "+" goes through DNA.__add__.
print(dna1 + dna2)

# Per-object results, printed side by side on one line each.
print(*(sample.get_gc_content() for sample in (dna1, dna2)))
print(*(sample.is_palindrome() for sample in (dna1, dna2)))
print(*(sample.get_transcribed() for sample in (dna1, dna2)))

# "==" goes through DNA.__eq__, i.e. it compares dna1.seq with dna2.seq.
print(dna1 == dna2)

ACGTACGT
50.0 50.0
False False
ACGU ACGU
True


In [75]:
# Sanity check: a four-character sequence must report a length of 4.
dna1 = DNA(seq="ACGT", sample_id="sample")
assert dna1.get_length() == 4

In [31]:
class RNA(DNA):
    """Sequence type that inherits the DNA helpers but disables transcription."""

    def __init__(self, seq: str = "", sample_id: str = ""):
        """Initialise ``seq`` and ``sample_id`` through the parent constructor.

        Both parameters default to the empty string, so the historical
        no-argument call ``RNA()`` keeps working, but the instance now
        always has the attributes the inherited methods read.
        """
        super().__init__(seq, sample_id)

    def get_transcribed(self) -> str:
        """Return the fixed marker string "not applicable" instead of a transcript."""
        return "not applicable"

# Create the object with the no-argument constructor, then attach the sequence by hand.
rna1 = RNA()
rna1.seq = "AUGC"

# get_length() is inherited from DNA; get_transcribed() is RNA's override.
print(rna1.get_length(), rna1.get_transcribed(), sep="\n")

4
not applicable


In [34]:
# Echo every line of test.txt with surrounding whitespace removed.
with open("test.txt") as handle:
    for line in map(str.strip, handle):
        print(line)

test1
test3
test2
test4
test5


In [37]:
# Print only the entries of test.txt whose numeric suffix is even.
# Each non-blank line is expected to be the text "test" followed by an integer.
with open("test.txt") as handle:
    for line in handle:
        entry = line.strip()
        if not entry:
            # A blank line (e.g. a trailing newline at end of file) has no
            # number to parse; int("") would raise ValueError.
            continue
        # Drop the "test" prefix to get the numeric part.
        if int(entry[len("test"):]) % 2 == 0:
            print(entry)

test2
test4
test10


In [38]:
# Show each line of sample.csv as a list of comma-separated fields.
with open("sample.csv") as handle:
    for line in handle:
        fields = line.strip().split(",")
        print(fields)

['SAMPLE', 'GENE', 'VALUE']
['1', 'BRAF', '3.84']
['2', 'TERT', '6.15']
['3', 'KRAS', '2.76']


In [None]:
import csv

# Read sample.csv into one dict per data row, keyed by the header row.
# csv.DictReader takes the field names from the first row, understands
# quoted fields, and skips blank lines, which a manual split(",") + zip()
# would turn into broken or partial records.
with open("sample.csv", newline="") as handle:
    all_data = [dict(row) for row in csv.DictReader(handle)]

for elem in all_data:
    print(elem)

{'SAMPLE': '1', 'GENE': 'BRAF', 'VALUE': '3.84'}
{'SAMPLE': '2', 'GENE': 'TERT', 'VALUE': '6.15'}
{'SAMPLE': '3', 'GENE': 'KRAS', 'VALUE': '2.76'}


In [None]:
# Show each line of sample.tsv as a list of tab-separated fields.
with open("sample.tsv") as handle:
    for line in handle:
        # Remove only the line ending: a bare strip() would also eat leading
        # and trailing tabs and silently drop empty first/last columns.
        print(line.rstrip("\n").split("\t"))  # "\t" is the tab character

['SAMPLE', 'GENE', 'VALUE']
['1', 'BRAF', '3.84']
['2', 'TERT', '6.15']
['3', 'KRAS', '2.76']


In [None]:
import csv

# Read sample.tsv into one dict per data row, keyed by the header row.
# csv.DictReader keeps empty columns intact and skips blank lines; the
# manual strip()/split("\t") approach dropped leading/trailing empty fields
# and produced records without a "VALUE" key for blank lines.
with open("sample.tsv", newline="") as handle:
    all_data = [dict(row) for row in csv.DictReader(handle, delimiter="\t")]

# Order the records from the largest VALUE to the smallest, comparing
# numerically rather than as strings.
all_data_sorted = sorted(
    all_data, key=lambda record: float(record["VALUE"]), reverse=True)
for elem in all_data_sorted:
    print(elem)

{'SAMPLE': '2', 'GENE': 'TERT', 'VALUE': '6.15'}
{'SAMPLE': '1', 'GENE': 'BRAF', 'VALUE': '3.84'}
{'SAMPLE': '3', 'GENE': 'KRAS', 'VALUE': '2.76'}


In [57]:
import json

# Parse test1.json and show the result, its type, and each key/value pair.
with open("test1.json") as handle:
    data = json.load(handle)

print(data, type(data), sep="\n")
for key in data:
    print(f"{key}: {data[key]}")

{'SAMPLE': '1', 'GENE': 'BRAF', 'VALUE': '3.84'}
<class 'dict'>
SAMPLE: 1
GENE: BRAF
VALUE: 3.84


In [61]:
import json

# Parse test2.json, show the result and its type, then list its elements one per line.
with open("test2.json") as handle:
    data = json.load(handle)

print(data, type(data), "------", sep="\n")
for record in data:
    print(record)

[{'SAMPLE': '1', 'GENE': 'BRAF', 'VALUE': '3.84'}, {'SAMPLE': '2', 'GENE': 'TERT', 'VALUE': '6.15'}, {'SAMPLE': '3', 'GENE': 'KRAS', 'VALUE': '2.76'}]
<class 'list'>
------
{'SAMPLE': '1', 'GENE': 'BRAF', 'VALUE': '3.84'}
{'SAMPLE': '2', 'GENE': 'TERT', 'VALUE': '6.15'}
{'SAMPLE': '3', 'GENE': 'KRAS', 'VALUE': '2.76'}


In [62]:
import json

with open("test2.json") as handle:
    data = json.load(handle)

# Compute the mean of the VALUE field: accumulate each record's VALUE as a
# float, then divide by the number of records.
value_total = 0
for record in data:
    value_total = value_total + float(record["VALUE"])

avr = value_total / len(data)
print(avr)

4.25


In [70]:
import csv
import json

# Read sample.csv into one dict per data row, keyed by the header row.
# csv.DictReader understands quoted fields and skips blank lines, which a
# manual split(",") + zip() would turn into broken or partial records.
with open("sample.csv", newline="") as handle:
    data = [dict(row) for row in csv.DictReader(handle)]

print(data)

# Compact JSON on a single line.
with open("result.json", "w") as handle:
    json.dump(data, handle)

# Same content, pretty-printed with a two-space indent.
with open("result2.json", "w") as handle:
    json.dump(data, handle, indent=2)


[{'SAMPLE': '1', 'GENE': 'BRAF', 'VALUE': '3.84'}, {'SAMPLE': '2', 'GENE': 'TERT', 'VALUE': '6.15'}, {'SAMPLE': '3', 'GENE': 'KRAS', 'VALUE': '2.76'}]


In [76]:
pip install pytest

Note: you may need to restart the kernel to use updated packages.


In [1]:
import gzip

# Open the gzip archive in text mode ("rt") and echo each line with
# surrounding whitespace removed.
with gzip.open("data/test.txt.gz", mode="rt") as handle:
    for line in handle:
        text = line.strip()
        print(text)

Hello World!
This is a gzipped test file.
