# Notebook for development of the script - 2

In [1]:
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from Bio.Seq import *
from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA, IUPACUnambiguousDNA
import reprlib

In [2]:
class Genome(Seq):
    """DNA genome from which random sequencing reads can be simulated.

    Parameters
    ----------
    seq : str
        Nucleotide sequence, handed to ``Seq`` with the
        ``IUPACUnambiguousDNA`` alphabet.
    circular : bool, optional
        Stored as ``self.circular``. When true, reads that run past the end
        of the sequence continue from its beginning.
    """
    def __init__(self, seq, circular=True):
        Seq.__init__(self, seq, alphabet=IUPACUnambiguousDNA())
        self.circular = circular

    def sequencing(self, read_length=100, reads_nb=5000):
        """Simulate shotgun sequencing by sampling reads at random positions.

        Each read starts at a position drawn uniformly from the genome with
        the ``random`` module (so seed it for reproducible results).

        Parameters
        ----------
        read_length : int, optional
            Number of bases per read.
        reads_nb : int, optional
            Number of reads to draw.

        Returns
        -------
        list of Read
            ``reads_nb`` reads. On a circular genome a read that reaches the
            end wraps around to position 0; on a linear genome it is
            truncated at the end instead.
        """
        sequence = str(self)
        genome_length = len(sequence)
        reads = []
        for _ in range(reads_nb):
            start = rd.randint(0, genome_length - 1)
            end = start + read_length
            # Part of the read that fits before the end of the sequence.
            read_seq = sequence[start:min(end, genome_length)]
            if self.circular:
                # Remainder of the read, taken from the start of the genome.
                read_seq += sequence[0:max(end - genome_length, 0)]
            reads.append(Read(read_seq))
        return reads


In [3]:
class Read(Seq):
    """Sequencing read: a DNA fragment that can be split into k-mers.

    Parameters
    ----------
    seq : str
        Nucleotide sequence, handed to ``Seq`` with the
        ``IUPACUnambiguousDNA`` alphabet.
    circular : bool, optional
        Accepted to keep the existing signature; it is not stored or used.
    """
    def __init__(self, seq, circular=True):
        Seq.__init__(self, seq, alphabet=IUPACUnambiguousDNA())

    def generate_kmers(self, kmers_length=30):
        """Return the list of all k-mers contained in the read.

        Parameters
        ----------
        kmers_length : int, optional
            Length ``k`` of each k-mer; must be at least 1.

        Returns
        -------
        list of Kmer
            One ``Kmer`` per window of ``kmers_length`` bases, in read order.
            Empty when the read is shorter than ``kmers_length``.

        Raises
        ------
        ValueError
            If ``kmers_length`` is smaller than 1.
        """
        if kmers_length < 1:
            raise ValueError("kmers_length must be at least 1")
        sequence = str(self)
        # A sequence of length n holds n - k + 1 windows of length k; the
        # "+ 1" keeps the final k-mer that ends on the last base.
        windows_nb = len(sequence) - kmers_length + 1
        return [Kmer(sequence[i:i + kmers_length]) for i in range(windows_nb)]


In [4]:
class Kmer(Seq):
    """K-mer: a short DNA word that also exposes its two overlapping halves.

    Attributes
    ----------
    prefix
        ``seq`` without its last element (``seq[:-1]``).
    suffix
        ``seq`` without its first element (``seq[1:]``).
    """
    def __init__(self, seq):
        dna_alphabet = IUPACUnambiguousDNA()
        Seq.__init__(self, seq, alphabet=dna_alphabet)
        # Both slices are taken from the constructor argument itself.
        self.prefix, self.suffix = seq[:-1], seq[1:]
    

In [5]:
# MAIN PROGRAM
# Seed the stdlib RNG so that Restart & Run All reproduces the same genome
# and the same reads.
SEED = 42
rd.seed(SEED)

# Simulation parameters, named once instead of being repeated inline.
genome_length = 10000
READ_LENGTH = 100
READS_NB = 5000
KMERS_LENGTH = 30

# Genome generation: a random sequence of genome_length bases.
genome = Genome(''.join(rd.choices(["A", "T", "G", "C"], k=genome_length)), circular=True)

# Reads generation
reads = genome.sequencing(read_length=READ_LENGTH, reads_nb=READS_NB)

# K-mers generation: the set keeps each distinct k-mer once.
kmers = set()
for read in reads:
    kmers.update(read.generate_kmers(kmers_length=KMERS_LENGTH))

# (K-1)-mers generation: the prefix and suffix of every k-mer.
km1mers = set()
for kmer in kmers:
    km1mers.update((kmer.prefix, kmer.suffix))




In [10]:
class Graph:
    """Directed multigraph over (k-1)-mers, stored as a dense adjacency matrix.

    Parameters
    ----------
    km1mers : iterable
        Node labels. They are frozen into the tuple ``self.nodes``; the
        position of a label in that tuple is its row/column index. Labels
        must be hashable so that ``fill_matrix`` can index them.

    Attributes
    ----------
    nodes : tuple
        Node labels in matrix order.
    matrix : numpy.ndarray
        Square array where ``matrix[i, j]`` counts the edges from
        ``nodes[i]`` to ``nodes[j]``. It is dense, so memory grows with the
        square of the number of nodes.
    eulerian : bool or None
        Result of the last ``test_eulerian`` call, ``None`` before any call.
    """
    def __init__(self, km1mers):
        self.nodes = tuple(km1mers)
        self.matrix = np.zeros((len(self.nodes), len(self.nodes)))
        self.eulerian = None

    def fill_matrix(self, kmers, verbose=True):
        """Add one edge ``prefix -> suffix`` to the matrix for every k-mer.

        Parameters
        ----------
        kmers : iterable
            Objects exposing ``prefix`` and ``suffix`` attributes that are
            both present in ``self.nodes``.
        verbose : bool, optional
            When true (default), print a running ``K-mer`` counter that
            overwrites itself with a carriage return.

        Raises
        ------
        ValueError
            If a prefix or suffix is not one of the graph's nodes.
        """
        # Build the label -> index map once so each lookup is a hash access
        # instead of a linear scan of the tuple. setdefault keeps the first
        # position of a repeated label, like tuple.index does.
        index_of = {}
        for position, node in enumerate(self.nodes):
            index_of.setdefault(node, position)

        for n, kmer in enumerate(kmers):
            if verbose:
                print('K-mer {:5d}'.format(n), end='\r')
            try:
                i = index_of[kmer.prefix]
                j = index_of[kmer.suffix]
            except KeyError as err:
                # Same exception type that tuple.index raised for a miss.
                raise ValueError(
                    '{!r} is not a node of this graph'.format(err.args[0])
                ) from err
            self.matrix[i, j] += 1

    def test_eulerian(self):
        """Check whether every node has as many outgoing as incoming edges.

        The result is recomputed from the current matrix on every call,
        stored in ``self.eulerian``, reported on stdout and returned. Only
        the degree balance is tested; connectivity of the graph is not.

        Returns
        -------
        bool
            True when all row sums equal the matching column sums.
        """
        out_degree = self.matrix.sum(axis=1)
        in_degree = self.matrix.sum(axis=0)
        self.eulerian = bool(np.array_equal(out_degree, in_degree))
        if self.eulerian:
            print('SUCCESS : This graph is Eulerian !!')
        else:
            print("PROBLEM : This graph is not Eulerian.")
        return self.eulerian
        

In [17]:
# Graph generation: create a graph whose nodes are the (k-1)-mers, add one
# prefix -> suffix edge for each k-mer of the set, then compare the incoming
# and outgoing edge counts of every node.
graph = Graph(km1mers)
graph.fill_matrix(kmers)
graph.test_eulerian()

K-mer     0K-mer     1K-mer     2K-mer     3K-mer     4K-mer     5K-mer     6K-mer     7K-mer     8K-mer     9K-mer    10K-mer    11K-mer    12K-mer    13K-mer    14K-mer    15K-mer    16K-mer    17K-mer    18K-mer    19K-mer    20K-mer    21K-mer    22K-mer    23K-mer    24K-mer    25K-mer    26K-mer    27K-mer    28K-mer    29K-mer    30K-mer    31K-mer    32K-mer    33K-mer    34K-mer    35K-mer    36K-mer    37K-mer    38K-mer    39K-mer    40K-mer    41K-mer    42K-mer    43K-mer    44K-mer    45K-mer    46K-mer    47K-mer    48K-mer    49K-mer    50K-mer    51K-mer    52K-mer    53K-mer    54K-mer    55K-mer    56K-mer    57K-mer    58K-mer    59K-mer    60K-mer    61K-mer    62K-mer    63K-mer    64K-mer    65K-mer    66K-mer    67K-mer    68K-mer    69K-mer    70K-mer    71K-mer    72K-mer    73K-mer    74K-mer    75K-mer    76K-mer    77K-mer    78K-mer    79K-mer    80K-mer    81K-mer    82K-me

K-mer  1361K-mer  1362K-mer  1363K-mer  1364K-mer  1365K-mer  1366K-mer  1367K-mer  1368K-mer  1369K-mer  1370K-mer  1371K-mer  1372K-mer  1373K-mer  1374K-mer  1375K-mer  1376K-mer  1377K-mer  1378K-mer  1379K-mer  1380K-mer  1381K-mer  1382K-mer  1383K-mer  1384K-mer  1385K-mer  1386K-mer  1387K-mer  1388K-mer  1389K-mer  1390K-mer  1391K-mer  1392K-mer  1393K-mer  1394K-mer  1395K-mer  1396K-mer  1397K-mer  1398K-mer  1399K-mer  1400K-mer  1401K-mer  1402K-mer  1403K-mer  1404K-mer  1405K-mer  1406K-mer  1407K-mer  1408K-mer  1409K-mer  1410K-mer  1411K-mer  1412K-mer  1413K-mer  1414K-mer  1415K-mer  1416K-mer  1417K-mer  1418K-mer  1419K-mer  1420K-mer  1421K-mer  1422K-mer  1423K-mer  1424K-mer  1425K-mer  1426K-mer  1427K-mer  1428K-mer  1429K-mer  1430K-mer  1431K-mer  1432K-mer  1433K-mer  1434K-mer  1435K-mer  1436K-mer  1437K-mer  1438K-mer  1439K-mer  1440K-mer  1441K-mer  1442K-mer  1443K-m

K-mer  2873K-mer  2874K-mer  2875K-mer  2876K-mer  2877K-mer  2878K-mer  2879K-mer  2880K-mer  2881K-mer  2882K-mer  2883K-mer  2884K-mer  2885K-mer  2886K-mer  2887K-mer  2888K-mer  2889K-mer  2890K-mer  2891K-mer  2892K-mer  2893K-mer  2894K-mer  2895K-mer  2896K-mer  2897K-mer  2898K-mer  2899K-mer  2900K-mer  2901K-mer  2902K-mer  2903K-mer  2904K-mer  2905K-mer  2906K-mer  2907K-mer  2908K-mer  2909K-mer  2910K-mer  2911K-mer  2912K-mer  2913K-mer  2914K-mer  2915K-mer  2916K-mer  2917K-mer  2918K-mer  2919K-mer  2920K-mer  2921K-mer  2922K-mer  2923K-mer  2924K-mer  2925K-mer  2926K-mer  2927K-mer  2928K-mer  2929K-mer  2930K-mer  2931K-mer  2932K-mer  2933K-mer  2934K-mer  2935K-mer  2936K-mer  2937K-mer  2938K-mer  2939K-mer  2940K-mer  2941K-mer  2942K-mer  2943K-mer  2944K-mer  2945K-mer  2946K-mer  2947K-mer  2948K-mer  2949K-mer  2950K-mer  2951K-mer  2952K-mer  2953K-mer  2954K-mer  2955K-m

K-mer  3873K-mer  3874K-mer  3875K-mer  3876K-mer  3877K-mer  3878K-mer  3879K-mer  3880K-mer  3881K-mer  3882K-mer  3883K-mer  3884K-mer  3885K-mer  3886K-mer  3887K-mer  3888K-mer  3889K-mer  3890K-mer  3891K-mer  3892K-mer  3893K-mer  3894K-mer  3895K-mer  3896K-mer  3897K-mer  3898K-mer  3899K-mer  3900K-mer  3901K-mer  3902K-mer  3903K-mer  3904K-mer  3905K-mer  3906K-mer  3907K-mer  3908K-mer  3909K-mer  3910K-mer  3911K-mer  3912K-mer  3913K-mer  3914K-mer  3915K-mer  3916K-mer  3917K-mer  3918K-mer  3919K-mer  3920K-mer  3921K-mer  3922K-mer  3923K-mer  3924K-mer  3925K-mer  3926K-mer  3927K-mer  3928K-mer  3929K-mer  3930K-mer  3931K-mer  3932K-mer  3933K-mer  3934K-mer  3935K-mer  3936K-mer  3937K-mer  3938K-mer  3939K-mer  3940K-mer  3941K-mer  3942K-mer  3943K-mer  3944K-mer  3945K-mer  3946K-mer  3947K-mer  3948K-mer  3949K-mer  3950K-mer  3951K-mer  3952K-mer  3953K-mer  3954K-mer  3955K-m

K-mer  5372K-mer  5373K-mer  5374K-mer  5375K-mer  5376K-mer  5377K-mer  5378K-mer  5379K-mer  5380K-mer  5381K-mer  5382K-mer  5383K-mer  5384K-mer  5385K-mer  5386K-mer  5387K-mer  5388K-mer  5389K-mer  5390K-mer  5391K-mer  5392K-mer  5393K-mer  5394K-mer  5395K-mer  5396K-mer  5397K-mer  5398K-mer  5399K-mer  5400K-mer  5401K-mer  5402K-mer  5403K-mer  5404K-mer  5405K-mer  5406K-mer  5407K-mer  5408K-mer  5409K-mer  5410K-mer  5411K-mer  5412K-mer  5413K-mer  5414K-mer  5415K-mer  5416K-mer  5417K-mer  5418K-mer  5419K-mer  5420K-mer  5421K-mer  5422K-mer  5423K-mer  5424K-mer  5425K-mer  5426K-mer  5427K-mer  5428K-mer  5429K-mer  5430K-mer  5431K-mer  5432K-mer  5433K-mer  5434K-mer  5435K-mer  5436K-mer  5437K-mer  5438K-mer  5439K-mer  5440K-mer  5441K-mer  5442K-mer  5443K-mer  5444K-mer  5445K-mer  5446K-mer  5447K-mer  5448K-mer  5449K-mer  5450K-mer  5451K-mer  5452K-mer  5453K-mer  5454K-me

K-mer  6872K-mer  6873K-mer  6874K-mer  6875K-mer  6876K-mer  6877K-mer  6878K-mer  6879K-mer  6880K-mer  6881K-mer  6882K-mer  6883K-mer  6884K-mer  6885K-mer  6886K-mer  6887K-mer  6888K-mer  6889K-mer  6890K-mer  6891K-mer  6892K-mer  6893K-mer  6894K-mer  6895K-mer  6896K-mer  6897K-mer  6898K-mer  6899K-mer  6900K-mer  6901K-mer  6902K-mer  6903K-mer  6904K-mer  6905K-mer  6906K-mer  6907K-mer  6908K-mer  6909K-mer  6910K-mer  6911K-mer  6912K-mer  6913K-mer  6914K-mer  6915K-mer  6916K-mer  6917K-mer  6918K-mer  6919K-mer  6920K-mer  6921K-mer  6922K-mer  6923K-mer  6924K-mer  6925K-mer  6926K-mer  6927K-mer  6928K-mer  6929K-mer  6930K-mer  6931K-mer  6932K-mer  6933K-mer  6934K-mer  6935K-mer  6936K-mer  6937K-mer  6938K-mer  6939K-mer  6940K-mer  6941K-mer  6942K-mer  6943K-mer  6944K-mer  6945K-mer  6946K-mer  6947K-mer  6948K-mer  6949K-mer  6950K-mer  6951K-mer  6952K-mer  6953K-mer  6954K-m

K-mer  8371K-mer  8372K-mer  8373K-mer  8374K-mer  8375K-mer  8376K-mer  8377K-mer  8378K-mer  8379K-mer  8380K-mer  8381K-mer  8382K-mer  8383K-mer  8384K-mer  8385K-mer  8386K-mer  8387K-mer  8388K-mer  8389K-mer  8390K-mer  8391K-mer  8392K-mer  8393K-mer  8394K-mer  8395K-mer  8396K-mer  8397K-mer  8398K-mer  8399K-mer  8400K-mer  8401K-mer  8402K-mer  8403K-mer  8404K-mer  8405K-mer  8406K-mer  8407K-mer  8408K-mer  8409K-mer  8410K-mer  8411K-mer  8412K-mer  8413K-mer  8414K-mer  8415K-mer  8416K-mer  8417K-mer  8418K-mer  8419K-mer  8420K-mer  8421K-mer  8422K-mer  8423K-mer  8424K-mer  8425K-mer  8426K-mer  8427K-mer  8428K-mer  8429K-mer  8430K-mer  8431K-mer  8432K-mer  8433K-mer  8434K-mer  8435K-mer  8436K-mer  8437K-mer  8438K-mer  8439K-mer  8440K-mer  8441K-mer  8442K-mer  8443K-mer  8444K-mer  8445K-mer  8446K-mer  8447K-mer  8448K-mer  8449K-mer  8450K-mer  8451K-mer  8452K-mer  8453K-me

SUCCESS : This graph is Eulerian !!
