In [None]:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.IO;
using System.Text.Json;
using System.Text.RegularExpressions;
// Shared random number generator; traverse() uses it to choose which outgoing edge to follow.
var random = new Random();

In [None]:
// Result record for one assembly run. Instances are serialized to / deserialized from JSON,
// so the property names double as the JSON keys.
class Diagnostic{
    // Elapsed time of the run, stored as text.
    public string execution_time { get; set; }
    // The k value the run was executed with.
    public int k_mer { get; set; }
    // Error count for the run; -1 is the constructor default.
    public int error { get; set; }
    // Sequence that was fed into the run.
    public string input_data { get; set; }
    // Sequence that the run produced.
    public string output_data { get; set; }

    // Stores every argument in the property of the same name; error defaults to -1.
    public Diagnostic(string execution_time,int k_mer, string input_data, string output_data, int error = -1){
        (this.execution_time, this.k_mer, this.input_data, this.output_data, this.error) =
            (execution_time, k_mer, input_data, output_data, error);
    }
}
// Asynchronously writes the text in `stream` to the file "<fileName>.json".
async Task WriteToFile(string stream,string fileName){
    string targetPath = $"{fileName}.json";
    await File.WriteAllTextAsync(targetPath, stream);
}

In [None]:
// A vertex of the overlap graph. All nodes live in the static lookup_table keyed by name,
// and each node keeps one list entry per incoming / outgoing edge (parallel edges are repeated).
class Node{
    // Registry of every node created so far, keyed by node name. Created lazily by AddConnection.
    public static Dictionary<string,Node> lookup_table;
    // Left node of the first connection added while root was null.
    public static Node root;
    // Right node of the most recently added connection.
    public static Node leaf;
    public string name;
    public List<Node> inbounds;
    public List<Node> outbounds;

    // Nodes are only created through AddConnection.
    private Node(string name){
        this.name = name;
        inbounds = new List<Node>();
        outbounds = new List<Node>();
    }

    // Returns the registered node called `key`, registering a fresh one if none exists yet.
    private static Node GetOrCreate(string key){
        lookup_table.TryGetValue(key, out Node found);
        if(found != null)
            return found;
        var created = new Node(key);
        lookup_table.Add(key, created);
        return created;
    }

    // Adds one directed edge left_name -> right_name, creating either endpoint on demand.
    // Also sets root (only if still null) to the left node and leaf to the right node.
    public static void AddConnection(string left_name,string right_name){
        if(lookup_table is null)
            lookup_table = new Dictionary<string,Node>();

        // Resolve left before right so a self-loop reuses the node that was just registered.
        Node source = GetOrCreate(left_name);
        Node target = GetOrCreate(right_name);

        if(root is null)
            root = source;
        leaf = target;

        source.outbounds.Add(target);
        target.inbounds.Add(source);
    }
}

In [None]:
// Open genom.fna as a sequence of lines (File.ReadLines enumerates the file lazily).
var lines = File.ReadLines("genom.fna");

In [None]:
// Drop every line that begins with '>' and materialize the remaining lines into a list.
lines = lines.Where(line => !line.StartsWith('>')).ToList();

In [None]:
// Concatenate the remaining lines into one string and strip any "\n" characters left in it.
var data = string.Concat(lines).Replace("\n", "");

In [None]:
// Disabled alternative: a short literal that can replace the file-based input.
//var data = "to_every_thing_turn_turn_turn_there_is_a_season";
// Print the number of characters in data.
Console.WriteLine(data.Length);

2821361


In [None]:
// Returns every length-k substring of data, one per start position, in left-to-right order.
// When k is larger than data.Length the result is an empty list.
List<string> constructReads(int k){
    int windowCount = data.Length - k + 1;
    var kmers = new List<string>();
    int start = 0;
    while(start < windowCount){
        kmers.Add(data.Substring(start, k));
        start++;
    }
    return kmers;
}
// var reads = constructReads(5);
// Console.WriteLine(reads.Count())

In [None]:
// Rebuilds the Node graph from scratch: each read contributes one edge from its first
// (k-1) characters to its last (k-1) characters, where k is the length of the first read.
void constructNodeGraph(List<string> reads){
    int overlap = reads[0].Length - 1;

    // Discard the previous graph; AddConnection recreates the table and root on first use.
    Node.lookup_table = null;
    Node.root = null;

    reads.ForEach(kmer => Node.AddConnection(kmer.Substring(0, overlap), kmer.Substring(1, overlap)));
}
// constructNodeGraph(reads);

In [None]:
// Returns true when every node in Node.lookup_table has as many incoming as outgoing edges.
bool isBalanced(){
    foreach(var entry in Node.lookup_table){
        Node node = entry.Value;
        if(node.inbounds.Count != node.outbounds.Count)
            return false;
    }
    return true;
}
// isBalanced()

In [None]:
// Adds one extra edge from the current leaf back to the root.
// Note: AddConnection assigns Node.leaf to the edge's right node, so Node.leaf is the root node afterwards.
void connectRootToLeaf(){
    Node tail = Node.leaf;
    Node head = Node.root;
    Node.AddConnection(tail.name, head.name);
}
// connectRootToLeaf()

In [None]:
// Looks for a node that still has at least one outgoing edge.
// select receives the first such node in the table's enumeration order, or null when none is left;
// the return value tells whether a node was found.
bool has_edge(out Node select){
    select = Node.lookup_table
        .Where(entry => entry.Value.outbounds.Count > 0)
        .Select(entry => entry.Value)
        .FirstOrDefault();
    return !ReferenceEquals(select, null);
}
// Walks every edge of the graph in Node.lookup_table exactly once (Hierholzer-style: closed
// cycles are walked one at a time and spliced into the tour) and spells the resulting sequence:
// the first character of every visited node followed by the rest of the last node's name.
//
// The walk consumes the outbounds lists, so the graph cannot be traversed twice.
// Every node must have equal in/out degree; otherwise a walk reaches a node with no unused
// outgoing edge and the list indexer throws.
// Returns an empty string when the graph has no edges.
string traverse(){
    List<Node> tour = new List<Node>();
    Node select;
    while(has_edge(out select)){
        // Where the new cycle must be spliced in (-1 only for the very first cycle).
        var append_index = tour.IndexOf(select);
        List<Node> cycle = new List<Node>();
        Node left = select;
        var closed = false;
        while(!closed){
            cycle.Add(left);
            // Follow (and consume) a randomly chosen unused outgoing edge.
            var idx = random.Next(left.outbounds.Count);
            Node right = left.outbounds[idx];
            left.outbounds.RemoveAt(idx);
            if(select.name == right.name){
                // Back at the start node: the cycle is closed.
                cycle.Add(right);
                closed = true;
            }else
                left = right;
        }
        if(append_index == -1)
            tour.AddRange(cycle);
        else{
            // The cycle starts at tour[append_index]; insert the remainder right after it.
            tour.InsertRange(append_index+1,cycle.Skip(1));
        }
    }
    if(tour.Count == 0)
        return string.Empty;

    // The tour is closed (first node == last node); drop the duplicate so that
    // edge i runs from tour[i] to tour[(i+1) % count].
    tour.RemoveAt(tour.Count-1);
    int count = tour.Count;

    // The closing edge is not necessarily the artificial tail -> head edge: when the head node
    // has several incoming edges, the first cycle can return through a real one, leaving the
    // artificial edge in the middle of the tour. Cut the circuit at a tail -> head edge instead.
    // The tail is taken to be the last predecessor recorded on Node.root, which is what
    // connectRootToLeaf leaves behind when it is the last AddConnection call before traverse
    // (as in processData); inbounds lists are not modified by the walk above.
    Node head = Node.root;
    Node tail = (head != null && head.inbounds.Count > 0) ? head.inbounds[head.inbounds.Count-1] : null;
    int start = 0;
    if(tail != null){
        // Scan from the end so that an already-correct tour (closing edge == tail -> head) is left as is.
        for(int i = count-1; i >= 0; i--){
            int next = (i+1) % count;
            if(ReferenceEquals(tour[i], tail) && ReferenceEquals(tour[next], head)){
                start = next;
                break;
            }
        }
    }

    var str = new StringBuilder();
    for(int offset = 0; offset < count; offset++)
        str.Append(tour[(start+offset) % count].name.Substring(0,1));
    str.Append(tour[(start+count-1) % count].name.Substring(1));
    return str.ToString();
}

In [None]:
// Returns the number of positions i in [0, a.Length) at which a[i] differs from b[i].
// b must be at least as long as a (a shorter b throws IndexOutOfRangeException);
// characters of b beyond a.Length are ignored.
int hammingDistance(string a, string b){
    var err=0;
    // Use Length (O(1)): LINQ Count() on a string enumerates all characters on every
    // evaluation of the loop condition, which makes the loop quadratic.
    for(int i = 0;i<a.Length;i++){
        if(a[i] != b[i])
            err++;
    }
    return err;
}

In [None]:
// Serializes the diagnostic to JSON and writes it to "<fileName>.json".
// The write is synchronous on purpose: with `async void` the caller could not await the
// write, and an I/O failure (e.g. a missing target directory) would surface as an unhandled
// exception outside the caller's control. Now the file is complete when this returns and
// any exception propagates to the caller.
void saveDiagnostic(Diagnostic diagnostic, string fileName){
    string jsonString = JsonSerializer.Serialize(diagnostic);
    File.WriteAllText(fileName+".json", jsonString);
}

In [None]:
// Runs one assembly for the given k: builds the reads and the graph, closes the graph with
// connectRootToLeaf, times traverse(), and returns the result as a Diagnostic.
// When saveResults is true the diagnostic is also saved under
// subFolderName + "genom_k_" + k (with "_" + dupeIdx appended unless dupeIdx is -1).
Diagnostic processData(int k, bool saveResults = false,int dupeIdx = -1,string subFolderName= ""){
    // Label shared by the output file name and the progress message.
    string runLabel = k.ToString();
    if(dupeIdx != -1)
        runLabel = runLabel+"_"+dupeIdx.ToString();

    constructNodeGraph(constructReads(k));
    connectRootToLeaf();

    // Only the traversal itself is timed.
    var timer = Stopwatch.StartNew();
    string assembled = traverse();
    timer.Stop();

    var report = new Diagnostic(timer.Elapsed.ToString(), k, data, assembled);
    if(saveResults)
        saveDiagnostic(report, subFolderName+"genom_k_"+runLabel);
    Console.WriteLine("k= "+runLabel+" Done!");
    return report;
}


In [None]:
// For each k value, run the assembly five times (run index 1..5) and save every result under Data/.
foreach(int kValue in new[]{1750}){
    for(int run = 1; run <= 5; run++)
        processData(kValue, true, run, "Data/");
}

In [None]:
// Counts the positions, over the length of d.input_data, at which d.input_data and
// d.output_data hold different characters, and stores that count in d.error.
void HummingDistance(Diagnostic d){
    string expected = d.input_data;
    string actual = d.output_data;
    int mismatches = 0;
    int position = 0;
    while(position < expected.Length){
        if(expected[position] != actual[position])
            mismatches += 1;
        position++;
    }
    d.error = mismatches;
}

In [None]:
// Fills in the error field of every saved diagnostic in the Data directory (relative to the
// current directory) that does not have one yet (error == -1) and writes it back in place.
void CalculateErrors(){
    const string extension = ".json";
    // Match on the real file suffix; a pattern that merely contains ".json" would also pick
    // up names such as "x.json.bak", which the suffix stripping below would then mangle.
    var files = Directory.GetFiles("Data")
        .Where(path => path.EndsWith(extension, StringComparison.Ordinal))
        .ToArray();
    foreach(var filePath in files){
        var jsonString = File.ReadAllText(filePath);
        var diagnostic = JsonSerializer.Deserialize<Diagnostic>(jsonString);
        // Skip files holding a JSON null, and files whose error is already computed
        // (rewriting them would only reproduce the same content).
        if(diagnostic == null || diagnostic.error != -1)
            continue;
        HummingDistance(diagnostic);
        // saveDiagnostic appends ".json" itself, so hand it the path without the suffix.
        saveDiagnostic(diagnostic,filePath.Substring(0,filePath.Length-extension.Length));
    }
}
CalculateErrors();