In [1]:
from pymongo import MongoClient
import requests
from SPARQLWrapper import SPARQLWrapper, JSON
from pprint import pprint
import csv

In [2]:
class MutantMongo(object):
    """
    Wrapper around a single mutant record (a dict) that is stored in the
    ``mutants`` collection of the ``genomes`` MongoDB database.

    On construction the record receives an ``_id`` built from its ``name``,
    ``locusTag`` and ``coordinate['start']`` values; the remaining methods
    insert or delete that record, attach a GFF-style sub-document to it, or
    fetch a PubMed summary.
    """

    # Lookup from a variant-type label to a name/id pair. The ids carry an
    # ``SO:`` prefix (presumably Sequence Ontology terms -- confirm).
    so_map = {
        'SYNONYMOUS': {
            'name': 'synonymous',
            'id': 'SO:0001814'
        },
        'Non-neutral': {
            'name': 'non-synonymous',
            'id': 'SO:0001816'
        },
        'NON-CODING': {
            'name': 'non_transcribed_region',
            'id': 'SO:0000183'
        },
        'Neutral': {
            'name': 'silent_mutation',
            'id': 'SO:0001017'
        },
        'NONSENSE': {
            'name': 'stop_gained',
            'id': 'SO:0001587'
        }
    }

    def __init__(self, mut_json=None):
        """
        Store the record, derive its ``_id`` and open the MongoDB handles.

        :param mut_json: dict describing one mutant. It must provide the keys
            ``name``, ``locusTag`` and ``coordinate`` (itself holding
            ``start``). The dict is modified in place: an ``_id`` entry of the
            form ``<name>-<locusTag>-<start>`` is added.
        :raises TypeError: if ``mut_json`` is None (the default) or is not
            subscriptable.
        :raises KeyError: if one of the required keys is missing.
        :return: None
        """
        if mut_json is None:
            # Fail with an explicit message instead of the opaque
            # "'NoneType' object is not subscriptable" raised further down.
            raise TypeError(
                "mut_json is required: a dict with 'name', 'locusTag' and "
                "'coordinate' -> 'start' entries"
            )

        # Build the id first so that a malformed record is rejected before a
        # MongoClient is created and left unused.
        mut_json['_id'] = '%s-%s-%s' % (mut_json['name'],
                                        mut_json['locusTag'],
                                        mut_json['coordinate']['start'])

        self.mut_json = mut_json
        self.client = MongoClient()
        self.genomes = self.client.genomes
        self.mutants = self.genomes.mutants
        # Initialised empty; nothing inside this class fills them.
        self.pmids = {}
        self.insert_log = []

    @staticmethod
    def _is_duplicate_key_error(exc):
        """Return True if ``exc`` reports a MongoDB duplicate-key (E11000) failure."""
        # Driver write errors expose the numeric server code as ``.code``.
        if getattr(exc, 'code', None) == 11000:
            return True
        # Fall back to the message text. A substring test is required here:
        # ``'E11000' in exc.args`` only matches an argument that is exactly
        # 'E11000', which a full error message never is.
        return 'E11000' in str(exc)

    def push2mongo(self):
        """
        Insert the wrapped record into the ``mutants`` collection.

        :return: dict with two booleans: ``write_success`` (the insert went
            through) and ``duplicate_key`` (the insert failed because a
            document with the same ``_id`` already exists).
        """
        try:
            self.mutants.insert_one(self.mut_json)
        except Exception as e:
            print(e.args)
            return {
                'write_success': False,
                'duplicate_key': self._is_duplicate_key_error(e),
            }
        return {
            'write_success': True,
            'duplicate_key': False,
        }

    def delete_one_mongo(self):
        """
        Delete the document whose ``_id`` equals the wrapped record's ``_id``.

        :return: ``{'delete_success': True}`` when the delete call returned
            without raising (whether or not a document matched),
            ``{'delete_success': False}`` when it raised.
        """
        try:
            self.mutants.delete_one({'_id': self.mut_json['_id']})
            return {'delete_success': True}

        except Exception:
            return {'delete_success': False}

    def add_gff_from_json(self):
        """
        Attach a ``gff`` sub-document to the record, built from its own fields.

        ``start`` and ``end`` are both taken from ``coordinate['start']``;
        ``score``, ``strand`` and ``phase`` are filled with ``'.'``.

        :raises KeyError: if ``chromosome``, ``mutant_type``, ``coordinate``
            or ``name`` is missing from the record.
        :return: None (the record is modified in place)
        """
        self.mut_json['gff'] = {
            "seqname": self.mut_json['chromosome'],
            "source": self.mut_json['mutant_type'],
            "feature": 'mutation',
            "start": self.mut_json['coordinate']['start'],
            "end": self.mut_json['coordinate']['start'],
            "score": '.',
            "strand": '.',
            "phase": '.',
            "attribute": 'id={}'.format(self.mut_json['name'])
        }

    @staticmethod
    def get_pub(pmid, timeout=30):
        """
        Fetch the NCBI E-utilities ``esummary`` JSON for one PubMed id.

        :param pmid: PubMed identifier, interpolated into the request URL.
        :param timeout: seconds to wait for the server before giving up;
            without it a stalled connection would block indefinitely.
        :return: the decoded JSON body of the response.
        """
        url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&id={}'.format(pmid)
        r = requests.get(url, timeout=timeout)
        return r.json()

In [3]:
with open('Kokes_mutants.csv', 'r') as km:
    reader = csv.reader(km, delimiter=',')
    columns = 

['\ufeff"Table S2', ' related to Figure 1: Single nucleotide variants (SNVs) identified in the genomes of 934 sequenced C. t L2/434/Bu strains."', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['genome_position_L2', 'locus_tag_L2', 'locus_tag_DUW3', 'gene_name_L2', 'gene_product_L2', 'snv_type', 'aa_sub', 'ref_base_L2', 'variant_base_L2', 'mutant_strain', 'read_depth', 'variant_read_count', 'variant_frequency', 'mutant_type', 'ref_pmid', 'ref_qid']
['78', 'CTL0001', 'CT633', 'hemB', 'Delta-aminolevulinic acid dehydratase', 'SYNONYMOUS', '', 'C', 'T', 'CTL2M_Pool 05', '1065', '158', '15%', 'chemical', '25920978', 'Q28791052']
['186', 'CTL0001', 'CT633', 'hemB', 'Delta-aminolevulinic acid dehydratase', 'Neutral', 'M62I', 'G', 'A', 'CTL2M_Pool 22', '325', '10', '3%', 'chemical', '25920978', 'Q28791052']
['326', 'CTL0001', 'CT633', 'hemB', 'Delta-aminolevulinic acid dehydratase', 'Neutral', 'A109V', 'C', 'T', 'C

In [1]:
# Hand-written example of a mutant record. It carries the keys that
# MutantMongo reads ("name", "locusTag", "coordinate" -> "start",
# "chromosome", "mutant_type"); the "pub" entry appears to be a PubMed
# summary record used as placeholder data.
mut_json = {
    # --- organism / locus ---------------------------------------------
    "taxid": "471472",
    "locusTag": "CTL0003",
    "name": "mut strain name",
    "chromosome": "NC_010287.1",

    # --- how the mutant was generated ---------------------------------
    "mutant_type": {
        "alias": "chemical mutagenesis",
        "name": "chemically induced mutation",
        "id": "EFO_0000370",
        "key": 1,
    },

    # --- the variant itself -------------------------------------------
    "coordinate": {"start": "2001"},
    "ref_base": "C",
    "variant_base": "G",
    "variant_type": {
        "alias": "Non-neutral",
        "name": "non-synonymous",
        "id": "SO:0001816",
    },
    "aa_effect": "R127C",

    # --- publication record -------------------------------------------
    "pub": {
        "uid": "324324",
        "pubdate": "1977 May",
        "epubdate": "",
        "source": "Am Surg",
        "authors": [
            {"name": "Manberger AR", "authtype": "Author", "clusterid": ""},
        ],
        "lastauthor": "Manberger AR",
        "title": "\"To care for him who shall have borne the battle\"--Abraham Lincoln.",
        "sorttitle": "to care for him who shall have borne the battle abraham lincoln",
        "volume": "43",
        "issue": "5",
        "pages": "255-8",
        "lang": ["eng"],
        "nlmuniqueid": "0370522",
        "issn": "0003-1348",
        "essn": "1555-9823",
        "pubtype": ["Historical Article", "Journal Article"],
        "recordstatus": "PubMed - indexed for MEDLINE",
        "pubstatus": "4",
        "articleids": [
            {"idtype": "pubmed", "idtypen": 1, "value": "324324"},
            {"idtype": "rid", "idtypen": 8, "value": "324324"},
            {"idtype": "eid", "idtypen": 8, "value": "324324"},
        ],
        "history": [
            {"pubstatus": "pubmed", "date": "1977/05/01 00:00"},
            {"pubstatus": "medline", "date": "1977/05/01 00:01"},
            {"pubstatus": "entrez", "date": "1977/05/01 00:00"},
        ],
        "references": [],
        "attributes": [],
        "pmcrefcount": "",
        "fulljournalname": "The American surgeon",
        "elocationid": "",
        "doctype": "citation",
        "srccontriblist": [],
        "booktitle": "",
        "medium": "",
        "edition": "",
        "publisherlocation": "",
        "publishername": "",
        "srcdate": "",
        "reportnumber": "",
        "availablefromurl": "",
        "locationlabel": "",
        "doccontriblist": [],
        "docdate": "",
        "bookname": "",
        "chapter": "",
        "sortpubdate": "1977/05/01 00:00",
        "sortfirstauthor": "Manberger AR",
        "vernaculartitle": "",
    },
}