In [1]:
import csv
import logging
import os
import sys

import pymongo

In [2]:
# Route INFO-and-above log records to carbon_logger.log, one tab-separated
# "timestamp / level / message" entry per record.
logging.basicConfig(
    filename="carbon_logger.log",
    level=logging.INFO,
    format='%(asctime)s \t %(levelname)s \t %(message)s',
)

In [3]:
class Carbon:
    """Load a semicolon-separated dataset file and convert its rows to numbers.

    Attributes:
        fileName: Name or path of the dataset file.
        data_list: Raw rows as read from the file, header included. Every row
            is a one-element list holding the untouched text of one line.
        y: Parsed rows (header excluded). The first two fields of each row are
            ints, all remaining fields are floats.
    """

    def __init__(self, fileName):
        """Remember the dataset location and start with empty containers.

        Args:
            fileName: Name or path of the dataset file to read.
        """
        self.fileName = fileName
        self.y = []                 # For storing refined dataset
        self.data_list = []         # For storing original dataset
        logging.info(f"Initializing {fileName} dataset!!")

    def data_reader(self):
        """Read the dataset file into ``self.data_list``.

        Each non-blank line becomes a one-element list ``[line]`` (line ending
        removed). Blank lines are skipped. Calling the method again re-reads
        the file and replaces the previous content instead of appending to it.

        Returns:
            list | None: ``self.data_list`` on success. ``None`` when the file
            could not be read; the error is logged and ``self.data_list`` keeps
            its previous content.
        """
        try:
            # Plain read mode is enough: the file is never written to. Lines
            # are taken as-is because a newline is not a meaningful csv
            # delimiter (recent Python versions reject it outright).
            with open(self.fileName, "r") as f:
                rows = []
                for raw_line in f:
                    line = raw_line.rstrip("\r\n")
                    if line.strip():
                        rows.append([line])
            # Assign only after a complete read so a failure never leaves a
            # half-filled list behind.
            self.data_list = rows
            logging.info(f"Reading data from file {self.fileName} successful ")
            return self.data_list

        except Exception as e:
            logging.error("Problem occured while reading data file!!")
            logging.exception(e)
            return None

    def data_refiner(self):
        """Convert the raw rows in ``self.data_list`` into numeric rows.

        The first row is treated as the header and skipped. Every other row is
        split on ``;``; the first two fields are converted with ``int`` and the
        remaining ones with ``float`` after replacing a decimal comma by a dot.
        Empty rows are ignored. Calling the method again rebuilds ``self.y``
        from scratch instead of appending to it.

        Returns:
            list | None: ``self.y`` on success (an empty list when there is no
            data row). ``None`` when a value could not be converted; the error
            is logged and ``self.y`` keeps its previous content.
        """
        logging.info("Method data_refiner called")
        try:
            refined = []
            # Slicing (rather than next() on an iterator) also copes with an
            # empty data_list.
            for row in self.data_list[1:]:
                if not row or not row[0].strip():
                    continue
                fields = row[0].split(";")
                refined.append([
                    int(value) if position < 2 else float(value.replace(",", "."))
                    for position, value in enumerate(fields)
                ])
            self.y = refined
            logging.info("Dataset successfully refined!!")
            return self.y

        except Exception as e:
            logging.error("Problem occured while refining the dataset!")
            logging.exception(e)
            return None

In [4]:
class MongoDB(Carbon):
    """Carbon dataset loader that can also store the refined rows in MongoDB.

    Attributes:
        client: The ``pymongo.MongoClient`` used for every operation.
        db: Currently selected database (``client.test`` until
            ``db_and_collection`` is called).
        collection: Currently selected collection, or ``None`` until
            ``db_and_collection`` is called.
        records: Result of the last successful ``insert_many`` call (only set
            by ``data_insertion``).
    """

    # Environment variable holding the MongoDB connection string. Credentials
    # must not be committed with the notebook.
    URI_ENV_VAR = "MONGODB_URI"

    # Document field names, in the column order of a refined row.
    # NOTE(review): the last key contains a space where its siblings use an
    # underscore. It is kept as-is because existing documents and queries use
    # this exact key.
    _FIELD_NAMES = (
        "Chiral_indice_n",
        "Chiral_indice_m",
        "Initial_atomic_coordinate_u",
        "Initial_atomic_coordinate_v",
        "Initial_atomic_coordinate_w",
        "Calculated_atomic_coordinates_u'",
        "Calculated_atomic_coordinates_v'",
        "Calculated_atomic_coordinates w'",
    )

    def __init__(self, fileName, uri=None):
        """Create the MongoDB client and initialise the dataset loader.

        Args:
            fileName: Name or path of the dataset file (passed to ``Carbon``).
            uri: Optional MongoDB connection string. When omitted, the value of
                the ``MONGODB_URI`` environment variable is used.

        Raises:
            ValueError: If neither ``uri`` nor the environment variable
                provides a connection string.
        """
        connection_uri = uri or os.environ.get(self.URI_ENV_VAR)
        if not connection_uri:
            raise ValueError(
                f"No MongoDB connection string given: pass `uri` or set the "
                f"{self.URI_ENV_VAR} environment variable."
            )
        self.client = pymongo.MongoClient(connection_uri)
        logging.info("Connection established with the MongoDB.")
        self.db = self.client.test
        self.collection = None
        super().__init__(fileName)

    def db_and_collection(self, Db_Name , Collection_name):
        """Select the database and collection used by later operations.

        Args:
            Db_Name: Name of the database to use.
            Collection_name: Name of the collection inside that database.
        """
        logging.info(f"Accessing database {Db_Name}")
        self.db = self.client[Db_Name]

        logging.info(f"Accessing collection {Collection_name} from database {Db_Name}")
        self.collection = self.db[Collection_name]

    def CheckExistence_DB(self,Db_Name,client=None):
        """It verifies the existence of Database

        Args:
            Db_Name: Name of the database to look for.
            client: Client to query; defaults to ``self.client``.

        Returns:
            bool | None: ``True`` if the server lists the database, ``False``
            if it does not, ``None`` if the check itself failed (logged).
        """
        try:
            if client is None:
                client = self.client
            DBlist = client.list_database_names()
            if Db_Name in DBlist:
                print(f"DB: '{Db_Name}' exists")
                return True
            print(f"DB: '{Db_Name}' not yet present OR no collection is present in the DB")
            return False
        except Exception as e:
            logging.error("Error occured in method CheckExistence_DB()")
            logging.exception(e)
            return None

    def CheckExistence_Collection(self,Db_Name,Collection_Name,db=None):
        """It verifies the existence of Collection in the given Database

        Args:
            Db_Name: Name of the database (used for messages, and to pick the
                database when ``db`` is omitted).
            Collection_Name: Name of the collection to look for.
            db: Database object to query; defaults to ``self.client[Db_Name]``.

        Returns:
            bool | None: ``True`` if the database lists the collection,
            ``False`` if it does not, ``None`` if the check itself failed
            (logged).
        """
        try:
            # Compare with None explicitly: pymongo database objects do not
            # support truth-value testing.
            if db is None:
                db = self.client[Db_Name]
            collection_list = db.list_collection_names()
            if Collection_Name in collection_list:
                print(f"Collection:'{Collection_Name}' exists in Database:'{Db_Name}'")
                return True

            print(f"Collection:'{Collection_Name}' does not exists in Database:'{Db_Name}' OR no documents are present in the collection")
            return False
        except Exception as e:
            logging.error("Error occured in method CheckExistence_Collection()")
            logging.exception(e)
            return None

    def data_insertion(self, data):
        """Insert every refined row as one document into the selected collection.

        Each row is mapped onto the eight field names in ``_FIELD_NAMES`` and
        gets its position in ``data`` (starting at 0) as ``_id``.

        Args:
            data: Iterable of rows, each with at least eight values.

        Returns:
            The ``insert_many`` result on success (also stored in
            ``self.records``). ``None`` when no collection is selected, when
            there is nothing to insert, or when the insertion failed; the
            reason is logged.
        """
        # Compare with None explicitly: pymongo collection objects do not
        # support truth-value testing.
        if self.collection is None:
            logging.error("data_insertion() called before db_and_collection(); nothing inserted")
            return None
        try:
            logging.info(f"Inserting data into collection '{self.collection}'")
            documents = []
            for doc_id, row in enumerate(data):
                document = {"_id": doc_id}
                # Index explicitly so a row that is too short still raises
                # instead of producing an incomplete document.
                for position, field_name in enumerate(self._FIELD_NAMES):
                    document[field_name] = row[position]
                documents.append(document)

            if not documents:
                logging.warning("No rows to insert; nothing was written")
                return None

            self.records = self.collection.insert_many(documents)
            # Report success only once the write has actually happened.
            logging.info("Data inserted successfully!!!")
            return self.records
        except Exception as e:
            logging.error("Problem occured while inserting data")
            logging.exception(e)
            return None

In [5]:
# Creating object of the class MongoDB.
# Surrounding whitespace is stripped from the answer; an empty answer falls
# back to carbon_nanotubes.csv so the later read does not fail on an empty path.
filename = input("Enter the name of dataset or path of the dataset: ").strip() or "carbon_nanotubes.csv"
obj = MongoDB(filename)

Enter the name of dataset or path of the dataset: carbon_nanotubes.csv


In [7]:
# Reading the original dataset
data = obj.data_reader()
print("*****ORIGINAL DATASET******")
# data_reader() returns None when the file could not be read; iterating over
# None would raise a TypeError that hides the real cause.
if data is None:
    print("Dataset could not be read; see carbon_logger.log for details.")
else:
    for row in data:
        print(row)

*****ORIGINAL DATASET******


In [2]:
# Reading the refined Dataset
data_refined = obj.data_refiner()
print("*****REFINED DATASET******")
# data_refiner() returns None when a value could not be converted; iterating
# over None would raise a TypeError that hides the real cause.
if data_refined is None:
    print("Dataset could not be refined; see carbon_logger.log for details.")
else:
    for row in data_refined:
        print(row)

#### Creating the Database and Collection in MongoDB

In [10]:
# Names of the database and collection that the following cells work on.
database_name, collection_name = "Carbon_Dataset", "Nanotubes"
obj.db_and_collection(Db_Name=database_name, Collection_name=collection_name)

#### Inserting the entire dataset into MongoDB

In [None]:
# Store every refined row as one document. `records` receives whatever
# data_insertion() returns (None when the insertion failed).
records = obj.data_insertion(data=data_refined)

In [11]:
# Ask the server whether the database is listed; the result is shown as the cell output.
obj.CheckExistence_DB(Db_Name="Carbon_Dataset", client=obj.client)

DB: 'Carbon_Dataset' exists


True

In [12]:
# Ask the selected database whether the collection is listed; the result is shown as the cell output.
obj.CheckExistence_Collection(Db_Name="Carbon_Dataset", Collection_Name="Nanotubes", db=obj.db)

Collection:'Nanotubes' exists in Database:'Carbon_Dataset'


True

#### Performing find() Operation

In [13]:
# Getting the last document of the collection: sort by _id in descending
# order and keep a single result, instead of hard-coding the highest _id
# (which is only correct for one particular dataset size).
a = obj.collection.find().sort("_id", pymongo.DESCENDING).limit(1)
for i in a:
    print(i)

{'_id': 10720, 'Chiral_indice_n': 12, 'Chiral_indice_m': 6, 'Initial_atomic_coordinate_u': 0.953664, 'Initial_atomic_coordinate_v': 0.698374, 'Initial_atomic_coordinate_w': 0.962699, "Calculated_atomic_coordinates_u'": 0.961243, "Calculated_atomic_coordinates_v'": 0.707812, "Calculated_atomic_coordinates w'": 0.962605}


In [14]:
# Fetch a single document with no filter applied; it is shown as the cell output.
obj.collection.find_one()

{'_id': 0,
 'Chiral_indice_n': 2,
 'Chiral_indice_m': 1,
 'Initial_atomic_coordinate_u': 0.679005,
 'Initial_atomic_coordinate_v': 0.701318,
 'Initial_atomic_coordinate_w': 0.017033,
 "Calculated_atomic_coordinates_u'": 0.721039,
 "Calculated_atomic_coordinates_v'": 0.730232,
 "Calculated_atomic_coordinates w'": 0.017014}

In [None]:
#  Getting entire data: print every document prefixed with its running position
all_rec = obj.collection.find()
position = 0
for document in all_rec:
    print(f"{position}: {document}\n")
    position += 1

In [None]:
# Documents whose "Initial_atomic_coordinate_v" is strictly greater than 0.7
v_above_threshold = {"Initial_atomic_coordinate_v": {"$gt": 0.7}}
for document in obj.collection.find(v_above_threshold):
    print(document, "\n")

In [None]:
# Documents whose "Chiral_indice_m" field equals 3
m_equals_3 = {"Chiral_indice_m": 3}
for document in obj.collection.find(m_equals_3):
    print(document)

#### Performing Update Operation

In [None]:
# Filter selecting the documents to change, and the update that sets the same
# field to its new value. Both are reused by the update cells below.
old_data = dict(Chiral_indice_m=3)
new_data = {"$set": dict(Chiral_indice_m=13)}

In [None]:
# Apply the update to a single matching document, then list every document
# that carries the new value.
obj.collection.update_one(old_data, new_data)
updated_filter = {"Chiral_indice_m": 13}
for document in obj.collection.find(updated_filter):
    print(document, "\n")

In [None]:
# Apply the update to every matching document, then list every document that
# carries the new value.
obj.collection.update_many(old_data, new_data)
updated_filter = {"Chiral_indice_m": 13}
for document in obj.collection.find(updated_filter):
    print(document, "\n")

In [15]:
# Printing only first 10 records of 'Chiral_indice_n'= 12
rg = obj.collection.find({'Chiral_indice_n': 12}).limit(10)
# The loop variable is deliberately not named `records`: that name holds the
# insertion result at notebook level and must not be overwritten here.
for document in rg:
    print(f"{document}\n")

{'_id': 9749, 'Chiral_indice_n': 12, 'Chiral_indice_m': 2, 'Initial_atomic_coordinate_u': 1.1, 'Initial_atomic_coordinate_v': 0.858083, 'Initial_atomic_coordinate_w': 0.020801, "Calculated_atomic_coordinates_u'": 0.896319, "Calculated_atomic_coordinates_v'": 0.861257, "Calculated_atomic_coordinates w'": 0.020769}

{'_id': 9750, 'Chiral_indice_n': 12, 'Chiral_indice_m': 2, 'Initial_atomic_coordinate_u': 0.764853, 'Initial_atomic_coordinate_v': 0.931723, 'Initial_atomic_coordinate_w': 0.009173, "Calculated_atomic_coordinates_u'": 0.768731, "Calculated_atomic_coordinates_v'": 0.938717, "Calculated_atomic_coordinates w'": 0.009119}

{'_id': 9751, 'Chiral_indice_n': 12, 'Chiral_indice_m': 2, 'Initial_atomic_coordinate_u': 0.83366, 'Initial_atomic_coordinate_v': 0.909094, 'Initial_atomic_coordinate_w': 0.040181, "Calculated_atomic_coordinates_u'": 0.83712, "Calculated_atomic_coordinates_v'": 0.91387, "Calculated_atomic_coordinates w'": 0.040099}

{'_id': 9752, 'Chiral_indice_n': 12, 'Chiral_

In [16]:
# Set 'Initial_atomic_coordinate_u' to 1.1 on one document whose current value
# is exactly 0.893567. The cell output shows the document as it was before the
# update (it still displays 0.893567).
# NOTE(review): the filter matches on the old value, so re-running this cell
# will not hit the same document again — confirm this is intended.
obj.collection.find_one_and_update({'Initial_atomic_coordinate_u':0.893567}, {"$set":{'Initial_atomic_coordinate_u':1.1}})

{'_id': 9799,
 'Chiral_indice_n': 12,
 'Chiral_indice_m': 2,
 'Initial_atomic_coordinate_u': 0.893567,
 'Initial_atomic_coordinate_v': 0.858083,
 'Initial_atomic_coordinate_w': 0.354135,
 "Calculated_atomic_coordinates_u'": 0.895224,
 "Calculated_atomic_coordinates_v'": 0.860137,
 "Calculated_atomic_coordinates w'": 0.354025}

In [17]:
# Re-read the first 10 documents with 'Chiral_indice_n' = 12 after the update.
rg = obj.collection.find({'Chiral_indice_n': 12}).limit(10)
# The loop variable is deliberately not named `records`: that name holds the
# insertion result at notebook level and must not be overwritten here.
for document in rg:
    print(f"{document}\n")

{'_id': 9749, 'Chiral_indice_n': 12, 'Chiral_indice_m': 2, 'Initial_atomic_coordinate_u': 1.1, 'Initial_atomic_coordinate_v': 0.858083, 'Initial_atomic_coordinate_w': 0.020801, "Calculated_atomic_coordinates_u'": 0.896319, "Calculated_atomic_coordinates_v'": 0.861257, "Calculated_atomic_coordinates w'": 0.020769}

{'_id': 9750, 'Chiral_indice_n': 12, 'Chiral_indice_m': 2, 'Initial_atomic_coordinate_u': 0.764853, 'Initial_atomic_coordinate_v': 0.931723, 'Initial_atomic_coordinate_w': 0.009173, "Calculated_atomic_coordinates_u'": 0.768731, "Calculated_atomic_coordinates_v'": 0.938717, "Calculated_atomic_coordinates w'": 0.009119}

{'_id': 9751, 'Chiral_indice_n': 12, 'Chiral_indice_m': 2, 'Initial_atomic_coordinate_u': 0.83366, 'Initial_atomic_coordinate_v': 0.909094, 'Initial_atomic_coordinate_w': 0.040181, "Calculated_atomic_coordinates_u'": 0.83712, "Calculated_atomic_coordinates_v'": 0.91387, "Calculated_atomic_coordinates w'": 0.040099}

{'_id': 9752, 'Chiral_indice_n': 12, 'Chiral_