**Folder structure:**

```
D:\Routelines
        \data
        \db
\data -> put "NOCTable.csv" and the CIF file here
\db   -> the SQLite db file will be created here
```

In [1]:
import os
import sys
import csv
import math
import mmap
import sqlite3

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm


# Record the interpreter and library versions this notebook was run with.
print(f'Python v{sys.version}')
print('------------------------------------------------------------------------------')
print(f'Pandas v{pd.__version__}')
print(f'NumPy v{np.__version__}')
print(f'SQLite v{sqlite3.sqlite_version}')

Python v3.9.2 (tags/v3.9.2:1a79785, Feb 19 2021, 13:44:55) [MSC v.1928 64 bit (AMD64)]
------------------------------------------------------------------------------
Pandas v1.2.0
NumPy v1.19.4
SQLite v3.34.1


### Please set the project folder path here:

In [2]:
# Root folder of the project; keep the trailing slash because later cells
# build sub-paths from it by plain string concatenation.
project = 'D:/Routelines/'

Please check the path to the CIF file:

In [3]:
# Sub-folders of the project root (`project` already ends with a slash).
cif_path = f'{project}data/'
db_path = f'{project}db/'

# Name of the CIF file to load and its full path.
cif = 'Bus_1.cif'
cif_file = f'{cif_path}{cif}'

# Echo the resolved locations so they can be checked before the long import.
print(cif_path, cif_file, db_path, sep='\n')

D:/Routelines/data/
D:/Routelines/data/Bus_1.cif
D:/Routelines/db/


Loading CIF file into Pandas dataframe:

In [4]:
def mapcount(filename):
    """Count the lines of a file by scanning a memory map of it.

    A final line without a trailing newline is counted as a line.

    Parameters
    ----------
    filename : str or path-like
        File to scan. It is opened read-only.

    Returns
    -------
    int
        Number of lines; 0 for an empty file.
    """
    print("Counting number of lines...")
    lines = 0
    # Read-only binary access is enough for counting and avoids requiring
    # write permission on the data file.
    with open(filename, "rb") as f:
        # mmap raises ValueError for a zero-length file, so skip mapping it.
        if os.fstat(f.fileno()).st_size > 0:
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as buf:
                readline = buf.readline  # local alias for the hot loop
                while readline():
                    lines += 1
    print(f'{lines} lines in the file!')
    return lines

In [5]:
# Reading CIF file

def read_cif(cif_file):
    """Load a CIF file into a one-column pandas DataFrame.

    The file is read in chunks of 1,000,000 rows so that a progress bar can be
    shown, and the chunks are concatenated into a single frame at the end.

    Parameters
    ----------
    cif_file : str
        Path to the CIF file.

    Returns
    -------
    pandas.DataFrame
        Frame with a single column ``CODE`` holding the parsed rows.
        NOTE(review): '!' is passed as the separator, presumably because it
        never occurs in the data so each line stays in one field - confirm.
    """
    print("========================================================")
    print(f'CIF file size is {round(os.stat(cif_file).st_size / (1024 * 1024), 2)} MB')
    count_lines = mapcount(cif_file)
    if count_lines == 0:
        # Nothing to parse; return an empty frame with the expected column.
        print("CIF data imported!")
        return pd.DataFrame(columns=['CODE'])

    chunk_size = 1000000
    # The line count is only used to size the progress bar; the loop below
    # runs until the reader itself is exhausted.
    loops = math.ceil(count_lines / chunk_size)
    chunks = []
    with tqdm(total=loops, file=sys.stdout) as pbar:
        pbar.set_description('Importing CIF data to Pandas dataframe')
        reader = pd.read_csv(cif_file, names=['CODE'], header=None, sep='!',
                             chunksize=chunk_size)
        for chunk in reader:
            chunks.append(chunk)
            pbar.update(1)

    if chunks:
        cif_data = pd.concat(chunks, ignore_index=True)
    else:
        # pd.concat rejects an empty list, e.g. when every line was blank.
        cif_data = pd.DataFrame(columns=['CODE'])
    print("CIF data imported!")
    return cif_data

In [6]:
def GetPtStops(cif_data, region=None):
    """Extract the stop records (rows starting with ``QB``) from raw CIF data.

    Parameters
    ----------
    cif_data : pandas.DataFrame
        Frame with a string column ``CODE`` containing one CIF record per row.
    region : str, optional
        Label used only in the completion message. When omitted, a
        module-level ``region`` variable is used if one exists; otherwise the
        message is printed without a label.

    Returns
    -------
    pandas.DataFrame
        Columns ``NaptanID`` (characters 3-14 of the record, right-stripped),
        ``Xcoord`` (characters 15-20) and ``Ycoord`` (characters 23-28), all
        kept as strings. The index is that of the matching rows in
        ``cif_data``.
    """
    print("--------------------------------------------------------")
    print("Extracting PTStops (QB) data...")
    codes = cif_data['CODE']
    # na=False treats missing / non-string rows as non-matching.
    qb_codes = codes[codes.str.startswith('QB', na=False)]
    PTStops = pd.DataFrame(
        {
            'NaptanID': qb_codes.str.slice(start=3, stop=15).str.rstrip(),
            'Xcoord': qb_codes.str.slice(start=15, stop=21),
            'Ycoord': qb_codes.str.slice(start=23, stop=29),
        },
        columns=['NaptanID', 'Xcoord', 'Ycoord'],
    )
#     PTStops.to_sql('PTStops', conn, schema='rl', if_exists='replace', index=False, chunksize=None, method=None)
    # The message used to read an undefined global `region`, which raised
    # NameError after all the work was done. Accept it as an argument and
    # only fall back to a global when the notebook actually defines one.
    if region is None:
        region = globals().get('region')
    if region is None:
        print("PTStops finished!")
    else:
        print("PTStops - " + str(region) + " finished!")
    return PTStops

In [7]:
# Load the CIF file configured above into a one-column DataFrame.
cif_df = read_cif(cif_file)

# Stop extraction is currently disabled; un-comment the lines below to run it.
# PTStops = GetPtStops(cif_df)
# print("Done!")


CIF file size is 1481.19 MB
Counting number of lines...
46748423 lines in the file!


  0%|          | 0/47 [00:00<?, ?it/s]

CIF data imported!
