# get the links from Chrome browser history

In [10]:
%pip install browserhistory

Note: you may need to restart the kernel to use updated packages.


In [11]:
import browserhistory as bh     # extract browser history from a user's local computer and write the data to csv files.
import sys      # manipulate different parts of the Python runtime environment.
import sqlite3  # integrate the SQLite database with Python
import csv      # implements classes to read and write tabular data in CSV format
import os       # interacting with the operating system

In [12]:
def get_database_paths() -> str:
    """
    Return the path to Chrome's ``History`` SQLite database for the current user.

    Only macOS (``sys.platform == 'darwin'``) is supported. The returned path
    points at the ``Profile 1`` Chrome profile inside the user's home directory.

    Returns:
        The path to the Chrome history database, as a str.

    Raises:
        NotImplementedError: if the program is not running on macOS.
    """
    platform_code = sys.platform
    # Darwin is the core Unix operating system of macOS.
    if platform_code != 'darwin':
        # Fail with an explicit message; previously this case fell through to
        # the return statement and raised an UnboundLocalError.
        raise NotImplementedError(
            f"Chrome history lookup is only supported on macOS, not on {platform_code!r}"
        )
    # Resolve the home directory directly instead of guessing it from the
    # current working directory, which is only right when the program is
    # started from somewhere below the user's home.
    home_path = os.path.expanduser('~')
    # Chrome's browsing-history database file on macOS.
    abs_chrome_path = os.path.join(home_path, 'Library', 'Application Support', 'Google/Chrome/Profile 1', 'History')
    return abs_chrome_path

In [13]:
def get_browserhistory() -> list:
    """
    Query the Chrome history database and return the matching rows.

    Returns:
        A list of ``(url, title, last_visit_time)`` tuples, at most 150 of
        them, ordered by ``last_visit_time`` descending. An empty list is
        returned when the database cannot be opened or the query fails; in
        those cases a message is printed instead of raising.
    """
    # Chrome stores last_visit_time as microseconds since 1601-01-01, so it is
    # divided by 1,000,000 and shifted by the 1601 epoch offset before being
    # handed to datetime(..., 'unixepoch').
    # NOTE(review): substr(date(...), 0, 9) appears to yield only the first 8
    # characters ('YYYY-MM-') of the date seven days ago, so the WHERE clause
    # matches that whole calendar month rather than "the last 7 days" --
    # confirm the intended window.
    _SQL = """ 
            SELECT urls.url, urls.title, datetime((last_visit_time/1000000)+(strftime('%s', '1601-01-01')), 'unixepoch', 'localtime') as last_visit_time
                FROM urls 
                WHERE datetime(last_visit_time / 1000000 + (strftime('%s', '1601-01-01')), 'unixepoch') LIKE (substr(date('now', 'localtime', '-7 day'), 0, 9) || '%')
                ORDER BY last_visit_time DESC
                limit 150 
            """
    # Initialised before any database work so every exit path has a value to
    # return (previously a failed connect left this name unbound).
    query_result = []
    path = get_database_paths()
    try:
        conn = sqlite3.connect(path)        # open the browser history database file
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(_SQL)
                # fetchall() returns a list; it is empty when no rows match.
                query_result = cursor.fetchall()
            except sqlite3.OperationalError:            # 2. browser is open (database in use)
                print('Please Completely Close your browser window')
            except Exception as err:                    # 3. other errors, e.g. an SQL syntax mistake
                print(err)
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if creating the cursor failed.
            conn.close()
    except sqlite3.OperationalError:                # 1. database could not be opened
        print('Database permission denied')

    return query_result

In [14]:
def write_browserhistory_csv() -> None:
    """Write the browser history rows to ``history.csv``.

    The file is created in the current working directory (overwriting any
    existing ``history.csv``). It starts with a ``url, title, time`` header
    row, followed by one row per entry returned by ``get_browserhistory()``.
    Every field is quoted.
    """
    query_result = get_browserhistory()

    with open('history.csv', mode='w', encoding='utf-8', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_ALL)
        # mode='w' truncates the file, so it is always empty at this point and
        # the header is always needed; no size check is required.
        csv_writer.writerow(['url', 'title', 'time'])
        # The rows are already in the order to be written; write them directly.
        csv_writer.writerows(query_result)

In [15]:
# Run the export: query the Chrome history and write it to history.csv.
write_browserhistory_csv()