## **BITS F407 - ARTIFICIAL INTELLIGENCE**
--------------------------------------------------------------------------------


**PROJECT TITLE: ENHANCING TEXT-PATTERN SEARCH IN SQL DATABASES**
--------------------------------------------------------------------------------

***Team number: 68***


---
**Full names of all students in the team: ANIRUDH BAGALKOTKER, SAKAR HIRDE**

---
**Id number of all students in the team: 2021A7PS2682H, 2021A3PS3203H**


## ***1. Import Dependencies***

In [None]:
%pip install pandas numpy matplotlib mysql-connector

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mysql.connector
import time

## ***2. Connect to MySQL Database***

In [None]:
# Connect to the MySQL server on this machine; no default schema is chosen here.
mysqlConnector = mysql.connector.connect(host="localhost", user="root", password="root")
print(mysqlConnector)

# Cursor through which the statements below are issued.
mysqlCursor = mysqlConnector.cursor()

# Ask the server which databases exist and print one result row per line.
mysqlCursor.execute("SHOW DATABASES")
for database in mysqlCursor:
    print(database)

## ***3. Create Database***

In [None]:
# Create the project schema if it is missing, then make it the active
# database for later statements issued through this cursor.
for statement in ("CREATE DATABASE IF NOT EXISTS AI_PROJ", "USE AI_PROJ"):
    mysqlCursor.execute(statement)

## ***4. Create Tables***

In [None]:
# BOOKS: book catalogue columns (BID, ISBN, NAME, MRP, DESCRIPTION, IMG, AUTHOR,
# FORMAT, PAGES, WEIGHT, REVIEW). IF NOT EXISTS leaves an existing table untouched,
# so re-running this cell does not raise or alter a table that is already there.
# NOTE(review): this DDL declares no PRIMARY KEY and no FULLTEXT index. The
# MATCH(NAME, AUTHOR, DESCRIPTION) ... AGAINST query in MatchQuery presumably
# relies on a FULLTEXT index created outside this notebook - TODO confirm.
mysqlCursor.execute("CREATE TABLE IF NOT EXISTS `BOOKS` ( `BID` int(10) UNSIGNED NOT NULL, `ISBN` bigint(13) UNSIGNED DEFAULT NULL, `NAME` varchar(200) COLLATE utf8_unicode_ci NOT NULL, `MRP` int(6) UNSIGNED DEFAULT NULL, `DESCRIPTION` varchar(1000) COLLATE utf8_unicode_ci DEFAULT NULL, `IMG` varchar(3000) COLLATE utf8_unicode_ci DEFAULT NULL, `AUTHOR` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL, `FORMAT` varchar(25) COLLATE utf8_unicode_ci DEFAULT 'PAPERBACK' COMMENT 'PAPERBACK OR HARDCOVER', `PAGES` smallint(4) DEFAULT NULL, `WEIGHT` smallint(5) DEFAULT NULL, `REVIEW` tinyint(1) UNSIGNED NOT NULL DEFAULT '0') ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;")

# INVENTORY: stock/listing columns (PID, BID, SID, COND, QTY, CP, SP, DISCOUNT,
# LANG, PI, CREATED, MODIFIED). The search queries join INVENTORY.BID to
# BOOKS.BID, but no FOREIGN KEY is declared here. MODIFIED is refreshed by the
# server on every row update (ON UPDATE CURRENT_TIMESTAMP).
mysqlCursor.execute("CREATE TABLE IF NOT EXISTS `INVENTORY` ( `PID` bigint(20) UNSIGNED NOT NULL, `BID` int(10) UNSIGNED NOT NULL, `SID` int(10) UNSIGNED NOT NULL, `COND` varchar(25) COLLATE utf8_unicode_ci NOT NULL DEFAULT 'NEW', `QTY` smallint(5) UNSIGNED NOT NULL DEFAULT '1', `CP` float(7,2) UNSIGNED NOT NULL, `SP` float(7,2) UNSIGNED NOT NULL, `DISCOUNT` float(3,1) UNSIGNED DEFAULT '0.0', `LANG` varchar(25) COLLATE utf8_unicode_ci DEFAULT 'ENGLISH', `PI` int(10) UNSIGNED NOT NULL DEFAULT '0', `CREATED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, `MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;")

We have already created the tables through MySQL Workbench (using SQL), generated some sample data, and populated the tables with it.

## ***5. SQL In-Built Functions Query***

There are four built-in SQL text-search constructs:

1. **LIKE**
2. **SIMILAR TO**
3. **CONTAINS**
4. **MATCH**

Let's apply the LIKE and MATCH queries and measure the time taken to execute each query.

In [None]:
def MatchQuery(searchString):
    """Run the full-text (MATCH ... AGAINST) search and print its rows and timing.

    The query joins INVENTORY to BOOKS on BID, keeps rows whose NAME, AUTHOR
    or DESCRIPTION match ``searchString`` in boolean mode and whose NAME or
    AUTHOR also contains it as a substring, and orders them by relevance,
    highest first.

    Args:
        searchString: Text to search for; passed to the driver as bound
            parameters rather than interpolated into the SQL.

    Returns:
        None. Every result row is printed, followed by the elapsed wall-clock
        time, which covers executing the query and fetching and printing
        the rows.
    """
    began = time.time()
    like_pattern = f"%{searchString}%"
    query = """
    SELECT DISTINCT 
        I.PID, I.BID, I.SID, I.COND, I.QTY, I.CP, I.SP, I.DISCOUNT, I.LANG, B.ISBN, B.NAME, 
        B.MRP, B.DESCRIPTION, B.IMG, B.AUTHOR, B.FORMAT, B.PAGES, B.WEIGHT, B.REVIEW, 
    MATCH(B.NAME, B.AUTHOR, B.DESCRIPTION) AGAINST (%s IN BOOLEAN MODE) 
    AS relevance FROM INVENTORY I JOIN BOOKS B ON I.BID = B.BID 
    WHERE MATCH(B.NAME, B.AUTHOR, B.DESCRIPTION) AGAINST (%s IN BOOLEAN MODE) 
    AND (B.NAME LIKE %s OR B.AUTHOR LIKE %s) ORDER BY relevance DESC 
    """
    # Two placeholders feed the MATCH clauses, two feed the LIKE clauses.
    bound_values = (searchString, searchString, like_pattern, like_pattern)
    mysqlCursor.execute(query, bound_values)
    for row in mysqlCursor:
        print(row)

    print(f"Time taken for MatchQuery: {time.time() - began} seconds")

def LikeQuery(searchString):
    """Run the substring (LIKE) search and print its rows and timing.

    The query joins INVENTORY to BOOKS on BID and keeps rows whose NAME,
    AUTHOR or DESCRIPTION contains ``searchString`` as a substring.

    Args:
        searchString: Text to search for; it is wrapped in ``%`` wildcards
            and passed to the driver as bound parameters.

    Returns:
        None. Every result row is printed, followed by the elapsed wall-clock
        time, which covers executing the query and fetching and printing
        the rows.
    """
    start_time = time.time()
    sql = """
        SELECT DISTINCT I.PID, I.BID, I.SID, I.COND, I.QTY, I.CP, I.SP, I.DISCOUNT, I.LANG, 
            B.ISBN, B.NAME, B.MRP, B.DESCRIPTION, B.IMG, B.AUTHOR, B.FORMAT, 
            B.PAGES, B.WEIGHT, B.REVIEW
        FROM INVENTORY I
        JOIN BOOKS B ON I.BID = B.BID
        WHERE (B.NAME LIKE %s OR B.AUTHOR LIKE %s OR B.DESCRIPTION LIKE %s)
    """
    # The same wildcard pattern is bound to all three LIKE placeholders.
    pattern = f"%{searchString}%"
    mysqlCursor.execute(sql, (pattern, pattern, pattern))

    for result in mysqlCursor:
        print(result)

    elapsed_time = time.time() - start_time
    # Label the timing with this function's own name; it previously said
    # "MatchQuery", making the two timings indistinguishable in the output.
    print(f"Time taken for LikeQuery: {elapsed_time} seconds")


# Run both SQL search strategies on the same term so their timings can be compared.
search = "Rich"
for run_query in (MatchQuery, LikeQuery):
    run_query(search)

## ***6. AI Search Algorithms***

#### **6.1 Importing the Tables into a DataFrame**

In [None]:
# Column labels for each table, in the order the CREATE TABLE statements list
# them; assumes SELECT * returns the columns in that same order.
books_columns = [
    "BID", "ISBN", "NAME", "MRP", "DESCRIPTION", "IMG",
    "AUTHOR", "FORMAT", "PAGES", "WEIGHT", "REVIEW",
]
inventory_columns = [
    "PID", "BID", "SID", "COND", "QTY", "CP", "SP",
    "DISCOUNT", "LANG", "PI", "CREATED", "MODIFIED",
]

# Fetch every row of the 'BOOKS' table and wrap it in a pandas DataFrame.
mysqlCursor.execute("SELECT * FROM BOOKS")
books_data = mysqlCursor.fetchall()
books_df = pd.DataFrame(books_data, columns=books_columns)

# Show the 'BOOKS' DataFrame under its heading.
print("BOOKS Table:", books_df, sep="\n")

# Fetch every row of the 'INVENTORY' table and wrap it in a pandas DataFrame.
mysqlCursor.execute("SELECT * FROM INVENTORY")
inventory_data = mysqlCursor.fetchall()
inventory_df = pd.DataFrame(inventory_data, columns=inventory_columns)

# Show the 'INVENTORY' DataFrame under its heading (preceded by a blank line).
print("\nINVENTORY Table:", inventory_df, sep="\n")

#### **6.2 Fuzzy Search (Levenshtein Distance)**

Fuzzy search techniques assist in locating outcomes that roughly match
the search query. Soundex and Levenshtein distance are two common methods.

We will use the Levenshtein distance function to calculate the distance
between two strings.

In [None]:
# Levenshtein distance function (Fuzzy Search)
def levenshtein_distance(str1, str2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions, each costing 1, needed to turn one
    sequence into the other. Comparison is exact, so it is case-sensitive
    for strings.

    Args:
        str1: First sequence (typically a string).
        str2: Second sequence (typically a string).

    Returns:
        The edit distance as a non-negative integer.
    """
    # Iterate over the longer sequence in the outer loop so each DP row only
    # needs len(shorter) + 1 cells.
    if len(str1) >= len(str2):
        longer, shorter = str1, str2
    else:
        longer, shorter = str2, str1

    # Against an empty sequence the distance is just the other one's length.
    if len(shorter) == 0:
        return len(longer)

    prev_row = list(range(len(shorter) + 1))
    for row_no, item_long in enumerate(longer, start=1):
        curr_row = [row_no]
        for col, item_short in enumerate(shorter):
            cost_insert = prev_row[col + 1] + 1
            cost_delete = curr_row[col] + 1
            # A mismatch adds 1 (True), a match adds 0 (False).
            cost_substitute = prev_row[col] + (item_long != item_short)
            curr_row.append(min(cost_insert, cost_delete, cost_substitute))
        prev_row = curr_row

    return prev_row[-1]

search_term = 'Rich'

# Levenshtein distance to relevant columns in inventory_df.
# Each cell is converted with str() first, so missing values are compared as
# their string representation rather than raising.
for col in ['COND', 'LANG']:
    inventory_df[f'{col}_levenshtein_distance'] = inventory_df[col].apply(
        lambda x: levenshtein_distance(search_term, str(x))
    )

# Levenshtein distance to relevant columns in books_df
for col in ['NAME', 'AUTHOR', 'DESCRIPTION']:
    books_df[f'{col}_levenshtein_distance'] = books_df[col].apply(
        lambda x: levenshtein_distance(search_term, str(x))
    )

# A row counts as a fuzzy hit when its distance is strictly below this value.
threshold = 3

inventory_distance_cols = ['COND_levenshtein_distance', 'LANG_levenshtein_distance']
books_distance_cols = [
    'NAME_levenshtein_distance',
    'AUTHOR_levenshtein_distance',
    'DESCRIPTION_levenshtein_distance',
]

# Keep a row when its CLOSEST column is within the threshold (row-wise min),
# i.e. a match in any one column is enough - the same OR semantics as the
# LIKE query. Using the row-wise max instead would demand that every column
# be within the threshold simultaneously, which filters out practically
# everything.
filtered_inventory_df = inventory_df[inventory_df[inventory_distance_cols].min(axis=1) < threshold]
filtered_books_df = books_df[books_df[books_distance_cols].min(axis=1) < threshold]

print("Filtered INVENTORY Table:")
print(filtered_inventory_df)

print("\nFiltered BOOKS Table:")
print(filtered_books_df)
