# In [None]:
import sqlite3
import pandas as pd
from difflib import SequenceMatcher
import itertools


# --- Step 1: Open the SQLite database ---
# sqlite3.connect opens the database file "trades.db" in the current working
# directory, creating it if it does not exist yet.
# NOTE(review): the connection is not closed anywhere in this section —
# confirm it is closed later or wrap the work in contextlib.closing.
conn = sqlite3.connect("trades.db") 
cursor = conn.cursor()

# --- Step 2: Load the trades.sql file into the database ---
# The whole file is read into memory and run as one multi-statement script.
# NOTE(review): open() is called without an explicit encoding, so the
# platform default is used — confirm trades.sql decodes correctly everywhere.
# NOTE(review): trades.db lives on disk, so re-running this section applies
# trades.sql again to the existing database; whether that fails or duplicates
# rows depends on the script's contents — verify it is safe to re-run.
with open("trades.sql", "r") as f:
    sql_script = f.read()
cursor.executescript(sql_script)
conn.commit()

# --- Step 3: Read the customer table into a Pandas DataFrame ---
# Every column of the customer table is loaded; the code below compares the
# rows of this DataFrame with each other.
df = pd.read_sql_query("SELECT * FROM customer", conn)
print("Customer table:")
print(df.head())

# --- Step 4: Define similarity function ---
def record_similarity(r1, r2) -> float:
    """
    Return the mean field-by-field similarity of two records.

    Every field of ``r1`` is looked up in both records, both values are
    converted with ``str()``, and the two strings are scored with
    ``SequenceMatcher.ratio()`` (0.0 = nothing in common, 1.0 = identical).
    The result is the plain average of those per-field scores.

    All fields take part, whatever their type: numbers, dates and missing
    values are compared through their string form. As a consequence two
    missing values (e.g. both ``None``) stringify identically and count as a
    perfect match for that field.

    Args:
        r1: First record; a pandas Series row or any mapping that exposes
            ``keys()`` and ``[]`` lookup. Its keys define the fields compared.
        r2: Second record; must contain every field present in ``r1``.

    Returns:
        The average ratio in the range [0.0, 1.0], or 0.0 when ``r1`` has no
        fields at all.

    Raises:
        KeyError: If ``r2`` lacks one of the fields of ``r1``.
    """
    # Take the field names from the record itself rather than from a global
    # DataFrame, so the function works on any pair of compatible records.
    fields = list(r1.keys())
    if not fields:
        # Nothing to compare; avoid dividing by zero below.
        return 0.0

    ratios = [
        SequenceMatcher(None, str(r1[field]), str(r2[field])).ratio()
        for field in fields
    ]
    return sum(ratios) / len(ratios)

# --- Step 5: Compare all pairs of customers ---
# Score every unordered pair of rows lazily, in the order produced by
# itertools.combinations over the DataFrame index.
scored_pairs = (
    (first, second, record_similarity(df.loc[first], df.loc[second]))
    for first, second in itertools.combinations(df.index, 2)
)

# Keep only the pairs whose average similarity is strictly above 0.7.
similar_pairs = [entry for entry in scored_pairs if entry[2] > 0.7]

# Print one line per retained pair; the labels are DataFrame index values.
print("\nCustomers with similarity > 0.7:")
for first, second, score in similar_pairs:
    print(f"Customer {first} and {second} → similarity {score:.2f}")
