In [23]:
import pickle
from transformers import BertForSequenceClassification, BertTokenizer

# Restore the pickled model object from disk.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open('combined_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Restore the pickled tokenizer the same way.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

In [9]:
import os
import pandas as pd
import torch
import pickle
from fuzzywuzzy import fuzz

# Folder that is scanned for the per-platform CSV files; edit to match your machine.
DIRECTORY_PATH = "C:/Users/HP/Desktop/ecom/Inputs"

# Restore the pickled model and tokenizer from the working directory.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open('combined_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

def load_datasets(directory_path):
    """Read every CSV file in a directory into one combined DataFrame.

    Each file is read with latin1 encoding and gets a ``platform`` column
    holding the file name without its extension. Files that cannot be read
    are reported on stdout and skipped.

    Args:
        directory_path: Directory containing the CSV files.

    Returns:
        A DataFrame with the rows of all readable files, re-indexed from 0.

    Raises:
        ValueError: If no CSV file in the directory could be read.
    """
    all_dataframes = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # combined frame (and anything sampled from it) reproducible across runs.
    for filename in sorted(os.listdir(directory_path)):
        # Case-insensitive so files such as "Amazon.CSV" are not silently skipped.
        if not filename.lower().endswith('.csv'):
            continue
        file_path = os.path.join(directory_path, filename)
        try:
            df = pd.read_csv(file_path, encoding='latin1')
        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort the load.
            print(f"Error reading {filename}: {e}")
            continue
        df['platform'] = os.path.splitext(filename)[0]  # Use filename (without extension) as platform name
        all_dataframes.append(df)

    if not all_dataframes:
        raise ValueError("No valid CSV files found in the specified directory.")

    return pd.concat(all_dataframes, ignore_index=True)

def predict(text):
    """Classify ``text`` with the module-level ``model`` and ``tokenizer``.

    Returns:
        A ``(predicted_class, confidence)`` tuple: the index of the highest
        softmax probability and that probability as a Python float.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    model.eval()  # switch the model to evaluation mode before the forward pass
    with torch.no_grad():
        class_probs = torch.softmax(model(**encoded).logits, dim=1)
        best_class = torch.argmax(class_probs, dim=1).item()
    confidence = class_probs[0][best_class].item()
    return best_class, confidence

def find_matches(prokick_title, all_products, threshold=80):
    """Return products whose title closely resembles ``prokick_title``.

    A row is a candidate when the case-insensitive ``fuzz.ratio`` between its
    title and ``prokick_title`` is at least ``threshold``. A candidate is kept
    only if ``predict`` assigns its title class 1.

    Args:
        prokick_title: Product title to search for.
        all_products: DataFrame with ``title`` and ``platform`` columns.
        threshold: Minimum ``fuzz.ratio`` score for a row to be a candidate.

    Returns:
        A list of ``(title, platform, confidence)`` tuples in row order.
    """
    query = prokick_title.lower()  # loop-invariant, so lower-case it once
    matches = []
    for _, row in all_products.iterrows():
        title = row['title']
        # pandas stores missing cells as float NaN; calling .lower() on one
        # would raise AttributeError, so skip rows without a text title.
        if not isinstance(title, str):
            continue
        if fuzz.ratio(query, title.lower()) < threshold:
            continue
        predicted_class, confidence = predict(title)
        if predicted_class == 1:  # 1 indicates availability
            matches.append((title, row['platform'], confidence))
    return matches

def main():
    """Run an interactive product-title search over the loaded CSV data.

    Loads all products once, then repeatedly prompts for a title and prints
    the matches returned by ``find_matches``. The loop ends when the user
    enters 'quit' (any case, surrounding whitespace ignored) or when stdin
    is closed.
    """
    # Load all products once at the beginning
    all_products = load_datasets(DIRECTORY_PATH)

    while True:
        try:
            prokick_title = input("Enter the product title as seen on Prokick (or 'quit' to exit): ")
        except EOFError:
            # stdin was closed (piped input, Ctrl-D): exit instead of crashing.
            break

        # Strip padding so " quit " still exits and titles are scored on
        # their text rather than on stray whitespace.
        prokick_title = prokick_title.strip()
        if prokick_title.lower() == 'quit':
            break
        if not prokick_title:
            # Nothing to search for; avoid scanning the whole catalogue.
            continue

        matches = find_matches(prokick_title, all_products)

        print(f"\nNumber of suitable matches found: {len(matches)}")
        if matches:
            print("\nAvailable as:")
            for title, platform, confidence in matches:
                print(f"- {title} (on {platform}, confidence: {confidence:.2f})")
        else:
            print("No matches found on other platforms.")
        print("\n")

if __name__ == "__main__":
    main()

Enter the product title as seen on Prokick (or 'quit' to exit):  li-ning xp90 iv strung badminton racquet, white/silver



Number of suitable matches found: 5

Available as:
- Li-Ning XIPHOS X1 Unstrung Badminton Racquet , Red/Silver (on Prokick, confidence: 1.00)
- Li-Ning XP80 IV Strung Badminton Racquet, Grey/Green (on Prokick, confidence: 1.00)
- Li-Ning XP70 IV Strung Badminton Racquet, Black/Gold (on Prokick, confidence: 1.00)
- Li-Ning XIPHOS X1 Unstrung Badminton Racquet , White/Silver (on Prokick, confidence: 1.00)
- Li-Ning XP90 IV Strung Badminton Racquet, White/Silver (on Prokick, confidence: 1.00)




Enter the product title as seen on Prokick (or 'quit' to exit):  victor a220 a all- round professional badminton shoes with u- shape 2.5



Number of suitable matches found: 6

Available as:
- Victor A220 A All- Round Professional Badminton Shoes with U- Shape 2.5 (on Instasports, confidence: 1.00)
- Victor A530 AB All Around Professional Badminton Shoes - V shape (on Instasports, confidence: 1.00)
- Victor S70 A Speed Series Professional Badminton Shoes with V - Shape 2.5 (on Instasports, confidence: 1.00)
- Victor A670 F All- Round Professional Badminton Shoes (on Instasports, confidence: 1.00)
- Victor A220 B All- Round Professional Badminton Shoes with U- Shape 2.5 (on Instasports, confidence: 1.00)
- Victor A930  D All- Round Professional Badminton Shoes (on Instasports, confidence: 1.00)




Enter the product title as seen on Prokick (or 'quit' to exit):  nivia carbonite 6.0 football stud - yellow



Number of suitable matches found: 9

Available as:
- Nivia Carbonite 6.0 Football Stud - Yellow (on Instasports, confidence: 1.00)
- Nivia Carbonite 6.0 Football Stud - Pink (on Instasports, confidence: 1.00)
- Nivia Carbonite 5.0 Pro Football Stud - Blue (on Instasports, confidence: 1.00)
- Nivia Carbonite 6.0 Football Stud - Black (on Instasports, confidence: 1.00)
- Nivia Carbonite 5.0 Pro Football Stud (on Instasports, confidence: 1.00)
- Nivia Carbonite 5.0 Pro Football Stud - Black (on Instasports, confidence: 1.00)
- Nivia Carbonite 5.0 Football Stud - Yellow (on Instasports, confidence: 1.00)
- Nivia Carbonite 5.0 Football Stud - Blue (on Instasports, confidence: 1.00)
- Nivia Carbonite 5.0 Football Stud - Black (on Instasports, confidence: 1.00)




Enter the product title as seen on Prokick (or 'quit' to exit):  new balance heritage 590 english willow cricket bat -sh



Number of suitable matches found: 25

Available as:
- New Balance Heritage 570 English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance Heritage 570+ English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance Heritage 590 English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance Heritage 840 English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance Burn 570 English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance Burn 590 English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance Burn 570+ English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance Burn 840 English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance TC 570+ English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance TC 840 English Willow Cricket Bat -SH (on Instasports, confidence: 1.00)
- New Balance TC 740 English Willow Cricke

Enter the product title as seen on Prokick (or 'quit' to exit):  li-ning 260 wristband (black)



Number of suitable matches found: 1

Available as:
- Li-Ning 260 Wristband (Black) (on Amazon, confidence: 1.00)




Enter the product title as seen on Prokick (or 'quit' to exit):  exit



Number of suitable matches found: 0
No matches found on other platforms.




Enter the product title as seen on Prokick (or 'quit' to exit):  quit


In [None]:
import os
import pandas as pd
import torch
import pickle
from fuzzywuzzy import fuzz
from sklearn.metrics import f1_score, precision_score, recall_score

# Folder that is scanned for the per-platform CSV files; edit to match your machine.
DIRECTORY_PATH = "C:/Users/HP/Desktop/ecom/Inputs"

# Restore the pickled model and tokenizer from the working directory.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open('combined_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

def load_datasets(directory_path):
    """Read every CSV file in a directory into one combined DataFrame.

    Each file is read with latin1 encoding and gets a ``platform`` column
    holding the file name without its extension. Files that cannot be read
    are reported on stdout and skipped.

    Args:
        directory_path: Directory containing the CSV files.

    Returns:
        A DataFrame with the rows of all readable files, re-indexed from 0.

    Raises:
        ValueError: If no CSV file in the directory could be read.
    """
    all_dataframes = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # combined frame (and the fixed-seed sample drawn from it) reproducible.
    for filename in sorted(os.listdir(directory_path)):
        # Case-insensitive so files such as "Amazon.CSV" are not silently skipped.
        if not filename.lower().endswith('.csv'):
            continue
        file_path = os.path.join(directory_path, filename)
        try:
            df = pd.read_csv(file_path, encoding='latin1')
        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort the load.
            print(f"Error reading {filename}: {e}")
            continue
        df['platform'] = os.path.splitext(filename)[0]
        all_dataframes.append(df)

    if not all_dataframes:
        raise ValueError("No valid CSV files found in the specified directory.")

    return pd.concat(all_dataframes, ignore_index=True)

def predict(text):
    """Classify ``text`` with the module-level ``model`` and ``tokenizer``.

    Returns:
        A ``(predicted_class, confidence)`` tuple: the index of the highest
        softmax probability and that probability as a Python float.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    model.eval()  # switch the model to evaluation mode before the forward pass
    with torch.no_grad():
        class_probs = torch.softmax(model(**encoded).logits, dim=1)
        best_class = torch.argmax(class_probs, dim=1).item()
    confidence = class_probs[0][best_class].item()
    return best_class, confidence

def find_matches(prokick_title, all_products, threshold=80):
    """Return products whose title closely resembles ``prokick_title``.

    A row is a candidate when the case-insensitive ``fuzz.ratio`` between its
    title and ``prokick_title`` is at least ``threshold``. A candidate is kept
    only if ``predict`` assigns its title class 1.

    Args:
        prokick_title: Product title to search for.
        all_products: DataFrame with ``title`` and ``platform`` columns.
        threshold: Minimum ``fuzz.ratio`` score for a row to be a candidate.

    Returns:
        A list of ``(title, platform, confidence)`` tuples in row order.
    """
    query = prokick_title.lower()  # loop-invariant, so lower-case it once
    matches = []
    for _, row in all_products.iterrows():
        title = row['title']
        # pandas stores missing cells as float NaN; calling .lower() on one
        # would raise AttributeError, so skip rows without a text title.
        if not isinstance(title, str):
            continue
        if fuzz.ratio(query, title.lower()) < threshold:
            continue
        predicted_class, confidence = predict(title)
        if predicted_class == 1:  # 1 indicates availability
            matches.append((title, row['platform'], confidence))
    return matches

def evaluate_model(test_data, label_column=None):
    """Print weighted F1, precision and recall for ``predict`` on ``test_data``.

    Args:
        test_data: DataFrame with a ``title`` column; every title is
            classified with ``predict``.
        label_column: Optional name of a column in ``test_data`` that holds
            the true class labels. When omitted, placeholder labels are used
            (first half 1, remainder 0) and a note saying so is printed, so
            the reported numbers are for demonstration only.
    """
    y_pred = [predict(title)[0] for title in test_data['title']]

    labels_are_assumed = label_column is None
    if labels_are_assumed:
        # Assuming class balance for demonstration
        # In a real scenario, you'd need a way to determine true labels
        half = len(y_pred) // 2
        y_true = [1] * half + [0] * (len(y_pred) - half)
    else:
        y_true = list(test_data[label_column])

    # zero_division=0 keeps the value scikit-learn already reports for an
    # undefined metric (e.g. a class that is never predicted) but without
    # emitting an UndefinedMetricWarning on every run.
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)

    print(f"F1 Score: {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    if labels_are_assumed:
        print("Note: These metrics are based on assumed true labels and may not reflect actual performance.")

def main():
    """Evaluate the model on a sample, then run an interactive title search.

    Loads all products, prints metrics for a fixed-seed sample of at most
    1000 rows via ``evaluate_model``, and then repeatedly prompts for a title
    and prints the matches returned by ``find_matches``. The loop ends when
    the user enters 'quit' (any case, surrounding whitespace ignored) or when
    stdin is closed.
    """
    # Load all products
    all_products = load_datasets(DIRECTORY_PATH)

    # Evaluate the model
    print("Evaluating model performance...")
    test_data = all_products.sample(n=min(1000, len(all_products)), random_state=42)
    evaluate_model(test_data)

    print("\nProduct Search")
    while True:
        try:
            prokick_title = input("Enter the product title as seen on Prokick (or 'quit' to exit): ")
        except EOFError:
            # stdin was closed (piped input, Ctrl-D): exit instead of crashing.
            break

        # Strip padding so " quit " still exits and titles are scored on
        # their text rather than on stray whitespace.
        prokick_title = prokick_title.strip()
        if prokick_title.lower() == 'quit':
            break
        if not prokick_title:
            # Nothing to search for; avoid scanning the whole catalogue.
            continue

        matches = find_matches(prokick_title, all_products)

        print(f"\nNumber of suitable matches found: {len(matches)}")
        if matches:
            print("\nAvailable as:")
            for title, platform, confidence in matches:
                print(f"- {title} (on {platform}, confidence: {confidence:.2f})")
        else:
            print("No matches found on other platforms.")
        print("\n")

if __name__ == "__main__":
    main()

Evaluating model performance...


  _warn_prf(average, modifier, msg_start, len(result))


F1 Score: 0.3333
Precision: 0.2500
Recall: 0.5000
Note: These metrics are based on assumed true labels and may not reflect actual performance.

Product Search


Enter the product title as seen on Prokick (or 'quit' to exit):  li-ning super series 900 strung badminton racket with free full cover (84 grams, red/grey)



Number of suitable matches found: 2

Available as:
- Li-Ning Super Series 900 Strung Badminton Racket with Free Full Cover (84 Grams, Red/Grey) (on Amazon, confidence: 1.00)
- Li-Ning Carbon Fibre Super Series 900 Strung Badminton Racket with Full Cover (84 Grams, Grey/Copper) (on Amazon, confidence: 1.00)




Enter the product title as seen on Prokick (or 'quit' to exit):  yonex badminton full cover for astrox racquet



Number of suitable matches found: 1

Available as:
- Yonex Badminton Full Cover for Astrox Racquet (on Amazon, confidence: 1.00)




Enter the product title as seen on Prokick (or 'quit' to exit):  yonex arcsaber 71 light strung badminton racquet, 5ug4 - gold



Number of suitable matches found: 3

Available as:
- Yonex Arcsaber 71 Light Strung Badminton Racquet, 5UG4 - Gold (on Amazon, confidence: 1.00)
- Yonex Arcsaber 71 Light Strung Badminton Racquet, G4 - Gold (on Prokick, confidence: 1.00)
- Yonex Arcsaber 71 Light Strung Badminton Racquet 5U5 - White (on Prokick, confidence: 1.00)




Enter the product title as seen on Prokick (or 'quit' to exit):  scitron volcano preworkout - (pink lemonade)



Number of suitable matches found: 1

Available as:
- Scitron Volcano Preworkout - (Pink Lemonade) (on Instasports, confidence: 1.00)


