# In [None]:
import requests
from bs4 import BeautifulSoup
import tempfile
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from scipy.stats import pearsonr
from tensorflow.keras.applications import VGG16
from IPython.display import display, Image
import easyocr

# Function to download an image and save it to the current working directory
def download_and_display_image(url, keyword, index, timeout=10):
    """Download the image at ``url`` and save it as ``{keyword}_{index}.jpg``.

    Despite its name, this function only writes the file; it does not
    display anything.

    Args:
        url: Address of the image. ``https://`` is prepended when the value
            does not already start with ``http://`` or ``https://``.
        keyword: Search term, used as the file-name prefix.
        index: Number used as the file-name suffix.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The path of the saved file (relative to the current working
        directory), or ``None`` when the download fails for any reason.
    """
    try:
        # Check for an actual scheme rather than the bare letters "http", so
        # that scheme-less hosts such as "httpbin.org/image" still get one.
        if not url.lower().startswith(("http://", "https://")):
            url = "https://" + url.lstrip("/")

        # requests waits indefinitely by default; a timeout keeps one stalled
        # server from hanging the whole run.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print("Failed to download image")
            return None

        file_name = f"{keyword}_{index}.jpg"
        with open(file_name, 'wb') as f:
            f.write(response.content)
        return file_name
    except Exception as e:
        # Deliberately best-effort: one bad URL must not abort the caller's
        # download loop, so the error is reported and None is returned.
        print(f"Error occurred while downloading image: {str(e)}")
        return None

# Function to display an image
def display_image(image_path):
    """Show the image file at ``image_path`` through IPython's ``display``."""
    picture = Image(filename=image_path)
    display(picture)

# Function to search and download images
def search_and_download(keyword, max_images=5, timeout=10):
    """Search Google Images for ``keyword`` and download the first results.

    Args:
        keyword: Search term. It is sent as a query parameter, so spaces and
            characters such as ``&`` or ``#`` are URL-encoded correctly.
        max_images: Maximum number of image URLs taken from the results page.
        timeout: Seconds to wait for the search page before giving up.

    Returns:
        A list with the paths of the images that were downloaded successfully
        (possibly empty), or ``None`` when the results page could not be
        fetched or an error occurred.
    """
    try:
        # Set headers to make the request look like it's coming from a browser
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

        # Let requests build the query string so the keyword is escaped, and
        # bound the wait so a stalled connection cannot hang the script.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": keyword, "tbm": "isch"},
            headers=headers,
            timeout=timeout,
        )
        if response.status_code != 200:
            print("Failed to fetch search results")
            return None

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.content, 'html.parser')

        # Keep only <img> tags whose src is an absolute http(s) URL
        img_urls = [
            img['src']
            for img in soup.find_all('img')
            if img.get('src', '').startswith('http')
        ]

        downloaded_images = []
        for position, img_url in enumerate(img_urls[:max_images], start=1):
            image_path = download_and_display_image(img_url, keyword, position)
            if image_path:
                downloaded_images.append(image_path)
        return downloaded_images
    except Exception as e:
        # Best-effort boundary: report the problem and signal failure with
        # None, which callers test for by truthiness.
        print(f"Error occurred while searching and downloading: {str(e)}")
        return None

# Load and preprocess an image
def load_and_preprocess_image(image_path):
    """Read an image file and turn it into a one-image batch for VGG16.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis and passed through VGG16's ``preprocess_input``.
    """
    from tensorflow.keras.applications.vgg16 import preprocess_input

    image_utils = tf.keras.preprocessing.image
    picture = image_utils.load_img(image_path, target_size=(224, 224))
    pixels = image_utils.img_to_array(picture)
    # Add a batch dimension
    batch = tf.expand_dims(pixels, axis=0)
    return preprocess_input(batch)

# Load the VGG16 model with ImageNet weights once, at module import time, so
# that every extract_features() call reuses the same network.
# NOTE(review): with include_top=True the network's output is presumably the
# ImageNet class scores rather than an intermediate feature embedding --
# confirm that this is the intended "feature" vector for logo comparison.
model = VGG16(weights='imagenet', include_top=True, input_shape=(224,224, 3))

# Function to extract features from an image
def extract_features(image_path):
    """Run an image through the module-level ``model`` and flatten the output.

    Returns:
        The prediction for the image at ``image_path`` as a 1-D array.
    """
    batch = load_and_preprocess_image(image_path)
    return model.predict(batch).flatten()

# Function to calculate cosine similarity between features
def cosine_similarity(features1, features2):
    """Return the cosine of the angle between two feature vectors.

    Args:
        features1: First 1-D vector.
        features2: Second 1-D vector, same length as ``features1``.

    Returns:
        ``dot(f1, f2) / (||f1|| * ||f2||)``, or ``0.0`` when either vector
        has zero length. The cosine is undefined in that case and the plain
        formula would produce NaN, which would poison any score built on it.
    """
    denominator = np.linalg.norm(features1) * np.linalg.norm(features2)
    if denominator == 0:
        return 0.0
    return np.dot(features1, features2) / denominator

# Function to calculate Euclidean distance between features
def euclidean_distance(features1, features2):
    """Return the Euclidean (L2) distance between two feature vectors.

    Args:
        features1: First vector; a NumPy array or any plain sequence.
        features2: Second vector, same length as ``features1``.

    Returns:
        The norm of the element-wise difference.
    """
    # Coerce to arrays first: subtracting two plain lists raises TypeError,
    # while cosine_similarity already accepts them.
    difference = np.asarray(features1) - np.asarray(features2)
    return np.linalg.norm(difference)

# Function to calculate Pearson correlation coefficient between features
def pearson_correlation(features1, features2):
    """Return Pearson's r for the two feature vectors.

    Only the correlation coefficient from ``scipy.stats.pearsonr`` is
    returned; the accompanying p-value is discarded.
    """
    result = pearsonr(features1, features2)
    return result[0]

# Function to extract text from an image using EasyOCR
# EasyOCR readers keyed by language tuple. Building a Reader loads the OCR
# models, so each one is created once and reused across calls.
_OCR_READERS = {}


def _get_ocr_reader(languages=('en',)):
    """Return the shared ``easyocr.Reader`` for ``languages``, creating it on first use."""
    reader = _OCR_READERS.get(languages)
    if reader is None:
        reader = easyocr.Reader(list(languages))
        _OCR_READERS[languages] = reader
    return reader


def extract_text(image_path):
    """Extract English text from an image using EasyOCR.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The second element of every detection returned by ``readtext``
        (used here as the recognised text), joined with single spaces.
        An empty string is returned when nothing is detected.
    """
    results = _get_ocr_reader().readtext(image_path)
    # Joining an empty sequence already yields '', so no special case is needed.
    return ' '.join(result[1] for result in results)

# Script entry point: OCR the test logo, download reference images for the
# brand, score each reference against the test image and report a verdict.
if __name__ == "__main__":
    keyword = input("Enter the brand name: ")
    test_image_path = "/content/Nike-SYMOBOL.jpg"  # Path to the test image
    test_text = extract_text(test_image_path)
    print("Text extracted from the test image:")
    print(test_text)

    real_image_paths = search_and_download(keyword)
    if real_image_paths:
        test_image_features = extract_features(test_image_path)
        real_images_features = [extract_features(real_image_path) for real_image_path in real_image_paths]

        # Compare each downloaded image with the test image
        similarity_scores = []
        for real_image_path, real_image_features in zip(real_image_paths, real_images_features):
            real_text = extract_text(real_image_path)
            print(f"Text extracted from the real image ({real_image_path}):")
            print(real_text)

            # Calculate similarity using different techniques
            cosine_sim = cosine_similarity(real_image_features, test_image_features)
            euclidean_dist = euclidean_distance(real_image_features, test_image_features)
            pearson_corr = pearson_correlation(real_image_features, test_image_features)
            text_similarity = 1 if real_text == test_text else 0

            # Each of the four terms is at most 1 (the distance is mapped to
            # 1 / (1 + d)), so their mean scaled by 10 is at most 10.
            combined_similarity = 10 * (cosine_sim + (1 / (1 + euclidean_dist)) + pearson_corr + text_similarity) / 4

            # Append the similarity score and image path
            similarity_scores.append((combined_similarity, real_image_path))

        # Sort the similarity scores in descending order
        similarity_scores.sort(reverse=True)

        # real_image_paths is non-empty in this branch, so there is always at
        # least one score and no "nothing to compare" case to handle.
        max_similarity_score, most_similar_image_path = similarity_scores[0]

        # Display the most similar image and determine if the test image is fake or real
        print(f"Most similar image: {most_similar_image_path} (score: {max_similarity_score:.2f})")
        display_image(most_similar_image_path)
        if max_similarity_score > 5:
            print("Test image is a real logo.")
        else:
            print("Test image is a fake logo.")
    else:
        print("No images found for the given keyword.")