In [None]:
import os
import time
import requests
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
import numpy as np
from numpy.linalg import norm
from tqdm import tqdm
import pickle
from flask import Flask, jsonify, request, send_from_directory
from flask_cors import CORS
from selenium import webdriver
from selenium.webdriver import ChromeOptions, Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException
from sklearn.neighbors import NearestNeighbors
import cv2
import base64
import re
from PIL import Image
from io import BytesIO

# Flask application object; CORS is enabled for all of its routes.
app = Flask(__name__)
CORS(app)

# Directory where fetched images are written and from which they are served.
IMAGE_FOLDER = 'downloaded_images'
# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test when two processes start at the same time.
os.makedirs(IMAGE_FOLDER, exist_ok=True)



# Feature extractor: an ImageNet-pretrained ResNet50 without its classification
# head, frozen, followed by global max pooling over the spatial dimensions.
_backbone = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
_backbone.trainable = False

resnet_model = tf.keras.Sequential([_backbone, GlobalMaxPooling2D()])

def fetch_image_urls(driver, css_selector, retry_count=3):
    """Return the ``src`` attribute of every element matching ``css_selector``.

    Waits up to 20 seconds for at least one matching element to be present.
    If a stale-element, timeout or no-such-element error occurs, the whole
    lookup is retried, up to ``retry_count`` attempts in total.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.
        css_selector: CSS selector identifying the image elements.
        retry_count: Maximum number of attempts.

    Returns:
        A list of the non-empty ``src`` values. Elements without a ``src``
        are skipped. An empty list is returned when ``retry_count`` is less
        than 1, because no attempt is made.

    Raises:
        StaleElementReferenceException, TimeoutException or
        NoSuchElementException: re-raised from the final failed attempt.
    """
    for attempt in range(retry_count):
        try:
            image_elements = WebDriverWait(driver, 20).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, css_selector))
            )
            # Read the attributes inside the try block: an element may go
            # stale between being located and being read.
            sources = [element.get_attribute('src') for element in image_elements]
            # get_attribute returns None when the attribute is absent.
            return [src for src in sources if src]
        except (StaleElementReferenceException, TimeoutException, NoSuchElementException) as e:
            if attempt >= retry_count - 1:
                raise
            print(f"{e} encountered. Retrying... (Attempt {attempt + 1})")

    # Only reached when retry_count < 1.
    return []

def download_images(image_urls):
    """Save each image in ``image_urls`` under ``IMAGE_FOLDER``.

    Two kinds of entries are handled:

    * ``data:image/<ext>;base64,<payload>`` URIs are decoded, opened with PIL
      and saved as ``image_<index>.<ext>``.
    * Anything else is fetched over HTTP and the response body is written to
      ``image_<index>.jpg``.

    GIF images are skipped in both cases. ``index`` only advances when an
    image is actually saved, so saved files are numbered consecutively.
    Processing is best-effort: a failure on one entry is reported with
    ``print`` and the loop continues with the next entry.

    Args:
        image_urls: Iterable of URL strings; empty or ``None`` entries are
            skipped.

    Returns:
        List of local file paths of the images that were saved, in order.
    """
    local_image_urls = []
    base64_pattern = re.compile(r'^data:image/(?P<ext>.+?);base64,(?P<data>.+)')
    # Seconds to wait for a remote host; without a timeout requests.get can
    # block indefinitely.
    download_timeout = 10
    index = 0

    for url in image_urls:
        if not url:
            print(f"Skipping empty image URL at index {index}")
            continue

        try:
            match = base64_pattern.match(url)
            if match:
                ext = match.group('ext').lower()
                if ext == 'gif':
                    print(f"Skipping base64 GIF image at index {index}")
                    continue
                # The subtype comes from scraped page content and becomes part
                # of a file name, so reject anything that is not a plain
                # alphanumeric token (e.g. one containing path separators).
                if not re.fullmatch(r'[a-z0-9]+', ext):
                    print(f"Skipping base64 image with unsupported type at index {index}")
                    continue

                print(f"Processing base64 image at index {index}")
                img_data = base64.b64decode(match.group('data'))
                image_path = os.path.join(IMAGE_FOLDER, f'image_{index}.{ext}')

                # Round-trip through PIL so that invalid image data is not
                # written to disk.
                with Image.open(BytesIO(img_data)) as img:
                    img.save(image_path)
            else:
                response = requests.get(url, timeout=download_timeout)
                if response.status_code != 200:
                    print(f"Failed to download image from {url}")
                    continue

                # The header may be missing; treat that as "not a GIF".
                content_type = response.headers.get('content-type') or ''
                if 'image/gif' in content_type.lower():
                    print(f"Skipping GIF image from URL: {url}")
                    continue

                print(f"Downloading image from {url}")
                image_path = os.path.join(IMAGE_FOLDER, f'image_{index}.jpg')
                with open(image_path, 'wb') as f:
                    f.write(response.content)

            local_image_urls.append(image_path)
            index += 1
        except Exception as e:
            # Deliberately broad: one bad image must not abort the batch.
            print(f"Error processing image at index {index}: {e}")

    return local_image_urls


def extract_features(img_path, resnet_model):
    """Compute an L2-normalised feature vector for the image at ``img_path``.

    The image is read with OpenCV, resized to 224x224, passed through the
    ResNet50 ``preprocess_input`` function and then through ``resnet_model``.
    The flattened prediction is divided by its Euclidean norm.

    Args:
        img_path: Path of the image file to read.
        resnet_model: Model exposing ``predict``; called on a batch of one.

    Returns:
        The normalised 1-D feature array, or ``None`` when the image cannot
        be read or the prediction has zero norm.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; cv2.resize would then fail.
        return None

    # NOTE(review): cv2.imread yields BGR channel order, whereas the Keras
    # ResNet50 preprocess_input is documented for RGB input. Left unchanged
    # because the stored embeddings were presumably computed the same way;
    # confirm before altering.
    img_array = cv2.resize(img, (224, 224))
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)

    result = resnet_model.predict(preprocessed_img).flatten()
    magnitude = norm(result)
    if magnitude == 0:
        # Dividing by zero would produce NaNs in the returned vector.
        return None

    return result / magnitude





@app.route('/', methods=['POST'])
def newp():
    """Return the stored images most similar to a previously downloaded image.

    Expects a JSON body of the form ``{"index": <n>}``, where ``n`` selects
    ``image_<n>.jpg``, ``.jpeg`` or ``.png`` inside ``IMAGE_FOLDER``. The
    image's features are compared by Euclidean distance with the embeddings
    in ``embeddings.pkl``, and up to four nearest entries are reported using
    the names in ``filenames.pkl``.

    Returns:
        ``{"similar_images": [...]}`` on success; a JSON error with status
        400 for a missing or invalid index, 404 when no matching image file
        exists, or 500 when feature extraction fails.
    """
    data = request.get_json(silent=True)
    index = data.get("index") if isinstance(data, dict) else None
    # The index is client-supplied and becomes part of a file path, so only a
    # plain non-negative integer is accepted.
    if index is None or not re.fullmatch(r'[0-9]+', str(index)):
        return jsonify({"error": "Invalid or missing index"}), 400

    img_path = None
    for ext in ('jpg', 'jpeg', 'png'):
        potential_path = os.path.join(IMAGE_FOLDER, f'image_{index}.{ext}')
        if os.path.exists(potential_path):
            img_path = potential_path
            break

    if not img_path:
        return jsonify({"error": "Image not found"}), 404

    features = extract_features(img_path, resnet_model)
    if features is None:
        return jsonify({"error": "Failed to extract features from image"}), 500

    # NOTE: pickle must only be used on trusted files; these are read from
    # the working directory of the server.
    with open('embeddings.pkl', 'rb') as embeddings_file:
        feature_list = np.array(pickle.load(embeddings_file))
    with open('filenames.pkl', 'rb') as filenames_file:
        filenames = pickle.load(filenames_file)

    n_neighbors = min(4, len(feature_list))
    if n_neighbors == 0:
        # NearestNeighbors rejects n_neighbors=0; nothing to compare against.
        return jsonify({"similar_images": []})

    neighbors = NearestNeighbors(n_neighbors=n_neighbors, algorithm='brute', metric='euclidean')
    neighbors.fit(feature_list)

    distances, indices = neighbors.kneighbors([features])

    # Reduce each stored filename to "image_<id>", where <id> is the text
    # between the last underscore and the following dot.
    similar_images = [
        f"image_{filenames[file].split('_')[-1].split('.')[0]}"
        for file in indices[0]
    ]

    return jsonify({"similar_images": similar_images})

def _scrape_instagram_tag(options, usertag):
    """Log in to Instagram and return image URLs from the tag's explore page."""
    from urllib.parse import quote

    # NOTE(review): credentials can now be supplied through the environment.
    # The previous hard-coded values remain as fallbacks so existing setups
    # keep working; they should be rotated and removed from the source.
    login_name = os.environ.get('INSTAGRAM_USERNAME', 'instrumental_freaakk')
    login_password = os.environ.get('INSTAGRAM_PASSWORD', 'demonlord')

    driver = webdriver.Chrome(options=options)
    try:
        driver.get("https://www.instagram.com/")

        username = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'input[name="username"]')))
        username.send_keys(login_name)
        username.send_keys(Keys.ENTER)

        password = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'input[name="password"]')))
        password.send_keys(login_password)
        password.send_keys(Keys.ENTER)

        # Fixed pause after submitting the login form.
        time.sleep(10)

        # Percent-encode the tag so it cannot alter the URL structure.
        driver.get(f"https://www.instagram.com/explore/tags/{quote(usertag, safe='')}/")

        time.sleep(5)

        return fetch_image_urls(driver, 'img[class="x5yr21d xu96u03 x10l6tqk x13vifvy x87ps6o xh8yej3"][crossorigin="anonymous"]')
    finally:
        # Always close the browser, even when a wait or the scrape raises;
        # otherwise each failed request leaves a Chrome process behind.
        driver.quit()


def _scrape_google_images(options, usertag):
    """Return image URLs from a Google image search for "<usertag> trending"."""
    from urllib.parse import quote_plus

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(f"https://www.google.com/search?q={quote_plus(usertag + ' trending')}&tbm=isch")

        time.sleep(5)

        return fetch_image_urls(driver, 'img[class="rg_i Q4LuWd"],img[class="YQ4gaf"]')
    finally:
        driver.quit()


@app.route('/fetch-images', methods=['POST'])
def fetch_images():
    """Scrape images for a tag from Instagram and Google and store them locally.

    Expects a JSON body of the form ``{"usertag": "<tag>"}``. Image URLs are
    collected from the Instagram explore page for the tag and from a Google
    image search, then downloaded with ``download_images`` (Google results
    first).

    Returns:
        ``{"result": "success", "urls": [...]}`` listing the local paths of
        the saved images, or a 400 JSON error when no tag is provided.
    """
    data = request.get_json(silent=True)
    usertag = data.get('usertag') if isinstance(data, dict) else None
    if not usertag:
        return jsonify({"error": "No tag provided"}), 400
    usertag = str(usertag)

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--disable-infobars")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])

    instagram_image_urls = _scrape_instagram_tag(options, usertag)
    print(f"Instagram image URLs: {instagram_image_urls}")

    news_image_urls = _scrape_google_images(options, usertag)
    print(f"News image URLs: {news_image_urls}")

    combined_image_urls = news_image_urls + instagram_image_urls
    print(f"Combined image URLs: {combined_image_urls}")

    local_image_urls = download_images(combined_image_urls)

    return jsonify({"result": "success", "urls": local_image_urls})

@app.route('/images/<filename>')
def serve_image(filename):
    """Serve the file named ``filename`` from ``IMAGE_FOLDER``."""
    file_response = send_from_directory(IMAGE_FOLDER, filename)
    return file_response

if __name__ == '__main__':
    # Script entry point: announce start-up, then serve the Flask app with
    # Werkzeug's run_simple on localhost:5000.
    print("Server started")
    from werkzeug.serving import run_simple

    host, port = 'localhost', 5000
    run_simple(host, port, app)


 * Running on http://localhost:5000
Press CTRL+C to quit


Server started


127.0.0.1 - - [11/Jul/2024 16:09:45] "OPTIONS /fetch-images HTTP/1.1" 200 -


Instagram image URLs: ['https://instagram.fstv8-1.fna.fbcdn.net/v/t51.29350-15/449097202_490689816779160_3702800054234264753_n.jpg?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xNDQweDE4MDAuc2RyLmYyOTM1MCJ9&_nc_ht=instagram.fstv8-1.fna.fbcdn.net&_nc_cat=100&_nc_ohc=A2A2Y9T-YLwQ7kNvgFh6rWP&edm=AGyKU4gBAAAA&ccb=7-5&ig_cache_key=MzM5ODU0ODkyMTM0NjMwMDk0Mw%3D%3D.2-ccb7-5&oh=00_AYCX1000fKGnD3hX7E9hKP15Qh6QBkVm63ziHIuuNd8Y_A&oe=6695890E&_nc_sid=2011ad', 'https://instagram.fstv8-1.fna.fbcdn.net/v/t51.29350-15/447769382_740791584671159_6321492072163461079_n.jpg?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xMTcweDExNzAuc2RyLmYyOTM1MCJ9&_nc_ht=instagram.fstv8-1.fna.fbcdn.net&_nc_cat=111&_nc_ohc=E3OVY9yc9DQQ7kNvgEiuNG6&edm=AGyKU4gBAAAA&ccb=7-5&ig_cache_key=MzM4NTg0NzUyMzE5NDE4NDczMw%3D%3D.2-ccb7-5&oh=00_AYBdAGW5fqeFrqrnMFW9YPfUD3i2YLjANFRQ1oBkzQNwig&oe=66958737&_nc_sid=2011ad', 'https://instagram.fstv8-1.fna.fbcdn.net/v/t51.29350-15/450640401_7551570674952079_3368164433

127.0.0.1 - - [11/Jul/2024 16:11:03] "POST /fetch-images HTTP/1.1" 200 -
127.0.0.1 - - [11/Jul/2024 16:13:37] "OPTIONS /fetch-images HTTP/1.1" 200 -


Instagram image URLs: ['https://instagram.fstv8-1.fna.fbcdn.net/v/t51.29350-15/431902107_1200996187538743_7518897575321408441_n.jpg?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xNDQweDE4MDAuc2RyLmYyOTM1MCJ9&_nc_ht=instagram.fstv8-1.fna.fbcdn.net&_nc_cat=107&_nc_ohc=rnV4GVomG_UQ7kNvgEB8D44&edm=AGyKU4gBAAAA&ccb=7-5&ig_cache_key=MzMxODM2MjI4MDA4NzU3NjQ4Ng%3D%3D.2-ccb7-5&oh=00_AYDRevUotxqmKc4EN1-uNkOrWmZzRsjzQubLUp_I_rROUw&oe=66956EE6&_nc_sid=2011ad', 'https://instagram.fstv8-2.fna.fbcdn.net/v/t51.29350-15/450523267_1012889323582073_5568250755048002655_n.jpg?stp=dst-jpg_e15&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xMDgweDE5MjAuc2RyLmYyOTM1MCJ9&_nc_ht=instagram.fstv8-2.fna.fbcdn.net&_nc_cat=105&_nc_ohc=Z6D_y__2QEkQ7kNvgGBbYuk&edm=AGyKU4gBAAAA&ccb=7-5&ig_cache_key=MzQwOTQ3MDA3NjcxMTkyNDU0NQ%3D%3D.2-ccb7-5&oh=00_AYDMiLuxu5J_lEZdJC-QIGHw_SG1u8sN2ss6b9tPPRInrA&oe=66957B35&_nc_sid=2011ad', 'https://instagram.fstv8-2.fna.fbcdn.net/v/t51.29350-15/450354921_512387907789873_843683348

127.0.0.1 - - [11/Jul/2024 16:14:51] "POST /fetch-images HTTP/1.1" 200 -
127.0.0.1 - - [11/Jul/2024 16:15:22] "OPTIONS /fetch-images HTTP/1.1" 200 -


Instagram image URLs: ['https://instagram.fstv8-1.fna.fbcdn.net/v/t51.29350-15/431902107_1200996187538743_7518897575321408441_n.jpg?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xNDQweDE4MDAuc2RyLmYyOTM1MCJ9&_nc_ht=instagram.fstv8-1.fna.fbcdn.net&_nc_cat=107&_nc_ohc=rnV4GVomG_UQ7kNvgEB8D44&edm=AGyKU4gBAAAA&ccb=7-5&ig_cache_key=MzMxODM2MjI4MDA4NzU3NjQ4Ng%3D%3D.2-ccb7-5&oh=00_AYDRevUotxqmKc4EN1-uNkOrWmZzRsjzQubLUp_I_rROUw&oe=66956EE6&_nc_sid=2011ad', 'https://instagram.fstv8-1.fna.fbcdn.net/v/t51.29350-15/420941296_1381877116052600_3839960516569938566_n.jpg?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xNDQweDE3MzYuc2RyLmYyOTM1MCJ9&_nc_ht=instagram.fstv8-1.fna.fbcdn.net&_nc_cat=108&_nc_ohc=vDE2TTwbyPgQ7kNvgHSVTuO&edm=AGyKU4gBAAAA&ccb=7-5&ig_cache_key=MzI4ODMxNTI0MzM1NDM4Nzg4Ng%3D%3D.2-ccb7-5&oh=00_AYAZC8CNnXHmeg3OLzj_aRRl2Fk0hnoUUB9LhiupUxZ_Kg&oe=6695A54C&_nc_sid=2011ad', 'https://instagram.fstv8-2.fna.fbcdn.net/v/t51.29350-15/450523267_1012889323582073_55682507

127.0.0.1 - - [11/Jul/2024 16:18:01] "POST /fetch-images HTTP/1.1" 200 -
