In [17]:
import pyaudio
import ipywidgets as widgets
from IPython.display import display
from threading import Thread
from queue import Queue
# Run flag for record_microphone: it keeps recording while this queue is non-empty.
messages=Queue()
# record_microphone puts one list of raw audio buffers here per completed window.
recordings=Queue()
channels=1  # channel count passed to PyAudio.open
frame_rate=16000  # sampling rate passed to PyAudio.open
record_seconds=20  # length of one recording window, in seconds
audio_format=pyaudio.paInt16  # sample format passed to PyAudio.open
sample_size=2  # presumably bytes per paInt16 sample; not referenced in this cell - TODO confirm use

def record_microphone(chunk=12045, input_device_index=2):
    """Record microphone audio into the `recordings` queue in fixed-length windows.

    Recording runs for as long as the module-level `messages` queue is
    non-empty; emptying that queue from another thread stops the loop. Every
    time roughly `record_seconds` seconds of audio have accumulated, the list
    of raw buffers is put on `recordings` and a new window starts. Buffers of
    an unfinished window are discarded when the loop stops.

    Args:
        chunk: Number of frames read from the stream per call.
        input_device_index: PyAudio input device to open. Defaults to 2, the
            value that was previously hard-coded.

    Raises:
        Whatever PyAudio raises while opening or reading the stream. The
        stream and the PyAudio instance are released before the error
        propagates.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=audio_format,
                        channels=channels,
                        rate=frame_rate,
                        input=True,
                        input_device_index=input_device_index,
                        frames_per_buffer=chunk)
        try:
            # Number of buffers that make up one window; loop-invariant.
            frames_per_recording = (frame_rate * record_seconds) / chunk
            frames = []
            while not messages.empty():
                frames.append(stream.read(chunk))
                if len(frames) >= frames_per_recording:
                    # Hand the finished window over and start a fresh list.
                    recordings.put(frames)
                    frames = []
        finally:
            # Release the device even when stream.read() fails mid-recording.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

In [24]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline

def extract_keywords_from_chunks(chunks, keyword_extractor_model="bert-base-uncased", top_n=5):
    """Return the most frequent non-stop-word tokens of every text chunk.

    Keywords are chosen purely by frequency: each chunk is lower-cased,
    tokenized with NLTK, stripped of English stop words and non-alphanumeric
    tokens, and the `top_n` most common remaining tokens are kept.

    Args:
        chunks: Iterable of text chunks.
        keyword_extractor_model: Accepted for backward compatibility and
            ignored. Earlier revisions loaded this model and computed
            embeddings that never influenced the result.
        top_n: Maximum number of keywords per chunk.

    Returns:
        dict mapping "Chunk <n>" (1-based) to a list of keywords. Empty or
        whitespace-only chunks, and chunks that fail to process, map to [].
    """
    chunks = list(chunks)
    chunk_keywords = {f"Chunk {i}": [] for i in range(1, len(chunks) + 1)}

    # Nothing to tokenize: skip the NLTK import and resource downloads.
    if not any(chunk.strip() for chunk in chunks):
        return chunk_keywords

    # One-time setup, hoisted out of the per-chunk loop.
    try:
        from collections import Counter
        import nltk
        from nltk.corpus import stopwords
        from nltk.tokenize import word_tokenize

        nltk.download('punkt', quiet=True)
        nltk.download('stopwords', quiet=True)
        stop_words = set(stopwords.words('english'))
    except Exception as e:
        print(f"Error preparing NLTK resources: {e}")
        return chunk_keywords

    for i, chunk in enumerate(chunks, start=1):
        if not chunk.strip():
            continue
        try:
            word_tokens = word_tokenize(chunk.lower())
            word_counts = Counter(
                w for w in word_tokens if w not in stop_words and w.isalnum()
            )
            # most_common() already copes with fewer than top_n distinct words.
            chunk_keywords[f"Chunk {i}"] = [word for word, _ in word_counts.most_common(top_n)]
        except Exception as e:
            print(f"Error processing chunk {i}: {e}")
            chunk_keywords[f"Chunk {i}"] = []

    return chunk_keywords

# Demo: split a sample paragraph into chunks and print the keywords found in each.
if __name__ == "__main__":
    # Sample text to chunk (replace with your actual text)
    text = """
    The quick brown fox jumps over the lazy dog. This is the first sentence.
    The lazy dog barks loudly at the fox. This is the second sentence, and it's quite important.
    Jumping foxes and barking dogs are common sights in the countryside.
    Countryside life is peaceful and quiet, unlike the busy city.
    The city never sleeps, with its constant noise and activity.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
    chunks = text_splitter.split_text(text)
    # Show the chunks produced by the splitter
    print("Generated Chunks:")
    for i, chunk in enumerate(chunks):
        print(f"Chunk {i+1}: '{chunk}'")
    print("-" * 30)

    # Extract keywords for each chunk
    keywords_per_chunk = extract_keywords_from_chunks(chunks)

    print("Keywords per Chunk:")
    for chunk_id, keywords in keywords_per_chunk.items():
        print(f"{chunk_id}: {keywords}")

Generated Chunks:
Chunk 1: 'The quick brown fox jumps over the lazy dog. This is the first sentence.'
Chunk 2: 'The lazy dog barks loudly at the fox. This is the second sentence, and it's quite important.'
Chunk 3: 'Jumping foxes and barking dogs are common sights in the countryside.'
Chunk 4: 'Countryside life is peaceful and quiet, unlike the busy city.'
Chunk 5: 'The city never sleeps, with its constant noise and activity.'
------------------------------


Device set to use cpu


Keywords per Chunk:
Chunk 1: ['quick', 'brown', 'fox', 'jumps', 'lazy']
Chunk 2: ['lazy', 'dog', 'barks', 'loudly', 'fox']
Chunk 3: ['jumping', 'foxes', 'barking', 'dogs', 'common']
Chunk 4: ['countryside', 'life', 'peaceful', 'quiet', 'unlike']
Chunk 5: ['city', 'never', 'sleeps', 'constant', 'noise']


In [45]:
import requests
from bs4 import BeautifulSoup
import os
from PIL import Image
from io import BytesIO

def scrape_image_from_web(keyword, output_dir="downloaded_images"):
    """
    Scrapes the first usable image from a Google Images search for a given
    keyword and saves it as a JPEG in the specified output directory.

    Args:
        keyword: Search phrase. It is URL-encoded by `requests`, so spaces,
            '&', '#' and similar characters are safe.
        output_dir: Directory the image is written to (created if missing).

    Returns:
        Path of the saved file, or None when the search fails, no usable
        image URL is found, or the image cannot be downloaded or decoded.
        Failures are reported with print() and never raised.
    """
    os.makedirs(output_dir, exist_ok=True)

    try:
        # Pass the query via `params` so requests encodes it correctly.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": keyword, "tbm": "isch"},
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=10,
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        img_tags = soup.find_all('img')

        if not img_tags:
            print("No images found on the page.")
            return None

        # The first <img> was always skipped (index 1 was used), so start at
        # the second tag and take the first absolute http(s) URL. This also
        # skips inline `data:` URIs and relative paths, which cannot be fetched.
        img_url = None
        for tag in img_tags[1:]:
            for attr in ('src', 'data-src'):
                candidate = tag.get(attr)
                if candidate and candidate.startswith(('http://', 'https://')):
                    img_url = candidate
                    break
            if img_url:
                break

        if not img_url:
            print("Could not find the image URL.")
            return None

        # Download the image
        try:
            img_response = requests.get(img_url, timeout=10)
            img_response.raise_for_status()
            image = Image.open(BytesIO(img_response.content))

            # JPEG cannot store alpha or palette modes; normalise before saving.
            if image.mode != "RGB":
                image = image.convert("RGB")

            # Save the image
            filename = f"{keyword.replace(' ', '_')}.jpg"
            filepath = os.path.join(output_dir, filename)
            image.save(filepath, "JPEG")

            print(f"Image for '{keyword}' saved to '{filepath}'")
            return filepath
        except requests.exceptions.RequestException as e:
            print(f"Error downloading the image: {e}")
            return None
        except Exception as e:
            print(f"Error processing the image {e}")
            return None

    except requests.exceptions.RequestException as e:
        print(f"Error during the search request: {e}")
        return None
    except Exception as e:
        print(f"Other error occurred: {e}")
        return None

# --- Main execution ---
# Demo: scrape one image for a fixed keyword and report whether a file was saved.
if __name__ == "__main__":
    keyword = "skeleton danger"  # Replace with your desired keyword
    image_path = scrape_image_from_web(keyword)

    # scrape_image_from_web returns None on any failure
    if image_path:
        print(f"Image successfully scraped and saved to: {image_path}")
    else:
        print("Failed to scrape and save the image.")


Image for 'skeleton danger' saved to 'downloaded_images/skeleton_danger.jpg'
Image successfully scraped and saved to: downloaded_images/skeleton_danger.jpg


In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import requests
from PIL import Image
from io import BytesIO
import os
from collections import Counter
from bs4 import BeautifulSoup

# Download necessary NLTK data (tokenizer, stop-word list, POS tagger used by extract_nouns)
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

# --- Configuration ---
OUTPUT_DIR = "extracted_images"  # default output directory of process_chunks
# NOTE(review): the two search-API placeholders below are not referenced by any
# function visible in this cell; the scraper requests the Google search page directly.
SEARCH_API_KEY = "YOUR_GOOGLE_API_KEY"  # Replace with your Google Custom Search API key
SEARCH_ENGINE_ID = "YOUR_SEARCH_ENGINE_ID"  # Replace with your Custom Search Engine ID

def extract_nouns(text, num_keywords=3):
    """Return the `num_keywords` most frequent nouns in `text`.

    The text is lower-cased, tokenized and POS-tagged with NLTK. A token
    counts as a noun when its tag is NN, NNS, NNP or NNPS, it is alphanumeric
    and it is not an English stop word.
    """
    noun_tags = ['NN', 'NNS', 'NNP', 'NNPS']
    english_stops = set(stopwords.words('english'))
    tagged_tokens = nltk.pos_tag(word_tokenize(text.lower()))

    noun_counts = Counter()
    for token, pos in tagged_tokens:
        if pos not in noun_tags:
            continue
        if not token.isalnum() or token in english_stops:
            continue
        noun_counts[token] += 1

    ranked = noun_counts.most_common(num_keywords)
    return [token for token, _ in ranked]

def scrape_image_from_web(keyword, output_dir="downloaded_images"):
    """
    Scrapes the first usable image from a Google Images search for a given
    keyword and saves it as a JPEG in the specified output directory.

    Args:
        keyword: Search phrase. It is URL-encoded by `requests`, so spaces,
            '&', '#' and similar characters are safe.
        output_dir: Directory the image is written to (created if missing).

    Returns:
        Path of the saved file, or None when the search fails, no usable
        image URL is found, or the image cannot be downloaded or decoded.
        Failures are reported with print() and never raised.
    """
    os.makedirs(output_dir, exist_ok=True)

    try:
        # Pass the query via `params` so requests encodes it correctly.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": keyword, "tbm": "isch"},
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=10,
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        img_tags = soup.find_all('img')

        if not img_tags:
            print("No images found on the page.")
            return None

        # The first <img> was always skipped (index 1 was used), so start at
        # the second tag and take the first absolute http(s) URL. This also
        # skips inline `data:` URIs and relative paths, which cannot be fetched.
        img_url = None
        for tag in img_tags[1:]:
            for attr in ('src', 'data-src'):
                candidate = tag.get(attr)
                if candidate and candidate.startswith(('http://', 'https://')):
                    img_url = candidate
                    break
            if img_url:
                break

        if not img_url:
            print("Could not find the image URL.")
            return None

        # Download the image
        try:
            img_response = requests.get(img_url, timeout=10)
            img_response.raise_for_status()
            image = Image.open(BytesIO(img_response.content))

            # JPEG cannot store alpha or palette modes; normalise before saving.
            if image.mode != "RGB":
                image = image.convert("RGB")

            # Save the image
            filename = f"{keyword.replace(' ', '_')}.jpg"
            filepath = os.path.join(output_dir, filename)
            image.save(filepath, "JPEG")

            print(f"Image for '{keyword}' saved to '{filepath}'")
            return filepath
        except requests.exceptions.RequestException as e:
            print(f"Error downloading the image: {e}")
            return None
        except Exception as e:
            print(f"Error processing the image {e}")
            return None

    except requests.exceptions.RequestException as e:
        print(f"Error during the search request: {e}")
        return None
    except Exception as e:
        print(f"Other error occurred: {e}")
        return None


def process_chunks(chunks, output_dir=OUTPUT_DIR):
    """
    Extract nouns from every non-blank chunk, then download one image per
    distinct noun into `output_dir`.

    Nouns are searched in order of first appearance across all chunks.
    """
    os.makedirs(output_dir, exist_ok=True)

    collected = []
    for position, chunk in enumerate(chunks, start=1):
        if not chunk.strip():
            continue
        nouns = extract_nouns(chunk)
        print(f"\nChunk {position}: '{chunk[:50]}...'")
        print(f"  Extracted Nouns: {nouns}")
        collected += nouns

    # dict keys keep insertion order, which gives an order-preserving de-duplication
    unique_keywords = list(dict.fromkeys(collected))

    print("\nStarting image search for keywords:", unique_keywords)

    for keyword in unique_keywords:
        print(f"\nSearching images for: {keyword}")
        scrape_image_from_web(keyword, output_dir)

# Demo: chunk a short paragraph, extract nouns per chunk and download an image per noun.
if __name__ == "__main__":
    # Sample text
    text = """There is a heart. The heart has 4 parts. Heart has ventricles and arteries that supply blood in and out of the heart.It is the most important part of the 
    humman body. If heart stops everything else stops.

    """
    
    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=30)
    chunks = text_splitter.split_text(text)
    
    # Process chunks and download images (written to OUTPUT_DIR by default)
    process_chunks(chunks)
    print("\nImage extraction process completed.")



Chunk 1: 'There is a heart. The heart has 4 parts. Heart has...'
  Extracted Nouns: ['heart', 'parts', 'ventricles']

Chunk 2: 'is the most important part of the...'
  Extracted Nouns: ['part']

Chunk 3: 'humman body. If heart stops everything else stops....'
  Extracted Nouns: ['humman', 'body', 'heart']

Starting image search for keywords: ['heart', 'parts', 'ventricles', 'part', 'humman', 'body']

Searching images for: heart
Image for 'heart' saved to 'extracted_images/heart.jpg'

Searching images for: parts
Image for 'parts' saved to 'extracted_images/parts.jpg'

Searching images for: ventricles
Image for 'ventricles' saved to 'extracted_images/ventricles.jpg'

Searching images for: part
Image for 'part' saved to 'extracted_images/part.jpg'

Searching images for: humman
Image for 'humman' saved to 'extracted_images/humman.jpg'

Searching images for: body
Image for 'body' saved to 'extracted_images/body.jpg'

Image extraction process completed.


In [7]:
import os
import requests
from PIL import Image
from io import BytesIO
from langchain.text_splitter import RecursiveCharacterTextSplitter
from collections import Counter
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from skimage.metrics import structural_similarity as ssim
from torchvision.models import resnet50, ResNet50_Weights
import torch
from torch.nn.functional import cosine_similarity
import numpy as np
from bs4 import BeautifulSoup

# --- Configuration ---
CHUNK_SIZE_T = 5  # NOTE(review): not referenced by the code visible in this cell
SSIM_THRESHOLD_TC = 0.75  # SSIM above which two adjacent images count as coherent
SIMILARITY_THRESHOLD = 0.7  # minimum mean cosine similarity for a "Good" chunk
OUTPUT_DIR = "extracted_images"  # where scraped images are saved
FEATURE_EXTRACTION_LAYER = 'layer4'  # model attribute whose output is used as the feature vector
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
TARGET_SIZE = (224, 224)  # Target size for image resizing

# Download necessary NLTK data (run once)
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)


# --- Utility Functions ---
def extract_nouns(text, num_keywords=3):
    """Return the `num_keywords` most frequent nouns found in `text`.

    A token counts as a noun when its NLTK POS tag starts with 'NN', it is
    alphanumeric and it is not an English stop word. The text is lower-cased
    before tokenizing.
    """
    english_stops = set(stopwords.words('english'))
    tagged_tokens = nltk.pos_tag(word_tokenize(text.lower()))

    noun_counts = Counter()
    for token, pos in tagged_tokens:
        if pos.startswith('NN') and token.isalnum() and token not in english_stops:
            noun_counts[token] += 1

    ranked = noun_counts.most_common(num_keywords)
    return [token for token, _ in ranked]


def scrape_image_from_web(keyword, output_dir=OUTPUT_DIR):
    """Scrape an image from Google Images based on a keyword.

    The first absolute http(s) image URL after the first <img> tag is
    downloaded, converted to RGB, resized to TARGET_SIZE and saved as
    '<keyword with spaces replaced by underscores>.jpg' in `output_dir`.

    Args:
        keyword: Search phrase; URL-encoded by `requests`.
        output_dir: Directory the image is written to (created if missing).

    Returns:
        Path of the saved file, or None on any failure. Failures are printed,
        never raised.
    """
    os.makedirs(output_dir, exist_ok=True)

    try:
        # Pass the query via `params` so '&', '#', '+' etc. are encoded correctly.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": keyword, "tbm": "isch"},
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        img_tags = soup.find_all('img')

        if not img_tags or len(img_tags) < 2:
            print(f"No suitable images found for '{keyword}'.")
            return None

        # Skip the first <img> as before, then take the first URL that can be
        # fetched; inline `data:` URIs and relative paths cannot.
        img_url = None
        for tag in img_tags[1:]:
            for attr in ('src', 'data-src'):
                candidate = tag.get(attr)
                if candidate and candidate.startswith(('http://', 'https://')):
                    img_url = candidate
                    break
            if img_url:
                break

        if not img_url:
            print(f"Could not find valid image URL for '{keyword}'.")
            return None

        try:
            img_response = requests.get(img_url, timeout=10)
            img_response.raise_for_status()
            image = Image.open(BytesIO(img_response.content)).convert('RGB').resize(TARGET_SIZE)
            filename = f"{keyword.replace(' ', '_')}.jpg"
            filepath = os.path.join(output_dir, filename)
            image.save(filepath, "JPEG")
            print(f"Image for '{keyword}' saved to '{filepath}'.")
            return filepath
        except Exception as e:
            print(f"Error downloading or processing image for '{keyword}': {e}")
            return None

    except Exception as e:
        print(f"Error during search for '{keyword}': {e}")
        return None


def calculate_ssim(image1, image2):
    """Calculate Structural Similarity Index (SSIM) between two images.

    Both images are converted to 8-bit grayscale first. They must have the
    same size.

    Args:
        image1, image2: Image objects exposing `.convert('L')`, or None.

    Returns:
        The SSIM score, or 0.0 when either image is None.
    """
    if image1 is None or image2 is None:
        return 0.0

    array1 = np.array(image1.convert('L'))
    array2 = np.array(image2.convert('L'))

    # data_range is the span of *possible* pixel values (0..255 for mode 'L'),
    # not the span observed in one image. The observed span skews the score
    # and is 0 for a uniformly coloured image.
    return ssim(array1, array2, data_range=255)


def get_image_features(image, model, layer_name, transform):
    """Extract features from an image using a pre-trained model.

    A forward hook on `getattr(model, layer_name)` captures that layer's
    output during one forward pass. The output is flattened per batch item.

    Args:
        image: Input accepted by `transform`, or None.
        model: Model exposing the layer as an attribute.
        layer_name: Name of the attribute whose output is captured.
        transform: Callable turning `image` into a tensor without a batch
            dimension.

    Returns:
        A NumPy array of shape (1, n_features), or None when `image` is None
        or the layer did not run during the forward pass.
    """
    if image is None:
        return None

    image_t = transform(image).unsqueeze(0).to(DEVICE)
    features = None

    def hook(module, input, output):
        nonlocal features
        features = output.flatten(start_dim=1).detach().cpu().numpy()

    layer = getattr(model, layer_name)
    handle = layer.register_forward_hook(hook)
    try:
        # Inference only: no autograd graph is needed for feature extraction.
        with torch.no_grad():
            model(image_t)
    finally:
        # Always detach the hook so a failed forward pass does not leave it
        # on a model that is reused for later images.
        handle.remove()

    return features


def calculate_cosine_similarity(features1, features2):
    """Return the cosine similarity of two feature vectors as a Python float.

    Yields 0.0 when either vector is None.
    """
    if features1 is None:
        return 0.0
    if features2 is None:
        return 0.0

    vec_a, vec_b = (torch.tensor(vec).float() for vec in (features1, features2))
    score = cosine_similarity(vec_a, vec_b)
    return score.item()


# --- Processing Functions ---
def assess_chunk_coherence(image_paths):
    """Score a chunk by the fraction of adjacent image pairs whose SSIM
    exceeds SSIM_THRESHOLD_TC.

    Fewer than two images are treated as fully coherent (1.0).
    """
    if len(image_paths) < 2:
        return 1.0

    loaded = []
    for path in image_paths:
        loaded.append(Image.open(path).convert('RGB').resize(TARGET_SIZE))

    # Compare each image with its successor.
    pair_scores = [calculate_ssim(first, second) for first, second in zip(loaded, loaded[1:])]

    passing = len([score for score in pair_scores if score > SSIM_THRESHOLD_TC])
    return passing / len(pair_scores)


def assess_chunk_similarity(image_paths, model, transform):
    """Return the mean cosine similarity between features of adjacent images.

    Features come from FEATURE_EXTRACTION_LAYER of `model`. Fewer than two
    images yield 1.0.
    """
    if len(image_paths) < 2:
        return 1.0

    feature_vectors = []
    for path in image_paths:
        picture = Image.open(path).convert('RGB').resize(TARGET_SIZE)
        feature_vectors.append(
            get_image_features(picture, model, FEATURE_EXTRACTION_LAYER, transform)
        )

    # Compare each feature vector with its successor.
    pair_scores = [
        calculate_cosine_similarity(current, following)
        for current, following in zip(feature_vectors, feature_vectors[1:])
    ]
    return np.mean(pair_scores)


def process_chunks(chunks):
    """Scrape one image per extracted noun for every non-blank chunk and
    print the chunk's coherence (SSIM-based) and similarity (cosine) scores.

    A chunk is reported "Good" when its coherence score reaches
    SSIM_THRESHOLD_TC and its similarity score reaches SIMILARITY_THRESHOLD.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Pre-trained ResNet-50 in eval mode plus the preprocessing its weights expect.
    resnet_weights = ResNet50_Weights.DEFAULT
    model = resnet50(weights=resnet_weights).to(DEVICE).eval()
    transform = resnet_weights.transforms()

    for chunk_number, chunk in enumerate(chunks, start=1):
        if not chunk.strip():
            print(f"\nSkipping empty chunk {chunk_number}.")
            continue

        keywords = extract_nouns(chunk)
        print(f"\nProcessing Chunk {chunk_number}: '{chunk[:50]}...'")
        print(f"Extracted Keywords: {keywords}")

        # Keep only the keywords for which an image was actually saved.
        downloads = (scrape_image_from_web(keyword) for keyword in keywords)
        chunk_image_paths = [path for path in downloads if path]

        coherence_score = assess_chunk_coherence(chunk_image_paths)
        similarity_score = assess_chunk_similarity(chunk_image_paths, model, transform)
        print(f"Coherence: {coherence_score:.4f}, Similarity: {similarity_score:.4f}")

        passes = (
            coherence_score >= SSIM_THRESHOLD_TC
            and similarity_score >= SIMILARITY_THRESHOLD
        )
        status = "Good" if passes else "Bad"
        print(f"Chunk Result: {status}")
    
# Demo: chunk a short paragraph and print coherence/similarity scores per chunk.
if __name__ == "__main__":
    text_input = """
There is a heart. The heart has 4 parts. Heart has ventricles and arteries that supply blood in and out of the heart.
It is the most important part of the human body. If the heart stops everything else stops.
"""
    
    # Split into chunks of at most 150 characters with a 30-character overlap
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=30)
    chunks_to_process = text_splitter.split_text(text_input.strip())
    
    process_chunks(chunks_to_process)




Processing Chunk 1: 'There is a heart. The heart has 4 parts. Heart has...'
Extracted Keywords: ['heart', 'parts', 'ventricles']
Image for 'heart' saved to 'extracted_images/heart.jpg'.
Image for 'parts' saved to 'extracted_images/parts.jpg'.
Image for 'ventricles' saved to 'extracted_images/ventricles.jpg'.
Coherence: 0.0000, Similarity: 0.0496
Chunk Result: Bad

Processing Chunk 2: 'It is the most important part of the human body. I...'
Extracted Keywords: ['part', 'body', 'heart']
Image for 'part' saved to 'extracted_images/part.jpg'.
Image for 'body' saved to 'extracted_images/body.jpg'.
Image for 'heart' saved to 'extracted_images/heart.jpg'.
Coherence: 0.0000, Similarity: 0.1173
Chunk Result: Bad


In [2]:
import os
import requests
import numpy as np
import nltk
import torch
from PIL import Image
from bs4 import BeautifulSoup
from collections import Counter
from torchvision.models import resnet50, ResNet50_Weights
from torch.nn.functional import cosine_similarity
from torchvision import transforms
from io import BytesIO
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from skimage.metrics import structural_similarity as ssim
from langchain.text_splitter import RecursiveCharacterTextSplitter

# --- Config ---
CHUNK_SIZE_T = 150  # chunk_size passed to the text splitter in main()
CHUNK_OVERLAP = 30  # chunk_overlap passed to the text splitter in main()
SSIM_THRESHOLD = 0.75  # minimum SSIM for a GOOD status
SIMILARITY_THRESHOLD = 0.7  # minimum cosine similarity for a GOOD status
TARGET_SIZE = (224, 224)  # size every image is resized to
OUTPUT_DIR = "extracted_images"  # scraped images are saved and cached here
FEATURE_LAYER = 'layer4'  # model submodule whose output is used as the feature vector
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Init ---
# NLTK resources used by extract_nouns (tokenizer, stop words, POS tagger)
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

# --- Helper Functions ---

def extract_nouns(text, num_keywords=3):
    """Return the `num_keywords` most frequent nouns found in `text`.

    A token counts as a noun when its NLTK POS tag starts with "NN", it is
    not an English stop word and it is alphanumeric. The text is lower-cased
    before tokenizing.
    """
    english_stops = set(stopwords.words('english'))
    tagged_tokens = nltk.pos_tag(word_tokenize(text.lower()))

    noun_counts = Counter()
    for token, pos in tagged_tokens:
        if pos.startswith("NN") and token not in english_stops and token.isalnum():
            noun_counts[token] += 1

    ranked = noun_counts.most_common(num_keywords)
    return [token for token, _ in ranked]

def get_or_scrape_image(keyword):
    """Return the path of an image for `keyword`, scraping it if not cached.

    Images are cached in OUTPUT_DIR as '<keyword with spaces replaced by
    underscores>.jpg'; an existing file is returned without any network
    access. Otherwise the first absolute http(s) image URL after the first
    <img> tag of a Google Images result page is downloaded, converted to
    RGB, resized to TARGET_SIZE and saved.

    Args:
        keyword: Search phrase; URL-encoded by `requests`.

    Returns:
        Path of the cached or newly saved image, or None when the image could
        not be obtained. The reason is printed.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    fname = f"{keyword.replace(' ', '_')}.jpg"
    fpath = os.path.join(OUTPUT_DIR, fname)
    if os.path.exists(fpath):
        return fpath
    try:
        # Pass the query via `params` so '&', '#', '+' etc. are encoded correctly.
        res = requests.get(
            "https://www.google.com/search",
            params={"q": keyword, "tbm": "isch"},
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=10,
        )
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        # Skip the first <img> as before, then take the first URL that can be
        # fetched; inline `data:` URIs and relative paths cannot.
        img_url = None
        for tag in soup.find_all('img')[1:]:
            for attr in ('src', 'data-src'):
                candidate = tag.get(attr)
                if candidate and candidate.startswith(('http://', 'https://')):
                    img_url = candidate
                    break
            if img_url:
                break
        if img_url is None:
            print(f"No usable image URL found for '{keyword}'.")
            return None

        img_res = requests.get(img_url, timeout=10)
        img_res.raise_for_status()
        img = Image.open(BytesIO(img_res.content)).convert('RGB').resize(TARGET_SIZE)
        img.save(fpath)
        return fpath
    except Exception as e:
        # Best effort: report and continue, but let KeyboardInterrupt and
        # SystemExit through (a bare `except:` swallowed those too).
        print(f"Could not fetch image for '{keyword}': {e}")
        return None

def calculate_ssim(img1, img2):
    """Return the SSIM of two images after converting both to 8-bit grayscale."""
    gray_a = np.array(img1.convert('L'))
    gray_b = np.array(img2.convert('L'))
    return ssim(gray_a, gray_b, data_range=255)

def get_image_features(image, model, transform):
    """Return the flattened output of the model's FEATURE_LAYER for one image.

    A forward hook on the FEATURE_LAYER submodule captures its output during
    a single forward pass.

    Args:
        image: Input accepted by `transform`.
        model: Model with a direct submodule named FEATURE_LAYER.
        transform: Callable turning `image` into a tensor without a batch
            dimension.

    Returns:
        NumPy array of shape (1, n_features).

    Raises:
        AttributeError: if the model has no submodule named FEATURE_LAYER.
    """
    image_t = transform(image).unsqueeze(0).to(DEVICE)
    features = []

    def hook(module, input, output):
        features.append(output.flatten(start_dim=1).detach().cpu().numpy())

    layer = model._modules.get(FEATURE_LAYER)
    if layer is None:
        raise AttributeError(f"model has no submodule named {FEATURE_LAYER!r}")

    handle = layer.register_forward_hook(hook)
    try:
        # Inference only: no autograd graph is needed for feature extraction.
        with torch.no_grad():
            model(image_t)
    finally:
        # Always detach the hook so a failed forward pass does not leave it
        # on a model that is reused for later images.
        handle.remove()
    return features[0]

def calculate_cosine_similarity(f1, f2):
    """Return the cosine similarity of two feature arrays as a Python float."""
    left, right = (torch.tensor(vec).float() for vec in (f1, f2))
    similarity = cosine_similarity(left, right)
    return similarity.item()

def fuse_chunks(image_paths):
    """Average the given images pixel-wise in grayscale and return the result
    as an RGB image of TARGET_SIZE.
    """
    stack = []
    for path in image_paths:
        gray = Image.open(path).resize(TARGET_SIZE).convert('L')
        stack.append(np.array(gray))

    mean_pixels = np.mean(stack, axis=0)
    fused = Image.fromarray(mean_pixels.astype(np.uint8))
    return fused.convert('RGB')

def print_similarity(title, ssim_score, cosine_score, border=False):
    """Print a titled SSIM / cosine-similarity pair followed by a 40-character
    rule ('=' when `border` is true, '-' otherwise).
    """
    rule_char = "=" if border else "-"
    report = [
        f"[{title}]",
        f"  SSIM: {ssim_score:.4f}",
        f"  Cosine Similarity: {cosine_score:.4f}",
        rule_char * 40,
    ]
    for line in report:
        print(line)

# --- Main Processing ---
def main():
    """Run the chunk-to-image consistency demo on a fixed sample paragraph.

    Steps:
      1. Split the text into chunks and extract up to three nouns per chunk.
      2. Fetch (or load from cache) one image per noun and extract ResNet-50
         features for each image.
      3. Per chunk, compare the first and last available image (SSIM and
         cosine similarity) and print a GOOD/BAD status.
      4. Average all images into one fused image and compare every chunk's
         first and last image against it, then print overall averages.

    Every result is labelled with the chunk's original number and keywords,
    including when earlier chunks were skipped for lack of images.
    """
    text = """let’s talk about the circulatory system. The heart plays a central role in pumping blood throughout the body. Its basic structure includes the right cardiac muscle, which pushes deoxygenated blood toward the lungs, and the left cardiac muscle, which pumps oxygenated blood to the body. Blood enters through the inferior vena cava, travels to the lungs via the pulmonary artery, and returns through the veins once it’s oxygenated. Finally, the aorta carries this oxygen-rich blood to all parts of the body."""

    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE_T, chunk_overlap=CHUNK_OVERLAP)
    chunks = splitter.split_text(text.strip())

    print("=== CHUNKS AND EXTRACTED NOUNS ===")
    chunk_keywords = []
    for i, chunk in enumerate(chunks):
        keywords = extract_nouns(chunk)
        chunk_keywords.append(keywords)
        print(f"\nCHUNK {i + 1}: {chunk}")
        print(f"  Extracted Nouns: {keywords}")

    weights = ResNet50_Weights.DEFAULT
    model = resnet50(weights=weights).to(DEVICE).eval()
    transform = weights.transforms()

    chunk_images = []
    # One entry per chunk that produced at least one image:
    # (chunk_number, keywords, first_image, last_image, first_features, last_features)
    chunk_representations = []

    print("\n=== PROCESSING INDIVIDUAL CHUNKS ===\n")
    for idx, keywords in enumerate(chunk_keywords):
        chunk_no = idx + 1

        # Keep keyword/path pairs together so the comparison label names the
        # images that are actually compared, even if some downloads failed.
        fetched = [(k, get_or_scrape_image(k)) for k in keywords if k]
        fetched = [(k, p) for k, p in fetched if p]

        if not fetched:
            print(f"CHUNK {chunk_no}: Insufficient images to process.")
            continue

        used_keywords = [k for k, _ in fetched]
        image_paths = [p for _, p in fetched]

        images = [Image.open(p).resize(TARGET_SIZE).convert('RGB') for p in image_paths]
        features = [get_image_features(img, model, transform) for img in images]

        chunk_representations.append(
            (chunk_no, keywords, images[0], images[-1], features[0], features[-1])
        )
        chunk_images.append(image_paths)

        if len(images) >= 2:
            print(f"CHUNK {chunk_no} Comparing: {used_keywords[0]} vs {used_keywords[-1]}")
            ssim_score = calculate_ssim(images[0], images[-1])
            cosine_score = calculate_cosine_similarity(features[0], features[-1])
            status = "GOOD" if ssim_score >= SSIM_THRESHOLD and cosine_score >= SIMILARITY_THRESHOLD else "BAD"
            print_similarity(f"CHUNK {chunk_no} (Internal)", ssim_score, cosine_score)
            print(f"  Chunk Comparison Status: {status}")
            print("-" * 40)
        else:
            print_similarity(f"CHUNK {chunk_no} (Single Image)", 1.0, 1.0)
            print(f"  Chunk Comparison Status: GOOD")
            print("-" * 40)

    all_images_flat = [item for sublist in chunk_images for item in sublist]

    if not all_images_flat:
        print("\n=== FUSED CHUNK EVALUATION ===")
        print("No images were available to create a fused chunk.")
        return

    fused_img = fuse_chunks(all_images_flat)
    fused_features = get_image_features(fused_img, model, transform)
    print_similarity("FUSED CHUNK (Representation)", 1.0, 1.0, border=True)

    print("\n=== EVALUATING CHUNKS AGAINST FUSED REPRESENTATION ===\n")
    # Each score is computed once and reused for the averages below.
    start_ssims, start_cosines, end_ssims, end_cosines = [], [], [], []
    for chunk_no, keywords, img_start, img_end, feat_start, feat_end in chunk_representations:
        print(f"CHUNK {chunk_no} Keywords: {keywords}")

        ssim_start = calculate_ssim(img_start, fused_img)
        cosine_start = calculate_cosine_similarity(feat_start, fused_features)
        print_similarity(f"CHUNK {chunk_no} (Start vs Fused)", ssim_start, cosine_start)

        ssim_end = calculate_ssim(img_end, fused_img)
        cosine_end = calculate_cosine_similarity(feat_end, fused_features)
        print_similarity(f"CHUNK {chunk_no} (End vs Fused)", ssim_end, cosine_end)

        start_ssims.append(ssim_start)
        start_cosines.append(cosine_start)
        end_ssims.append(ssim_end)
        end_cosines.append(cosine_end)

    # all_images_flat is non-empty here, so at least one chunk was recorded.
    overall_ssim_to_fused = (np.mean(start_ssims) + np.mean(end_ssims)) / 2
    overall_cosine_to_fused = (np.mean(start_cosines) + np.mean(end_cosines)) / 2

    fused_status = "GOOD" if overall_ssim_to_fused >= SSIM_THRESHOLD and overall_cosine_to_fused >= SIMILARITY_THRESHOLD else "BAD"
    print("\n=== FUSED CHUNK EVALUATION ===")
    print(f"Average SSIM to Fused: {overall_ssim_to_fused:.4f}")
    print(f"Average Cosine Similarity to Fused: {overall_cosine_to_fused:.4f}")
    print(f"Fused Chunk Status: {fused_status}")

# Run the demo when this cell or script is executed directly.
if __name__ == "__main__":
    main()


=== CHUNKS AND EXTRACTED NOUNS ===

CHUNK 1: let’s talk about the circulatory system. The heart plays a central role in pumping blood throughout the body. Its basic structure includes the right
  Extracted Nouns: ['talk', 'circulatory', 'system']

CHUNK 2: structure includes the right cardiac muscle, which pushes deoxygenated blood toward the lungs, and the left cardiac muscle, which pumps oxygenated
  Extracted Nouns: ['muscle', 'structure', 'cardiac']

CHUNK 3: which pumps oxygenated blood to the body. Blood enters through the inferior vena cava, travels to the lungs via the pulmonary artery, and returns
  Extracted Nouns: ['blood', 'pumps', 'body']

CHUNK 4: pulmonary artery, and returns through the veins once it’s oxygenated. Finally, the aorta carries this oxygen-rich blood to all parts of the body.
  Extracted Nouns: ['artery', 'returns', 'veins']

=== PROCESSING INDIVIDUAL CHUNKS ===

CHUNK 1 Comparing: talk vs system
[CHUNK 1 (Internal)]
  SSIM: 0.0981
  Cosine Similarity: 0.05

In [6]:
import os
import requests
import numpy as np
import nltk
import torch
from PIL import Image
from bs4 import BeautifulSoup
from collections import Counter
from torchvision.models import resnet50, ResNet50_Weights
from torch.nn.functional import cosine_similarity
from torchvision import transforms
from io import BytesIO
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from skimage.metrics import structural_similarity as ssim
from langchain.text_splitter import RecursiveCharacterTextSplitter
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, random_split
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from sklearn.metrics import precision_score, recall_score, f1_score

# --- Config ---
CHUNK_SIZE_T = 150
CHUNK_OVERLAP = 30
SSIM_THRESHOLD = 0.75
SIMILARITY_THRESHOLD = 0.7
TARGET_SIZE = (224, 224)  # size every image is resized to
OUTPUT_DIR = "extracted_images"  # scraped images are saved and cached here
FEATURE_LAYER = 'layer4'  # model submodule whose output is used as the feature vector
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# CIFAR-10 settings; presumably consumed by train_and_evaluate_cifar10 further down - TODO confirm
CIFAR10_BATCH_SIZE = 32  # Reduced batch size
CIFAR10_LR = 0.001
CIFAR10_EPOCHS = 1
CIFAR10_ROOT = "./data"

# --- Init ---
# NLTK resources used by extract_nouns (tokenizer, stop words, POS tagger)
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

# --- Helper Functions ---
def extract_nouns(text, num_keywords=3):
    """Return the `num_keywords` most frequent nouns found in `text`.

    A token counts as a noun when its NLTK POS tag starts with "NN", it is
    not an English stop word and it is alphanumeric. The text is lower-cased
    before tokenizing.
    """
    english_stops = set(stopwords.words('english'))
    tagged_tokens = nltk.pos_tag(word_tokenize(text.lower()))

    noun_counts = Counter()
    for token, pos in tagged_tokens:
        if pos.startswith("NN") and token not in english_stops and token.isalnum():
            noun_counts[token] += 1

    ranked = noun_counts.most_common(num_keywords)
    return [token for token, _ in ranked]

def get_or_scrape_image(keyword):
    """Return the path of a cached image for ``keyword``, scraping one if needed.

    The image is stored in ``OUTPUT_DIR`` as ``<keyword>.jpg`` (spaces replaced
    by underscores). If that file already exists it is returned without any
    network access. Otherwise a Google image search page is requested, the
    second ``<img>`` element's ``src`` is downloaded, converted to RGB,
    resized to ``TARGET_SIZE`` and saved.

    Args:
        keyword: Search term; also used to build the cache file name.

    Returns:
        The path of the image file, or ``None`` when the image could not be
        fetched, decoded or saved (best-effort: failures are reported, not
        raised).
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    fname = f"{keyword.replace(' ', '_')}.jpg"
    fpath = os.path.join(OUTPUT_DIR, fname)
    if os.path.exists(fpath):
        return fpath
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        # Let requests build the query string so keywords containing spaces,
        # '&', '#', etc. are URL-encoded instead of corrupting the URL.
        res = requests.get(
            "https://www.google.com/search",
            params={"q": keyword, "tbm": "isch"},
            headers=headers,
            timeout=10,
        )
        # Do not try to parse an error page as if it were a result page.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        # The original code picks the second <img> element on the page;
        # an IndexError here (fewer than two images) is handled below.
        img_url = soup.find_all('img')[1].get('src')
        img_res = requests.get(img_url, timeout=10)
        img_res.raise_for_status()
        img = Image.open(BytesIO(img_res.content)).convert('RGB').resize(TARGET_SIZE)
        img.save(fpath)
        return fpath
    except Exception as exc:
        # `Exception` (not a bare except) keeps the best-effort behaviour but
        # no longer swallows KeyboardInterrupt / SystemExit, so the kernel can
        # still be interrupted while a request is in flight.
        print(f"Warning: could not fetch image for '{keyword}': {exc}")
        return None

def calculate_ssim(img1, img2):
    """Return the structural similarity of two images.

    Both images are converted to 8-bit grayscale (mode 'L') before being
    passed to ``ssim`` with ``data_range=255``.

    Args:
        img1: First image (an object providing ``convert``, e.g. a PIL image).
        img2: Second image, same requirements as ``img1``.

    Returns:
        The value returned by ``ssim`` for the two grayscale arrays.
    """
    gray_first = np.array(img1.convert('L'))
    gray_second = np.array(img2.convert('L'))
    return ssim(gray_first, gray_second, data_range=255)

def get_image_features(image, model, transform):
    """Extract the ``FEATURE_LAYER`` activations of ``model`` for one image.

    The image is transformed, given a batch dimension and moved to ``DEVICE``.
    A forward hook on the submodule named ``FEATURE_LAYER`` captures that
    layer's output, flattened from dimension 1 onwards.

    The forward pass runs in evaluation mode and without gradient tracking so
    that normalisation/dropout layers behave deterministically and their
    running statistics are not modified by a single-image batch. The model's
    previous train/eval mode is restored afterwards.

    Args:
        image: Input image accepted by ``transform``.
        model: Module that has a direct submodule named ``FEATURE_LAYER``.
        transform: Callable turning ``image`` into a tensor without a batch
            dimension.

    Returns:
        A NumPy array holding the captured, flattened layer output.

    Raises:
        ValueError: If ``model`` has no submodule named ``FEATURE_LAYER``.
    """
    image_t = transform(image).unsqueeze(0).to(DEVICE)
    features = []

    def hook(module, input, output):
        features.append(output.flatten(start_dim=1).detach().cpu().numpy())

    layer = model._modules.get(FEATURE_LAYER)
    if layer is None:
        raise ValueError(f"Model has no submodule named '{FEATURE_LAYER}'")

    was_training = model.training
    model.eval()
    handle = layer.register_forward_hook(hook)
    try:
        with torch.no_grad():
            model(image_t)
    finally:
        # Always detach the hook and restore the caller's mode, even when the
        # forward pass raises, so repeated calls do not accumulate hooks.
        handle.remove()
        model.train(was_training)
    return features[0]

def calculate_cosine_similarity(f1, f2):
    """Return the cosine similarity between two feature arrays as a float.

    Both inputs are converted to float32 tensors and compared with
    ``torch.nn.functional.cosine_similarity`` (default ``dim=1``); the result
    must contain exactly one element, which is returned as a Python float.

    Args:
        f1: First feature array, e.g. of shape (1, N).
        f2: Second feature array with a compatible shape.

    Returns:
        The cosine similarity as a Python float.
    """
    first, second = (torch.tensor(vec).to(torch.float32) for vec in (f1, f2))
    similarity = cosine_similarity(first, second)
    return similarity.item()

def fuse_chunks(image_paths):
    """Blend several images into one by averaging their grayscale pixels.

    Each image is opened, resized to ``TARGET_SIZE`` and converted to
    grayscale. The pixel arrays are averaged element-wise, cast to ``uint8``
    and the result is returned as an RGB image.

    Args:
        image_paths: Paths of the image files to fuse.

    Returns:
        A PIL image in RGB mode containing the averaged picture.
    """
    grayscale_images = []
    for path in image_paths:
        resized = Image.open(path).resize(TARGET_SIZE)
        grayscale_images.append(resized.convert('L'))

    pixel_stack = [np.array(gray) for gray in grayscale_images]
    mean_pixels = np.mean(pixel_stack, axis=0)
    fused = Image.fromarray(mean_pixels.astype(np.uint8))
    return fused.convert('RGB')

def print_similarity(title, ssim_score, cosine_score, border=False):
    """Print a small report with an SSIM score and a cosine similarity.

    Args:
        title: Heading printed in square brackets.
        ssim_score: SSIM value, printed with four decimals.
        cosine_score: Cosine similarity, printed with four decimals.
        border: When truthy the closing rule is made of '=' characters,
            otherwise of '-' characters (40 characters wide in both cases).
    """
    print(f"[{title}]")
    for label, value in (("SSIM", ssim_score), ("Cosine Similarity", cosine_score)):
        print(f"  {label}: {value:.4f}")
    rule_char = "=" if border else "-"
    print(rule_char * 40)

# --- CIFAR-10 Training and Evaluation ---
def train_and_evaluate_cifar10(model):
    """Fine-tune ``model`` on CIFAR-10 and print validation metrics.

    The CIFAR-10 training split is downloaded to ``CIFAR10_ROOT`` if needed,
    normalised to mean/std 0.5 per channel and split 80/20 into training and
    validation subsets. The model is trained for ``CIFAR10_EPOCHS`` epochs
    with Adam (``CIFAR10_LR``) and cross-entropy loss, then evaluated on the
    validation subset. Accuracy and macro-averaged precision, recall and F1
    are printed.

    Side effects: ``model`` is moved to ``DEVICE``, its weights are updated
    in place and it is left in evaluation mode.

    Args:
        model: Classifier whose output has one logit per CIFAR-10 class.

    Returns:
        None. Returns early (after printing a message) if the dataset is
        empty.
    """
    # Load CIFAR-10 dataset with separate transforms
    cifar10_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    dataset = CIFAR10(root=CIFAR10_ROOT, train=True, download=True, transform=cifar10_transform)

    # Verify dataset is not empty
    if len(dataset) == 0:
        print("CIFAR-10 dataset is empty or not correctly loaded.")
        return

    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_dataset, batch_size=CIFAR10_BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=CIFAR10_BATCH_SIZE, shuffle=False)

    # Batches are sent to DEVICE below, so make sure every parameter of the
    # model (including layers replaced after an earlier .to() call) is there
    # too. This must happen before the optimizer captures the parameters.
    model.to(DEVICE)

    # Define loss and optimizer
    criterion = CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=CIFAR10_LR)

    # Training loop
    for epoch in range(CIFAR10_EPOCHS):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Validation loop
    model.eval()
    correct = 0
    total = 0
    predicted_labels = []
    actual_labels = []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            # Index of the largest logit is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            predicted_labels.extend(predicted.cpu().tolist())
            actual_labels.extend(labels.cpu().tolist())

    accuracy = correct / total if total else 0.0
    # zero_division=0 yields the same score as the default but without a
    # warning when some class is never predicted (likely after one epoch).
    precision = precision_score(actual_labels, predicted_labels, average='macro', zero_division=0)
    recall = recall_score(actual_labels, predicted_labels, average='macro', zero_division=0)
    f1 = f1_score(actual_labels, predicted_labels, average='macro', zero_division=0)

    print("\n=== CIFAR-10 Evaluation ===")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

# --- Main Processing ---
def main():
    """Run the chunk/keyword/image similarity pipeline, then the CIFAR-10 run.

    Steps:
      1. Split a fixed sample text into overlapping chunks and extract the
         top nouns of every chunk.
      2. For each chunk, fetch one image per noun and compare the first and
         last available image (SSIM on pixels, cosine similarity on ResNet-50
         ``FEATURE_LAYER`` activations).
      3. Fuse all images into one averaged image and compare every chunk's
         first/last image against it.
      4. Fine-tune and evaluate the same model on CIFAR-10.

    All results are printed; nothing is returned.
    """
    text = """let’s talk about the circulatory system. The heart plays a central role in pumping blood throughout the body. Its basic structure includes the right cardiac muscle, which pushes deoxygenated blood toward the lungs, and the left cardiac muscle, which pumps oxygenated blood to the body. Blood enters through the inferior vena cava, travels to the lungs via the pulmonary artery, and returns through the veins once it’s oxygenated. Finally, the aorta carries this oxygen-rich blood to all parts of the body."""

    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE_T, chunk_overlap=CHUNK_OVERLAP)
    chunks = splitter.split_text(text.strip())

    print("=== CHUNKS AND EXTRACTED NOUNS ===")
    chunk_keywords = []
    for i, chunk in enumerate(chunks):
        keywords = extract_nouns(chunk)
        chunk_keywords.append(keywords)
        print(f"\nCHUNK {i + 1}: {chunk}")
        print(f"  Extracted Nouns: {keywords}")

    weights = ResNet50_Weights.DEFAULT
    model = resnet50(weights=weights)
    model.fc = torch.nn.Linear(model.fc.in_features, 10)
    # Move to the device only after the head has been replaced; otherwise the
    # new layer stays on the CPU and the forward pass fails on CUDA.
    model = model.to(DEVICE)
    # Feature extraction is inference: use eval mode so BatchNorm relies on
    # its pretrained statistics instead of single-image batch statistics.
    model.eval()
    transform = weights.transforms()

    chunk_images = []
    # Each entry: (chunk number, first image, last image, first features,
    # last features). The chunk number is stored explicitly because chunks
    # without images are skipped, so list position != chunk position.
    chunk_representations = []

    print("\n=== PROCESSING INDIVIDUAL CHUNKS ===\n")
    for idx, keywords in enumerate(chunk_keywords):
        # Keep keyword and path together so labels still match the images
        # when some downloads fail.
        fetched = [(k, get_or_scrape_image(k)) for k in keywords if k]
        fetched = [(k, p) for k, p in fetched if p]

        if not fetched:
            print(f"CHUNK {idx + 1}: Insufficient images to process.")
            continue

        used_keywords = [k for k, _ in fetched]
        image_paths = [p for _, p in fetched]

        images = [Image.open(p).resize(TARGET_SIZE).convert('RGB') for p in image_paths]
        features = [get_image_features(img, model, transform) for img in images]

        chunk_representations.append((idx + 1, images[0], images[-1], features[0], features[-1]))
        chunk_images.append(image_paths)

        if len(images) >= 2:
            print(f"CHUNK {idx + 1} Comparing: {used_keywords[0]} vs {used_keywords[-1]}")
            ssim_score = calculate_ssim(images[0], images[-1])
            cosine_score = calculate_cosine_similarity(features[0], features[-1])
            status = "GOOD" if ssim_score >= SSIM_THRESHOLD and cosine_score >= SIMILARITY_THRESHOLD else "BAD"
            print_similarity(f"CHUNK {idx + 1} (Internal)", ssim_score, cosine_score)
            print(f"  Chunk Comparison Status: {status}")
            print("-" * 40)
        else:
            print_similarity(f"CHUNK {idx + 1} (Single Image)", 1.0, 1.0)
            print(f"  Chunk Comparison Status: GOOD")
            print("-" * 40)

    all_images_flat = [item for sublist in chunk_images for item in sublist]

    if all_images_flat:
        fused_img = fuse_chunks(all_images_flat)
        fused_features = get_image_features(fused_img, model, transform)
        print_similarity("FUSED CHUNK (Representation)", 1.0, 1.0, border=True)

        # Scores are collected while printing so they are computed only once.
        ssim_starts, cosine_starts = [], []
        ssim_ends, cosine_ends = [], []

        print("\n=== EVALUATING CHUNKS AGAINST FUSED REPRESENTATION ===\n")
        for chunk_no, img_start, img_end, feat_start, feat_end in chunk_representations:
            print(f"CHUNK {chunk_no} Keywords: {chunk_keywords[chunk_no - 1]}")
            ssim_start = calculate_ssim(img_start, fused_img)
            cosine_start = calculate_cosine_similarity(feat_start, fused_features)
            print_similarity(f"CHUNK {chunk_no} (Start vs Fused)", ssim_start, cosine_start)

            ssim_end = calculate_ssim(img_end, fused_img)
            cosine_end = calculate_cosine_similarity(feat_end, fused_features)
            print_similarity(f"CHUNK {chunk_no} (End vs Fused)", ssim_end, cosine_end)

            ssim_starts.append(ssim_start)
            cosine_starts.append(cosine_start)
            ssim_ends.append(ssim_end)
            cosine_ends.append(cosine_end)

        # chunk_representations is non-empty here: every chunk that
        # contributed image paths also contributed a representation.
        overall_ssim_to_fused = (np.mean(ssim_starts) + np.mean(ssim_ends)) / 2
        overall_cosine_to_fused = (np.mean(cosine_starts) + np.mean(cosine_ends)) / 2
        fused_status = "GOOD" if overall_ssim_to_fused >= SSIM_THRESHOLD and overall_cosine_to_fused >= SIMILARITY_THRESHOLD else "BAD"
        print("\n=== FUSED CHUNK EVALUATION ===")
        print(f"Average SSIM to Fused: {overall_ssim_to_fused:.4f}")
        print(f"Average Cosine Similarity to Fused: {overall_cosine_to_fused:.4f}")
        print(f"Fused Chunk Status: {fused_status}")
    else:
        print("\n=== FUSED CHUNK EVALUATION ===")
        print("No images were available to create a fused chunk.")

    # Train and evaluate CIFAR-10
    train_and_evaluate_cifar10(model)

# Entry point: call main() only when this code is executed directly rather
# than imported as a module.
if __name__ == "__main__":
    main()


=== CHUNKS AND EXTRACTED NOUNS ===

CHUNK 1: let’s talk about the circulatory system. The heart plays a central role in pumping blood throughout the body. Its basic structure includes the right
  Extracted Nouns: ['talk', 'circulatory', 'system']

CHUNK 2: structure includes the right cardiac muscle, which pushes deoxygenated blood toward the lungs, and the left cardiac muscle, which pumps oxygenated
  Extracted Nouns: ['muscle', 'structure', 'cardiac']

CHUNK 3: which pumps oxygenated blood to the body. Blood enters through the inferior vena cava, travels to the lungs via the pulmonary artery, and returns
  Extracted Nouns: ['blood', 'pumps', 'body']

CHUNK 4: pulmonary artery, and returns through the veins once it’s oxygenated. Finally, the aorta carries this oxygen-rich blood to all parts of the body.
  Extracted Nouns: ['artery', 'returns', 'veins']

=== PROCESSING INDIVIDUAL CHUNKS ===

CHUNK 1 Comparing: talk vs system
[CHUNK 1 (Internal)]
  SSIM: 0.0981
  Cosine Similarity: 0.05