In [0]:
%pip install opencv-python

In [0]:
from pyspark.sql.types import StructType, BinaryType, StructField
import requests
import cv2
import numpy as np
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt
from pyspark.sql.window import Window
from pyspark.sql.functions import length, lit, col, row_number

In [0]:
# Load the rover photo table as a Spark DataFrame. `spark` is not created in
# this notebook -- presumably the session injected by the notebook runtime.
photos_df = spark.table("nasa_rover_gold.photos")

In [0]:
def fetch_image(url, timeout=30):
    """Download the resource at ``url`` and return its raw bytes.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block forever on an unresponsive host.

    Returns:
        The response body as ``bytes``.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch image from {url}")
    return response.content

In [0]:
def display_image(image_data):
    """Decode encoded image bytes and show the picture with matplotlib.

    Args:
        image_data: Bytes of an encoded image (whatever ``cv2.imdecode`` accepts).

    Raises:
        ValueError: If the bytes cannot be decoded into an image.
    """
    # View the bytes as a flat uint8 buffer, the input form cv2.imdecode expects.
    encoded = np.frombuffer(image_data, np.uint8)
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode reports undecodable data by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise ValueError("image_data could not be decoded as an image")

    # OpenCV decodes to BGR; matplotlib expects RGB.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.imshow(image_rgb)
    plt.axis('off')  # Hide axis
    plt.show()

In [0]:
# Identifier of the single photo analysed in the rest of the notebook.
PHOTO_ID = 592224
filtered_photo_df = photos_df.filter(col("photo_id") == PHOTO_ID)


In [0]:
# Bring only the first matching row to the driver, and fail with a clear message
# (instead of an IndexError) when the filter matched nothing.
photo_row = filtered_photo_df.select("img_src").first()
if photo_row is None:
    raise ValueError("No photo matched the filter; cannot read img_src")
img_url = photo_row[0]

In [0]:
# Show the URL that the next cell downloads.
print(img_url)

In [0]:
# Download the encoded image bytes for the selected photo.
image_data = fetch_image(img_url)

# One-column schema: the raw image bytes, declared non-nullable.
schema = StructType([StructField("image_data", BinaryType(), nullable=False)])

# Wrap the bytes in a single-row DataFrame and print it.
image_rows = [(image_data,)]
image_df = spark.createDataFrame(image_rows, schema)
image_df.show()

In [0]:
# Render the downloaded image inline.
display_image(image_data)

In [0]:
# Decode the downloaded bytes; OpenCV returns the pixels in BGR channel order.
image_np = np.frombuffer(image_data, np.uint8)
image = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
if image is None:
    # imdecode signals undecodable data by returning None rather than raising.
    raise ValueError("image_data could not be decoded as an image")

# RGB copy for plotting, grayscale copy for the intensity statistics.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Cast the numpy scalars to plain Python floats before handing them to Spark's lit().
mean_brightness = float(np.mean(gray_image))
std_contrast = float(np.std(gray_image))

# Byte length of the encoded image, plus the two grayscale statistics as constant columns.
df_with_size = image_df.withColumn("filesize", length("image_data"))
final_df = (
    df_with_size
    .withColumn("mean_brightness", lit(mean_brightness))
    .withColumn("std_contrast", lit(std_contrast))
)

final_df.display()

In [0]:
# Keep only the numeric summary columns (drops the raw image bytes).
stat_columns = ["filesize", "mean_brightness", "std_contrast"]
image_stats = final_df.select(*stat_columns)
image_stats.display()

In [0]:
def plot_color_histogram(image):
    """Plot a 256-bin intensity histogram for each of the first three channels.

    Channels 0, 1 and 2 are drawn in red, green and blue respectively, so the
    curve colors match the data only when ``image`` is in RGB channel order.
    """
    channel_colors = ('r', 'g', 'b')  # Red, Green, Blue
    plt.figure(figsize=(12, 6))

    for channel_index, line_color in zip(range(len(channel_colors)), channel_colors):
        # Histogram of one channel over the full 0-255 value range
        counts = cv2.calcHist([image], [channel_index], None, [256], [0, 256])
        plt.plot(counts, color=line_color)

    # The x-range is the same for every channel, so set it once.
    plt.xlim([0, 256])
    plt.title('Color Histogram')
    plt.xlabel('Pixel Value')
    plt.ylabel('Frequency')
    plt.grid()
    plt.show()

# Plot the histogram of the RGB-ordered image so channels 0/1/2 line up with
# the 'r'/'g'/'b' curve colors used by plot_color_histogram.
plot_color_histogram(image_rgb)


In [0]:
from scipy.fftpack import fft2, fftshift

def compute_frequency_noise(image):
    """Return the mean magnitude of the 2-D Fourier transform of ``image``.

    Args:
        image: Either a 2-D grayscale array, or a 3-channel array in RGB
            channel order (the order this notebook passes in).

    Returns:
        The mean of the absolute values of the shifted FFT coefficients.
    """
    # Work on the argument itself rather than on a global left over from
    # another cell; convert to grayscale only when the input is not already 2-D.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Apply Fourier Transform and move the zero-frequency term to the centre
    f_transform_shifted = fftshift(fft2(gray))

    # Compute magnitude spectrum
    magnitude_spectrum = np.abs(f_transform_shifted)

    # Compute noise level from magnitude spectrum
    noise_level = np.mean(magnitude_spectrum)
    return noise_level

# The image was already decoded in an earlier cell; image_rgb is its RGB-ordered copy.


# Compute frequency domain noise (mean FFT magnitude)
frequency_noise = compute_frequency_noise(image_rgb)
print(f"Frequency Domain Noise Level: {frequency_noise}")


In [0]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fft2, fftshift

def plot_frequency_domain(image):
    """Plot the log-scaled magnitude spectrum of the 2-D Fourier transform of ``image``.

    Args:
        image: Either a 2-D grayscale array, or a 3-channel array in RGB
            channel order (the order this notebook passes in).
    """
    # Convert to grayscale only if needed. The input is RGB-ordered, so use the
    # RGB conversion code; the BGR code would swap the red and blue weights.
    if image.ndim == 2:
        gray_image = image
    else:
        gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Apply Fourier Transform and move the zero-frequency term to the centre
    f_transform = fft2(gray_image)
    f_transform_shifted = fftshift(f_transform)

    # Compute magnitude spectrum
    magnitude_spectrum = np.abs(f_transform_shifted)

    # Plot magnitude spectrum; log1p compresses the large dynamic range
    plt.figure(figsize=(12, 6))
    plt.imshow(np.log1p(magnitude_spectrum), cmap='gray', aspect='auto')

    # Add titles and labels
    plt.title('Magnitude Spectrum of Frequency Domain')
    plt.xlabel('Frequency X')
    plt.ylabel('Frequency Y')

    # Add colorbar to show the magnitude values
    cbar = plt.colorbar()
    cbar.set_label('Magnitude')

    plt.grid(True)
    plt.show()



# Show the frequency-domain magnitude spectrum for the RGB-ordered image.
plot_frequency_domain(image_rgb)