In [1]:
import numpy as np
import pywt
import librosa
from PIL import Image
import sys
import soundfile as sf

In [2]:
audio_path="secret.mp3"
stego_path="stego.wav"
secret_image_path="secret.jpg"
extracted_image_path="extracted.jpg"

DELTA=0.001
DWT_LEVEL=3
WAVELET="db4"
seed_left=42
seed_right=44

In [3]:
def image_to_bits(image_path):
  img=Image.open(image_path).convert('RGB')
  arr=np.array(img,dtype=np.uint8)
  img_shape=arr.shape
  flatten_arr=arr.flatten()
  flatten_arr_bits=np.unpackbits(flatten_arr)
  grouped_flattened=np.char.add(flatten_arr_bits[::2].astype(str),flatten_arr_bits[1::2].astype(str))
  return grouped_flattened,img_shape

In [4]:
def bits_to_image(bit_array,img_shape):
    bit_array=np.array(bit_array)
    bit_array=np.packbits(bit_array)
    bit_array.reshape(-1,3)
    
    return bit_array.reshape(img_shape)

In [5]:
def coeff_modification(coeff,embed_value,delta=DELTA):
    dither=[i*delta/8 for i in range(1,8,2)]
    d=0
    if(embed_value=="00"):
        d= dither[0]
    if(embed_value=="01"):
        d= dither[1]
    if(embed_value=="10"):
        d= dither[2]
    if(embed_value=="11"):
        d= dither[3]
    
    return delta*np.round((coeff-d)/delta)+d

In [6]:
def dwt_on_audio(audio_path,wavelet=WAVELET,dwt_level=DWT_LEVEL):
    audio_stereo,sr=librosa.load(audio_path,mono=False)
    audio_left=audio_stereo[0]
    audio_right=audio_stereo[1]
    if(len(audio_left)%2!=0):
        audio_left=audio_left[:-1]
        audio_right=audio_right[:-1]
    
    
    coeffs_left=pywt.wavedec(audio_left,wavelet,level=dwt_level,mode="periodization")

    coeffs_right=pywt.wavedec(audio_right,wavelet,level=dwt_level,mode="periodization")

    
    return coeffs_left,coeffs_right,sr



In [7]:
def embedding_function(coeffs,embedding_location,embed_array):
    return_coeffs=coeffs
    for i,index in enumerate(embedding_location):
        return_coeffs[index]=(coeff_modification(coeffs[index],embed_array[i],DELTA))
    return return_coeffs

In [8]:
def data_split(embedding_data,coeff_len):
    embedding_data=np.array(embedding_data)
    
    cumulative_len=[]
    for i in range(1,len(coeff_len)):
        cumulative_len.append(sum(coeff_len[:i]))
    split_data=np.split(embedding_data,cumulative_len)
    return split_data


In [9]:
image_data,img_shape=image_to_bits(secret_image_path)

coeffs_left,coeffs_right,sr=dwt_on_audio(audio_path,WAVELET,DWT_LEVEL)
d_coeffs_left=(coeffs_left[1:])
d_coeffs_right=(coeffs_right[1:])

level_arr=np.arange(0,DWT_LEVEL)

possible_locations=sum([arr.shape[0] for arr in d_coeffs_left])

payload_len=len(image_data)

coeff_len=[]

if (payload_len>possible_locations*2):
    sys.exit(payload_len)

for arr in d_coeffs_left:
    coeff_len.append(len(arr))


embed_location_left=[]
embed_location_right=[]
embed_proportion=[]
np.random.seed(seed_left)

for i in level_arr:
    size=int(round((payload_len/2)*1/DWT_LEVEL))
    embed_proportion.append(size)
    embed_location_left.append(np.random.randint(1,coeff_len[i],size))#proportion to be changed

np.random.seed(seed_right)


for i in level_arr:
    size=int(round((payload_len/2)*1/DWT_LEVEL))

    embed_location_right.append(np.random.randint(1,coeff_len[i],size ))

image_data_left=image_data[:int(len(image_data)/2)]
image_data_right=image_data[int(len(image_data)/2):]


split_data_left=data_split(image_data_left,embed_proportion)
split_data_right=data_split(image_data_right,embed_proportion)


modified_coeffs_left=[]
modified_coeffs_right=[]

modified_coeffs_left.append(coeffs_left[0])
modified_coeffs_right.append(coeffs_right[0])


for i in level_arr:
    modified_coeffs_left.append(embedding_function(d_coeffs_left[i],embed_location_left[i],split_data_left[i]))
    modified_coeffs_right.append(embedding_function(d_coeffs_right[i],embed_location_right[i],split_data_right[i]))


stego_audio_left=pywt.waverec(modified_coeffs_left,WAVELET)
stego_audio_right=pywt.waverec(modified_coeffs_right,WAVELET)

min_len = min( len(stego_audio_left), len(stego_audio_right))
reconstructed_stereo = np.column_stack((stego_audio_left[:min_len], stego_audio_right[:min_len]))

sf.write(stego_path,reconstructed_stereo,sr)



ValueError: coefficient shape mismatch

In [None]:
def extract_coeff(stego_coeff_val):
    dither=[i*DELTA/8 for i in range(1,8,2)]
    q=[(DELTA*round((stego_coeff_val-d)/DELTA)+d) for d in dither]
    err_val=[abs(stego_coeff_val-q_val) for q_val in q]
    min_index=err_val.index(min(err_val))
    possible_value=["00","01","10","11"]
    return possible_value[min_index]

    

In [None]:
def extraction_payload(stego_coeff,embedding_location):
    extracted_data=[]
    # for idx,coeff in enumerate(stego_coeff):
    #     for index in embedding_location[idx]:
    #         extracted_data.append(extract_coeff(coeff[index]))

    extracted_data=[extract_coeff(coeff[index]) for idx,coeff in enumerate(stego_coeff) for index in embedding_location[idx]]
    flat_extracted = [int(digit) for binary in extracted_data for digit in binary]
    return flat_extracted

        

In [None]:
#extraction part
stego_coeff_left,stego_coeff_right,sr=dwt_on_audio(stego_path,WAVELET,DWT_LEVEL)
stego_d_left=stego_coeff_left[1:]
stego_d_right=stego_coeff_right[1:]

extracted_img_left=[extraction_payload(stego_d_left,embed_location_left)]

extracted_img_right=[extraction_payload(stego_d_right,embed_location_right)]

extracted_img_arr=extracted_img_left+extracted_img_right
extracted_img_arr=np.array(extracted_img_arr)

extracted_img_flattened=extracted_img_arr.flatten() #here the division proportion is same so flatten can be used


extracted_image=bits_to_image(extracted_img_flattened,img_shape)

stego_img=Image.fromarray(extracted_image)
stego_img.save(extracted_image_path)










Original Level 0, Shape: (318755,)
Original Level 1, Shape: (637510,)
Original Level 2, Shape: (1275019,)
Original Level 3, Shape: (2550038,)
Level 0, Shape: (318755,)
Level 1, Shape: (318755,)
Level 2, Shape: (637510,)
Level 3, Shape: (1275019,)
Level 4, Shape: (2550038,)
