In [89]:
import numpy as np
import pywt
import librosa
from scipy.io import wavfile
from PIL import Image
import sys
import soundfile as sf

In [90]:
audio_path="demo.wav"
stego_path="stego.wav"
secret_image_path="secret.jpg"
extracted_image_path="extracted.jpg"

DELTA=0.001
DWT_LEVEL=3
WAVELET="haar"
seed_left=42
seed_right=44

In [91]:
def image_to_bits(image_path):
  img=Image.open(image_path).convert('RGB')
  arr=np.array(img,dtype=np.uint8)
  height,width,channels=arr.shape
  flatten_arr=arr.flatten()
  flatten_arr_bits=np.unpackbits(flatten_arr,)
  grouped_flattened=np.char.add(flatten_arr_bits[::2].astype(str),flatten_arr_bits[1::2].astype(str))
  return grouped_flattened

In [92]:
def bits_to_image(bit_array,img_shape):
    bit_array=np.array(bit_array)
    return bit_array.reshape(img_shape)

In [93]:
def coeff_modification(coeff,embed_value,delta=DELTA):
    dither=[i*delta/8 for i in range(1,8,2)]
    d=0
    if(embed_value=="00"):
        d= dither[0]
    if(embed_value=="01"):
        d= dither[1]
    if(embed_value=="10"):
        d= dither[2]
    if(embed_value=="11"):
        d= dither[3]
    
    return delta*np.round((coeff-d)/delta)+d

In [94]:
def dwt_on_audio(audio_path,wavelet=WAVELET,dwt_level=DWT_LEVEL):
    audio_stereo,sr=librosa.load(audio_path,mono=False)
    audio_left=audio_stereo[0]
    audio_right=audio_stereo[1]
    if(len(audio_left)%2!=0):
        audio_left=audio_left[:-1]
        audio_right=audio_right[:-1]
    
    
    coeffs_left=pywt.wavedec(audio_left,wavelet,level=dwt_level,mode="periodization")

    coeffs_right=pywt.wavedec(audio_right,wavelet,level=dwt_level,mode="periodization")

    
    return coeffs_left,coeffs_right,sr



In [95]:
def embedding_function(coeffs,embedding_location,embed_array):
    return_coeffs=coeffs
    for i,index in enumerate(embedding_location):
        return_coeffs[index]=(coeff_modification(coeffs[index],embed_array[i],DELTA))
    return return_coeffs

In [96]:
def data_split(embedding_data,coeff_len):
    embedding_data=np.array(embedding_data)
    
    cumulative_len=[]
    for i in range(1,len(coeff_len)):
        cumulative_len.append(sum(coeff_len[:i]))
    split_data=np.split(embedding_data,cumulative_len)
    return split_data
        


In [104]:
image_data=image_to_bits(secret_image_path)
coeffs_left,coeffs_right,sr=dwt_on_audio(audio_path,WAVELET,DWT_LEVEL)
d_coeffs_left=(coeffs_left[1:])
d_coeffs_right=(coeffs_right[1:])

level_arr=np.arange(0,DWT_LEVEL)

possible_locations=sum([arr.shape[0] for arr in d_coeffs_left])

payload_len=len(image_data)
print(payload_len)

coeff_len=[]

#print(level_arr)

if (payload_len>possible_locations*2):
    sys.exit(payload_len)

for arr in d_coeffs_left:
    coeff_len.append(len(arr))


embed_location_left=[]
embed_location_right=[]
embed_proportion=[]
np.random.seed(seed_left)

for i in level_arr:
    size=int(round((payload_len/2)*1/DWT_LEVEL))
    embed_proportion.append(size)
    embed_location_left.append(np.random.randint(1,coeff_len[i],size))#proportion to be changed

np.random.seed(seed_right)
for i in level_arr:
    size=int(round((payload_len/2)*1/DWT_LEVEL))

    embed_location_right.append(np.random.randint(1,coeff_len[i],size ))

image_data_left=image_data[:int(len(image_data)/2)]
image_data_right=image_data[int(len(image_data)/2):]


split_data_left=data_split(image_data_left,embed_proportion)
split_data_right=data_split(image_data_right,embed_proportion)


modified_coeffs_left=[]
modified_coeffs_right=[]

modified_coeffs_left.append(coeffs_left[0])
modified_coeffs_right.append(coeffs_right[0])


for i in level_arr:
    modified_coeffs_left.append(embedding_function(d_coeffs_left[i],embed_location_left[i],split_data_left[i]))
    modified_coeffs_right.append(embedding_function(d_coeffs_right[i],embed_location_right[i],split_data_right[i]))

print(modified_coeffs_left)

#embedding part completed
stego_audio_left=pywt.waverec(modified_coeffs_left,WAVELET)
stego_audio_right=pywt.waverec(modified_coeffs_right,WAVELET)

min_len = min( len(stego_audio_left), len(stego_audio_right))
reconstructed_stereo = np.column_stack((stego_audio_left[:min_len], stego_audio_right[:min_len]))

sf.write(stego_path,reconstructed_stereo,sr)



19200
[array([-2.5115414e-02,  1.2909654e-02, -7.3460117e-04, ...,
        6.6544175e-09,  5.3930873e-09,  5.3169041e-10], dtype=float32), array([ 6.4559486e-03,  5.2446658e-03, -2.8306047e-02, ...,
       -1.2894787e-09,  1.3846639e-09,  8.3153101e-10], dtype=float32), array([ 8.1895245e-03, -3.0550333e-03, -1.4881852e-02, ...,
        5.2224902e-10,  3.9444367e-10,  1.9238562e-10], dtype=float32), array([ 3.0554186e-03,  1.1655008e-03,  8.1843790e-04, ...,
       -9.6714281e-09, -1.0411925e-08, -9.7812745e-09], dtype=float32)]


In [None]:
#extraction part
