In [26]:
import cv2 
import pydub 
import pandas as pd
import numpy as np
import librosa as lib
from scipy.io import wavfile as wav
from scipy.signal import stft
import IPython
import soundfile as sf

# Question: 1

In this question, you are tasked with enhancing the resolution of a video. The goal is to improve the quality of individual frames. You are expected to use basic algorithms for achieving this goal. 

### Task 1: Frame Extraction

Extract frames from the video using OpenCV.

### Task 2: Resolution Enhancement

Apply the following enhancement algorithms to scale the extracted frames by a factor of 2:

1) Nearest-neighbor Interpolation <br>
2) Bilinear Interpolation <br>
3) Bicubic Interpolation <br>

Explore these approaches by your self. These are just builtin parameters in resize function.
https://theailearner.com/2018/11/15/image-interpolation-using-opencv-python/

### Task 3: Video Reconstruction

After enhancing the frames, reconstruct the video by merging the enhanced frames while ensuring that the frame rate of the reconstructed video matches that of the original video. Generate a separate video for each interpolation method.

<b>Bonus</b>: Apply a self-selected algorithm to improve video quality. 

In [79]:
vid = cv2.VideoCapture('Q1.mp4')
fps = int(vid.get(cv2.CAP_PROP_FPS))
heightframe = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
widthframe = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
vid_type = cv2.VideoWriter_fourcc(*'mp4v')
enhance_file = cv2.VideoWriter('intercubic.mp4', vid_type, 30, (widthframe * 2, heightframe * 2))

while True:
    ret, frame = vid.read()
    if ret:
        outframe = cv2.resize(frame, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
        enhance_file.write(outframe)
        cv2.imshow('Frame', outframe)
        if cv2.waitKey(30) & 0xFF == 27:
            break
    else:
        break
vid.release()
enhance_file.release()
cv2.destroyAllWindows()

In [41]:
# orgframearr

In [80]:

vid = cv2.VideoCapture('Q1.mp4')
vid_type = cv2.VideoWriter_fourcc(*'XVID')
enhance_file = cv2.VideoWriter('near_neigh.mp4',vid_type,30,(widthframe*2,heightframe*2))
while True:
    ret,frame = vid.read()
    if ret == True:
        outframe = cv2.resize(frame,None, fx = 2, fy = 2, interpolation = cv2.INTER_NEAREST)
        enhance_file.write(outframe)
    else:
        break
    if cv2.waitKey(30) & 0xFF == 27:
        break
vid.release()
enhance_file.release()
cv2.destroyAllWindows()

In [85]:

vid = cv2.VideoCapture('Q1.mp4')
vid_type = cv2.VideoWriter_fourcc(*'XVID')
enhance_file = cv2.VideoWriter('linear.mp4',vid_type,29,(widthframe*2,heightframe*2))
while True:
    ret,frame = vid.read()
    if ret == True:
        b = cv2.resize(frame,None, fx = 2, fy = 2, interpolation = cv2.INTER_LINEAR)
        enhance_file.write(b)
    else:
        break
    if cv2.waitKey(30) & 0xFF == 27:
        break
vid.release()
enhance_file.release()
cv2.destroyAllWindows()

In [94]:
#### I dont know name of this exact algo but this looked like it produced better results than others

vid = cv2.VideoCapture('Q1.mp4')
vid_type = cv2.VideoWriter_fourcc(*'XVID')
enhance_file = cv2.VideoWriter('AREA.mp4',vid_type,29,(widthframe*10,heightframe*10))
while True:
    ret,frame = vid.read()
    if ret == True:
        b = cv2.resize(frame,None, fx = 10, fy = 10, interpolation = cv2.INTER_LANCZOS4)  ## this one
        enhance_file.write(b)
    else:
        break
    if cv2.waitKey(30) & 0xFF == 27:
        break
vid.release()
enhance_file.release()
cv2.destroyAllWindows()

## 

# Question: 2

In this question, you are tasked with enhancing the audio quality of the video. Follow the given procedure to increase audio quality.

### Step 1: Short-Time Fourier Transform (STFT)
Compute the Short-Time Fourier Transform (STFT) of the audio signal. This operation transforms the audio into the frequency domain over short time intervals.

### Step 2: Magnitude and Phase Extraction
From the STFT, get the magnitude and phase using the np.abs() and np.angle() functions.

### Step 3: Noise Profile Creation
Load the noisy audio and calculate its STFT and magnitude from the STFT. Afterward, compute the average magnitude of the audio along axis=1 to generate a noise profile. This profile is essential for identifying and removing noise.

### Step 4: Adjusting with a Hyperparameter
Multiply the noise profile array by a hyperparameter represented as alpha. Experiment with various values of alpha to fine-tune the results. A good starting point is to set alpha to 2.

### Step 5: Audio Denoising
Subtract the mean noise array from the original audio (You may need to adjust the dimensions of the mean noise array to match with original audio). Ensure that any negative values in the resulting array are replaced with 0. This step effectively reduces noise in the audio.

### Step 6: Incorporating Phase Information
Multiply the modified audio by the complex exponential of the phase information obtained in step 3, which can be expressed as np.exp(1.0j * phase).

### Step 7: Inverse Short-Time Fourier Transform (ISTFT)
Reconstruct the audio by performing the Inverse Short Time Fourier Transform (ISTFT) on the modified audio signal using librosa. Save the resulting audio file.

In [56]:
ndata , nsr = lib.load("Q2-Noise.wav")
odata, osr  = lib.load('Q2.wav')

In [202]:

Nst  = lib.stft(ndata)
Ost = lib.stft(odata)
omag = np.abs(Ost)
nmag = np.abs(Nst)                   ## absolute values of 
oang  =np.angle(Ost)             # original audio angle
nang = np.angle(Nst)        # noise angle
# print(nmag)
# mean mag
m_mag = np.mean(nmag,1)
hyperparam = m_mag* 4
# print(hyperparam.shape)
# print(omag.shape)
# print(oang.shape)

In [203]:
# omag = omag.reshape(587,1025)
# omag.shape            ### this comes with some noise

In [209]:
trans = omag.T     # taking transpose so shapes are equal when subtracting
# print(trans.shape)      # 587
dnoise = []
for i in range(len(trans)):
    t = trans[i] - hyperparam      ## as hyper param is already in a simgle dimension we will iterate over trans to minus it from magnitude of orignial file
    dnoise.append(t)
dnoise = np.array(dnoise)
print(dnoise.shape)
dnoise = dnoise.T
dnoise[dnoise<0] = 0
# dnoise

(587, 1025)
(587, 1025)


In [205]:
# oang = oang.reshape(587,1025)

In [210]:
phase = dnoise*np.exp(1.0j*oang)

numpy.ndarray

In [207]:
mod = lib.istft(phase)
sf.write('dnoisefile.wav',mod,osr)

# Question: 3

For this task, use whisper inference to generate text from the audio file. Use any translation library to translate the text into another language, and then utilize a TTS system to produce audio from the translated text. Supported Languages are :English, Urdu, Arabic