In [16]:
import av
import numpy as np
import matplotlib.pyplot as plt
import logging

logger = logging.getLogger(__name__)

class VideoCodec:
    def __init__(self, block_size=16, search_radius=8):
        self.block_size = block_size
        self.search_radius = search_radius
        
    def read_video(self, input_path):
        """Read video file and extract RGB frames"""
        try:
            container = av.open(input_path)
            frames = []
            total_frames = container.streams.video[0].frames
            # Get video parameters for saving later
            self.fps = container.streams.video[0].average_rate
            self.width = container.streams.video[0].width
            self.height = container.streams.video[0].height
            
            logger.info(f"Reading {total_frames} frames...")
            
            for i, frame in enumerate(container.decode(video=0)):
                # Convert frame to numpy array in RGB format
                img = frame.to_ndarray(format='rgb24')
                # Ensure correct data type
                img = img.astype(np.float32)
                frames.append(img)
                
                # Log progress
                if i % 10 == 0:
                    logger.info(f"Processed {i}/{total_frames} frames")
            
            logger.info("Video reading completed")
            return frames
            
        except Exception as e:
            logger.error(f"Error reading video: {str(e)}")
            raise
    
    def logarithmic_search(self, curr_block, ref_frame, x, y, channel):
        """Logarithmic search for motion estimation for a single channel"""
        best_x, best_y = x, y
        best_sat = float('inf')
        step = self.search_radius
        
        while step >= 1:
            for dx in [-step, 0, step]:
                for dy in [-step, 0, step]:
                    new_x = x + dx
                    new_y = y + dy
                    
                    if (new_x < 0 or new_x + self.block_size > ref_frame.shape[1] or
                        new_y < 0 or new_y + self.block_size > ref_frame.shape[0]):
                        continue
                        
                    ref_block = ref_frame[new_y:new_y+self.block_size, 
                                        new_x:new_x+self.block_size, channel]
                    sat = np.sum(np.abs(curr_block - ref_block))
                    
                    if sat < best_sat:
                        best_sat = sat
                        best_x = new_x
                        best_y = new_y
            
            step //= 2
            
        return best_x - x, best_y - y
    
    def motion_estimation(self, curr_frame, ref_frame):
        """Perform motion estimation using logarithmic search for RGB frames"""
        height, width = curr_frame.shape[:2]
        mv_x = np.zeros((height//self.block_size, width//self.block_size))
        mv_y = np.zeros((height//self.block_size, width//self.block_size))
        
        # Use luminance (approximated from RGB) for motion estimation
        curr_y = 0.299 * curr_frame[:,:,0] + 0.587 * curr_frame[:,:,1] + 0.114 * curr_frame[:,:,2]
        ref_y = 0.299 * ref_frame[:,:,0] + 0.587 * ref_frame[:,:,1] + 0.114 * ref_frame[:,:,2]
        
        for i in range(0, height-self.block_size+1, self.block_size):
            for j in range(0, width-self.block_size+1, self.block_size):
                curr_block = curr_y[i:i+self.block_size, j:j+self.block_size]
                dx, dy = self.logarithmic_search(curr_block, ref_y, j, i, None)
                mv_x[i//self.block_size, j//self.block_size] = dx
                mv_y[i//self.block_size, j//self.block_size] = dy
                
        return mv_x, mv_y
    
    def motion_compensation(self, ref_frame, mv_x, mv_y):
        """Perform motion compensation using motion vectors for RGB frames"""
        height, width = ref_frame.shape[:2]
        compensated = np.zeros_like(ref_frame)
        
        for i in range(mv_x.shape[0]):
            for j in range(mv_x.shape[1]):
                ref_x = j * self.block_size + int(mv_x[i, j])
                ref_y = i * self.block_size + int(mv_y[i, j])
                
                if (ref_x >= 0 and ref_x + self.block_size <= width and
                    ref_y >= 0 and ref_y + self.block_size <= height):
                    compensated[i*self.block_size:(i+1)*self.block_size,
                            j*self.block_size:(j+1)*self.block_size] = \
                        ref_frame[ref_y:ref_y+self.block_size,
                                ref_x:ref_x+self.block_size]
                        
        return compensated
    
    def quantize(self, data, Q):
        """Quantize the data with step size Q"""
        return np.round(data / Q) * Q
    
    def compute_psnr(self, original, compressed):
        """Compute PSNR between original and compressed RGB frames"""
        mse = np.mean((original - compressed) ** 2)
        if mse < 1e-10:
            return 100.0
        max_pixel = 255.0
        psnr = 20 * np.log10(max_pixel / np.sqrt(mse))
        return psnr
    
    def save_video(self, frames, output_path):
        """Save reconstructed frames as AVI video"""
        try:
            # Открываем выходной контейнер
            container = av.open(output_path, mode='w')
            
            # Создаем видеопоток с корректными параметрами
            stream = container.add_stream('mpeg4', rate=int(self.fps))
            stream.width = self.width
            stream.height = self.height
            stream.pix_fmt = 'yuv420p'
            
            # Установим битрейт
            stream.bit_rate = 1000000  # 1 Mbps
            # Установим другие важные параметры
            stream.time_base = 1/self.fps
            
            logger.info(f"Saving {len(frames)} frames to {output_path}")
            
            for i, frame_data in enumerate(frames):
                # Ensure frame data is in uint8 format and correct range
                frame_data = np.clip(frame_data, 0, 255).astype(np.uint8)
                
                # Create VideoFrame
                frame = av.VideoFrame.from_ndarray(frame_data, format='rgb24')
                frame.pts = i
                
                for packet in stream.encode(frame):
                    container.mux(packet)
                
                if i % 10 == 0:
                    logger.info(f"Saved frame {i}/{len(frames)}")
            
            # Flush the stream
            for packet in stream.encode(None):
                container.mux(packet)
            
            # Close the container
            container.close()
            logger.info("Video saving completed")
            
        except Exception as e:
            logger.error(f"Error saving video: {str(e)}")
            raise

    def encode_video(self, input_path, q_values):
        """Encode video with different quantization steps"""
        frames = self.read_video(input_path)
        results = []
        
        for Q in q_values:
            logger.info(f"Processing with quantization step Q={Q}")
            psnr_values = []
            compression_ratios = []
            diff_frames = []
            reconstructed_frames = []
            
            for i in range(1, len(frames)):
                # Motion estimation
                mv_x, mv_y = self.motion_estimation(frames[i], frames[i-1])
                
                # Motion compensation
                predicted = self.motion_compensation(frames[i-1], mv_x, mv_y)
                
                # Residual
                residual = frames[i] - predicted
                
                # Quantization with scaling for each channel
                scaled_residual = residual / 255.0
                quantized_residual = self.quantize(scaled_residual, Q/255.0)
                residual_reconstructed = quantized_residual * 255.0
                
                # Reconstruction
                reconstructed = predicted + residual_reconstructed
                reconstructed = np.clip(reconstructed, 0, 255)
                
                # Calculate metrics
                psnr = self.compute_psnr(frames[i], reconstructed)
                psnr_values.append(psnr)
                
                # Store frames
                diff_frames.append(residual)
                reconstructed_frames.append(reconstructed)
                
                # Estimate compression ratio (considering all channels)
                orig_size = frames[i].size * 8
                mv_bits = (mv_x.size + mv_y.size) * 8
                residual_bits = np.count_nonzero(quantized_residual) * 8
                compressed_size = mv_bits + residual_bits
                compression_ratio = orig_size / (compressed_size + 1)
                compression_ratios.append(compression_ratio)
                
                if i % 10 == 0:
                    logger.info(f"Processed frame {i}/{len(frames)-1} for Q={Q}")
            
            results.append({
                'Q': Q,
                'psnr': np.mean(psnr_values),
                'compression_ratio': np.mean(compression_ratios),
                'diff_frames': diff_frames,
                'reconstructed_frames': reconstructed_frames
            })
        
        return results

def main():
    # Initialize codec
    codec = VideoCodec(block_size=16, search_radius=8)
    
    # Input and output video paths
    input_path = "lr1_1.avi"
    output_path = "output.avi"  # Изменено имя выходного файла
    
    # Define quantization steps
    q_values = [1, 4, 8, 16, 32]
    
    print("Starting video encoding...")
    
    # Encode video
    results = codec.encode_video(input_path, q_values)
    
    # Save reconstructed video for Q=8
    middle_result = next(r for r in results if r['Q'] == 8)
    codec.save_video(middle_result['reconstructed_frames'], output_path)
    
    # Plot results
    codec.plot_results(results)
    
    # Print metrics
    print("\nEncoding Results:")
    print("Q\tPSNR (dB)\tCompression Ratio")
    print("-" * 40)
    for result in results:
        print(f"{result['Q']}\t{result['psnr']:.2f}\t\t{result['compression_ratio']:.2f}")

if __name__ == "__main__":
    main()

Starting video encoding...


Error saving video: [Errno 22] Invalid argument


ValueError: [Errno 22] Invalid argument