In [15]:
import pandas as pd
import numpy as np

In [None]:
# Path of the CSV file to analyse; replace the empty string with your own path.
input_file = ""  # Your Path

# Read with the same options visualize_bin_distribution() uses, so this preview
# matches what the function sees: 'utf-8-sig' strips a leading BOM from the
# header, and videoId is kept as text instead of being inferred as a number.
df = pd.read_csv(input_file, encoding='utf-8-sig', dtype={'videoId': str})
df.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import os

def _count_view_bins(df, ordered_bins):
    """Count rows per ``view_bin`` label and return them in display order.

    Args:
        df: DataFrame containing a ``view_bin`` column.
        ordered_bins: Bin labels in the order they should appear on the x-axis.

    Returns:
        A Series indexed by bin label. Every label in ``ordered_bins`` is
        present (count 0 when no row has it); labels found in the data but not
        listed in ``ordered_bins`` are appended afterwards in sorted order.
    """
    # Missing values are dropped; labels are cast to str so matplotlib always
    # receives a homogeneous list of category strings.
    labels = df['view_bin'].dropna().astype(str)
    counts = labels.value_counts()

    # Keep unknown labels visible instead of turning them into NaN categories.
    unexpected = sorted(set(counts.index) - set(ordered_bins))
    return counts.reindex(list(ordered_bins) + unexpected, fill_value=0)


def visualize_bin_distribution(input_file, output_path='view_bin_distribution.png'):
    """Plot how many unique videos fall into each ``view_bin`` and save the chart.

    The CSV at ``input_file`` is loaded, rows with a duplicate ``videoId`` are
    removed (first occurrence kept), and the number of rows per ``view_bin``
    label is drawn as a bar chart. Bins with no rows are drawn as zero-height
    bars so the x-axis is the same for every input file.

    Args:
        input_file: Path to a CSV file with ``videoId`` and ``view_bin`` columns.
        output_path: Where the PNG of the chart is written. Defaults to
            ``'view_bin_distribution.png'`` in the current working directory.

    Returns:
        None. Problems (missing file, missing columns, or any exception raised
        while loading or plotting) are reported with a printed ``[ERROR]``
        message rather than raised.
    """
    if not os.path.exists(input_file):
        print(f"[ERROR] Input File '{input_file}'Not Found. check path")
        return

    try:
        # 1. Load the data; videoId is forced to str so IDs are never parsed as numbers.
        df = pd.read_csv(input_file, encoding='utf-8-sig', dtype={'videoId': str})

        # Report absent columns explicitly instead of via a bare KeyError text.
        missing = [col for col in ('videoId', 'view_bin') if col not in df.columns]
        if missing:
            print(f"[ERROR] Missing required column(s): {missing}")
            return

        # 2. Drop duplicated videos (non-mutating; keeps the first occurrence).
        df = df.drop_duplicates(subset=['videoId'])

        # 3. Count rows per bin, laid out in the fixed display order.
        ordered_bins = [
            '0~1000', '1000~10000', '10000~100000',
            '100000~500000', '500000~1000000', '1000000~10000000'
        ]
        bin_counts = _count_view_bins(df, ordered_bins)

        # 4. Draw the bar chart.
        fig, ax = plt.subplots(figsize=(14, 8))
        bars = ax.bar(list(bin_counts.index), bin_counts.to_numpy(), color='#1E88E5')

        for bar in bars:
            height = bar.get_height()
            # Offset the label in points, not data units, so it sits just above
            # the bar regardless of how large or small the counts are.
            ax.annotate(
                str(int(height)),
                xy=(bar.get_x() + bar.get_width() / 2, height),
                xytext=(0, 3),
                textcoords='offset points',
                ha='center', va='bottom', fontsize=12,
            )

        ax.set_title("Data Distribution by View Bin", fontsize=16)
        ax.set_xlabel('View Bin', fontsize=14)
        ax.set_ylabel('Count', fontsize=14)
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        ax.tick_params(axis='x', labelrotation=20)
        fig.tight_layout()

        # 5. Save the figure to disk, then display it.
        fig.savefig(output_path)
        print(f"\n[INFO] visualization result saved in '{output_path}' .")
        plt.show()

    except Exception as e:
        # Best-effort notebook helper: report the problem instead of raising.
        print(f"[ERROR]: {e}")

In [None]:
# Draw and save the view_bin distribution chart for the CSV configured above.
visualize_bin_distribution(input_file=input_file)