In [4]:
import pandas as pd

# Load dataset with correct file path format
df = pd.read_csv("/content/youtube.csv")  # Use 'r' or '\\'

# Convert 'Subscriptions' to numeric
df["Subscriptions"] = pd.to_numeric(df["Subscriptions"].str.extract(r'(\d+)')[0], errors='coerce')

# Convert 'Uploads' to numeric (removes commas)
df["Uploads"] = pd.to_numeric(df["Uploads"].str.replace(',', '', regex=True), errors='coerce')

# Convert 'Views' to numeric (handling commas and missing values correctly)
df["Views"] = (
    df["Views"]
    .str.replace(",", "", regex=True)  # Remove commas
    .replace("--", None)  # Convert "--" to None
    .replace("", None)  # Convert empty strings to None
    .astype("Int64")  # Convert to nullable integer type
)

# Print corrected datatypes
print(df.dtypes)


Rank              object
Grade             object
Ch_name           object
Uploads          float64
Subscriptions      int64
Views              Int64
dtype: object


In [5]:
df[(df['Ch_name'] == 'T-Series') | (df['Ch_name'] == 'SAB TV') | (df['Ch_name'] == 'Zee TV')]

Unnamed: 0,Rank,Grade,Ch_name,Uploads,Subscriptions,Views
0,1st,A++,T-Series,14297.0,135,104724369854
7,8th,A++,Zee TV,98621.0,43,41544259461
10,11th,A++,SAB TV,23812.0,29,25597492503


In [7]:
df.groupby(['Ch_name','Subscriptions', 'Views']).agg(Count=('Subscriptions','count')).sort_values(by="Subscriptions", ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Count
Ch_name,Subscriptions,Views,Unnamed: 3_level_1
ببجي بالعربي,991,141803352,1
Телеканал Звезда,990,181693400,1
3D Music India,960,85756622,1
VSRAP,947,84033455,1
Siyah Giyen Genç,947,212758305,1
MUSIC BANGLA TV,943,227019598,1
SO LY DA,908,227038807,1
BillieEilishVEVO,876,4369175365,1
Odia E News,838,265026133,1
Max Steel,825,188365,1


In [8]:
df.groupby('Grade').agg(Average_subscriber=('Subscriptions', "mean"))

Unnamed: 0_level_0,Average_subscriber
Grade,Unnamed: 1_level_1
A,75.404598
A+,40.061224
A++,48.818182


In [9]:
df[df['Grade'] == 'A++'].sort_values(by='Views', ascending=False).head(5)

Unnamed: 0,Rank,Grade,Ch_name,Uploads,Subscriptions,Views
0,1st,A++,T-Series,14297.0,135,104724369854
1,2nd,A++,Cocomelon - Nursery Rhymes,517.0,78,57054290512
4,5th,A++,SET India,37017.0,69,52149505781
7,8th,A++,Zee TV,98621.0,43,41544259461
6,7th,A++,Movieclips,35226.0,36,35055807085
