# Netflix Data Cleaning Task

### Step 1: Import Libraries and Load Dataset

In [None]:

import pandas as pd

# Read the raw titles CSV into a DataFrame, report its (rows, columns)
# shape, and preview the first rows as the cell output.
df = pd.read_csv(filepath_or_buffer="../dataset/netflix_titles.csv")
print(df.shape)
df.head()


### Step 2: Handle Missing Values

In [None]:

# Text columns: substitute the literal placeholder "Unknown" for nulls.
placeholder_cols = ["director", "cast", "country", "rating", "duration"]
df[placeholder_cols] = df[placeholder_cols].fillna("Unknown")

# date_added: substitute its most frequent value (the first mode) for nulls.
most_common_date = df["date_added"].mode()[0]
df["date_added"] = df["date_added"].fillna(most_common_date)

# Cell output: remaining null count per column.
df.isnull().sum()


### Step 3: Remove Duplicates

In [None]:

# Drop rows that are exact duplicates across every column, keeping the
# first occurrence, and print the frame's shape on either side of the drop.
print("Before:", df.shape)
df = df.drop_duplicates(keep="first")
print("After:", df.shape)


### Step 4: Standardize Text Values

In [None]:

# Normalize the categorical text columns: trim surrounding whitespace,
# then lowercase, so equal values compare equal regardless of formatting.
for text_col in ("type", "country", "rating"):
    trimmed = df[text_col].str.strip()
    df[text_col] = trimmed.str.lower()

# Cell output: preview of the normalized frame.
df.head()


### Step 5: Convert Date Formats and Data Types

In [None]:

# Parse date_added into datetime64 and make release_year an integer column.
#
# Date strings can carry stray leading/trailing whitespace
# (e.g. " August 4, 2017"). pandas infers one format for the column and,
# with errors="coerce", any string that does not match it -- including one
# that differs only by a leading space -- silently becomes NaT. Strip the
# text first so only genuinely unparseable values are coerced.
raw_dates = df["date_added"]
if not pd.api.types.is_datetime64_any_dtype(raw_dates):
    # Guard keeps the cell re-runnable: the .str accessor is unavailable
    # once the column has already been converted to datetime.
    raw_dates = raw_dates.str.strip()
df["date_added"] = pd.to_datetime(raw_dates, errors="coerce")

# astype(int) raises if release_year contains missing values.
df["release_year"] = df["release_year"].astype(int)

# Cell output: resulting dtype of every column.
df.dtypes


### Step 6: Rename Columns

In [None]:

# Normalize column labels to snake_case-like names: trim whitespace,
# lowercase, then replace each space with an underscore.
normalized_labels = df.columns.str.strip()
normalized_labels = normalized_labels.str.lower()
df.columns = normalized_labels.str.replace(" ", "_")

# Cell output: preview with the normalized headers.
df.head()


### Step 7: Save Cleaned Dataset

In [None]:

# Write the cleaned frame next to the raw file; index=False omits the
# row index from the output CSV.
df.to_csv(path_or_buf="../dataset/netflix_titles_cleaned.csv", index=False)
print("Cleaned dataset saved!")
