In [1]:
import pandas as pd

# Load the CSV file
df = pd.read_csv("netflix_titles.csv")

# 1. Drop duplicate entries
df = df.drop_duplicates()

# 2. Drop rows with null values
df = df.dropna()

# 3. Extract only the first country (before comma), replace 'country' column
df['country'] = df['country'].apply(lambda x: x.split(',')[0].strip())

# 4. Split 'listed_in' genres into separate columns
# First, split genres by comma
genres_split = df['listed_in'].str.split(',', expand=True)

# Strip any extra whitespace
genres_split = genres_split.applymap(lambda x: x.strip() if isinstance(x, str) else x)

# Rename the genre columns
genres_split.columns = [f'genre_{i+1}' for i in range(genres_split.shape[1])]

# Drop the original 'listed_in' column and join new genre columns
df = df.drop(columns=['listed_in'])
df = pd.concat([df, genres_split], axis=1)

# 5. Drop any newly formed null rows
df = df.dropna()

# 6. Save to new CSV
df.to_csv("cleaned_netflix_titles.csv", index=False)

print("Data cleaned and saved to 'cleaned_netflix_titles.csv'")


Data cleaned and saved to 'cleaned_netflix_titles.csv'


  genres_split = genres_split.applymap(lambda x: x.strip() if isinstance(x, str) else x)
