In [1]:
import pandas as pd
import ast

In [2]:
# Read the raw genres table from disk
genres_df = pd.read_csv('genres.csv')

# Cast the 'Genres' column to str so every row can be handed to the parser
genres_df = genres_df.astype({'Genres': str})

In [3]:
# Function to convert string representation of list to actual list
def parse_genre_list(genre_str):
    """Convert the string representation of a list into an actual list.

    Args:
        genre_str: Usually a string such as "['Action', 'Drama']". A value
            that is already a list is returned unchanged, so applying this
            function a second time to the same data does not discard it.

    Returns:
        The parsed list, or an empty list when the value cannot be parsed as
        a Python literal or when the parsed literal is not a list.
    """
    # Already parsed (e.g. the applying cell was re-run): ast.literal_eval
    # rejects a list object with ValueError, which would turn real data
    # into [] without any warning.
    if isinstance(genre_str, list):
        return genre_str

    try:
        genre_list = ast.literal_eval(genre_str)
    except (ValueError, SyntaxError, TypeError, RecursionError):
        # TypeError: literals such as "{[1]: 2}" (unhashable key).
        # RecursionError: pathologically nested input.
        # Either way the row is treated as "no genres" instead of aborting
        # the whole column conversion.
        return []

    # Valid literals that are not lists (strings, numbers, dicts, ...) do not
    # describe a genre list.
    return genre_list if isinstance(genre_list, list) else []

# Apply the function to the 'Genres' column. Rows that already hold a list
# (i.e. this cell has been run before) are kept as they are, so re-running
# the cell does not replace previously parsed entries with empty lists.
genres_df['Genres'] = genres_df['Genres'].apply(
    lambda value: value if isinstance(value, list) else parse_genre_list(value)
)

# Flatten the list of lists into a single list of genres
all_genres = [genre for sublist in genres_df['Genres'] for genre in sublist]

In [4]:
# Collapse repeated genres via a set, then sort the survivors in ascending order
unique_genres = list(set(all_genres))
unique_genres.sort()

# Wrap the sorted values in a single-column DataFrame
unique_genres_df = pd.DataFrame(data=unique_genres, columns=['Unique Genres'])

In [5]:
# Write the unique genres to disk, leaving out the DataFrame index column
unique_genres_df.to_csv(path_or_buf='cleaned_genres.csv', index=False)

# Tell the reader where the result was written
print("Unique genres have been saved to 'cleaned_genres.csv'.")

Unique genres have been saved to 'cleaned_genres.csv'.
