In [1]:
import tmdbsimple as tmdb
import keys
import requests
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from nltk.stem import WordNetLemmatizer, PorterStemmer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
import re

In [2]:
# Configure the tmdbsimple client: API key read from the local `keys` module,
# a request timeout, and a shared requests session.
tmdb.API_KEY = keys.second_moviedb_token
tmdb.REQUESTS_TIMEOUT = 5  # presumably seconds, handed to requests — confirm against tmdbsimple docs
tmdb.REQUESTS_SESSION = requests.Session()

In [81]:
# Fetch the details of a single movie (TMDB id 11) to inspect the response layout.
movie = tmdb.Movies(11)
response = movie.info()

In [87]:
# Peek at a nested field of the sample response.
# NOTE(review): this lookup would raise if 'belongs_to_collection' were None — confirm
# whether TMDB returns null for movies that are not part of a collection.
response['belongs_to_collection']['name']

'Star Wars Collection'

In [107]:
# Column-oriented accumulator: one independent, initially empty list per TMDB
# movie field. add_films_to_df() appends to these lists and the dict is later
# passed to pd.DataFrame.
films = {
    field: []
    for field in (
        'adult', 'backdrop_path', 'belongs_to_collection', 'budget',
        'genres', 'homepage', 'id', 'imdb_id',
        'origin_country', 'original_language', 'original_title', 'overview',
        'popularity', 'poster_path', 'production_companies', 'production_countries',
        'release_date', 'revenue', 'runtime', 'spoken_languages',
        'status', 'tagline', 'title', 'video',
        'vote_average', 'vote_count',
    )
}

In [108]:
_TMDB_IMAGE_BASE = 'https://image.tmdb.org/t/p/w500'


def _tmdb_image_url(path):
    """Return the full w500 image URL for a TMDB image path, or None when the path is missing."""
    return f'{_TMDB_IMAGE_BASE}{path}' if path else None


def add_films_to_df(response):
    """Append one TMDB movie-details response to the global ``films`` columns.

    Nested list fields (genres, production companies/countries, spoken
    languages) are flattened to lists of names. Image paths are expanded to
    full w500 URLs; a missing path is stored as None rather than the bogus
    string ``'...w500None'``.

    The whole row is extracted before anything is appended, so a response
    that lacks an expected key raises ``KeyError`` without leaving the column
    lists at different lengths (which would make ``pd.DataFrame(films)`` fail).

    Args:
        response: dict as returned by ``tmdb.Movies(...).info()``.

    Raises:
        KeyError: if ``response`` is missing one of the expected fields.
    """
    row = {
        'adult': response['adult'],
        'backdrop_path': _tmdb_image_url(response['backdrop_path']),
        'belongs_to_collection': response['belongs_to_collection'],
        'budget': response['budget'],
        'genres': [genre['name'] for genre in response['genres']],
        'homepage': response['homepage'],
        'id': response['id'],
        'imdb_id': response['imdb_id'],
        'origin_country': response['origin_country'],
        'original_language': response['original_language'],
        'original_title': response['original_title'],
        'overview': response['overview'],
        'popularity': response['popularity'],
        'poster_path': _tmdb_image_url(response['poster_path']),
        'production_companies': [company['name'] for company in response['production_companies']],
        'production_countries': [country['name'] for country in response['production_countries']],
        'release_date': response['release_date'],
        'revenue': response['revenue'],
        'runtime': response['runtime'],
        'spoken_languages': [language['english_name'] for language in response['spoken_languages']],
        'status': response['status'],
        'tagline': response['tagline'],
        'title': response['title'],
        'video': response['video'],
        'vote_average': response['vote_average'],
        'vote_count': response['vote_count'],
    }

    # Commit only after every field was read successfully.
    for column, value in row.items():
        films[column].append(value)

In [None]:
# Crawl TMDB by walking the numeric movie-id space and appending every movie
# that exists to the `films` accumulator.
MAX_MOVIE_ID = 13001800  # highest id probed; ids 1..MAX_MOVIE_ID inclusive

for movie_id in range(1, MAX_MOVIE_ID + 1):
    try:
        movie = tmdb.Movies(movie_id)
        response = movie.info()
    except requests.exceptions.HTTPError as e:
        # Only HTTP errors carry a response object. Catching bare Exception and
        # reading e.response would raise AttributeError for anything else
        # (timeouts, KeyError, ...) and hide the real failure.
        status_code = e.response.status_code if e.response is not None else None
        if status_code != 404:
            # Anything other than "movie not found" is unexpected: surface it.
            raise
        print(f"Movie ID {movie_id} not found.")
    else:
        # Kept outside the try block so errors raised while recording the row
        # are never mistaken for API errors.
        add_films_to_df(response)
    

In [130]:
# Materialise the accumulated per-field lists as a DataFrame (one row per fetched movie).
# NOTE(review): the frame displayed in the next cell has columns this dict never
# collects ('Unnamed: 0', 'production_companies_number', ...) and pipe-delimited
# genre strings, so df appears to have been re-loaded from a saved CSV in a cell
# that is not in this notebook — confirm and add that load step so that
# Restart & Run All reproduces the outputs.
df = pd.DataFrame(films)

In [5]:
# Preview the movie table (a bare DataFrame expression renders a truncated head/tail view).
df

Unnamed: 0,id,budget,genres,imdb_id,original_language,original_title,overview,popularity,production_companies,production_countries,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,production_companies_number,production_countries_number,spoken_languages_number
0,2,0,Drama|Crime,tt0094675,fi,Ariel,Taisto Kasurinen is a Finnish coal miner whose...,0.823904,Villealfa Filmproduction Oy,Finland,...,69.0,suomi,Released,,Ariel,7.1,40,2,1,2
1,3,0,Drama|Comedy,tt0092149,fi,Varjoja paratiisissa,"An episode in the life of Nikander, a garbage ...",0.47445,Villealfa Filmproduction Oy,Finland,...,76.0,English,Released,,Shadows in Paradise,7.0,32,1,1,3
2,5,4000000,Crime|Comedy,tt0113101,en,Four Rooms,It's Ted the Bellhop's first night on the job....,1.698,Miramax Films,United States of America,...,98.0,English,Released,Twelve outrageous guests. Four scandalous requ...,Four Rooms,6.5,485,2,1,1
3,6,0,Action|Thriller|Crime,tt0107286,en,Judgment Night,"While racing to a boxing match, Frank, Mike, J...",1.32287,Universal Pictures,Japan,...,110.0,English,Released,Don't move. Don't whisper. Don't even breathe.,Judgment Night,6.5,69,3,2,1
4,8,42000,Documentary,tt0825671,en,Life in Loops (A Megacities RMX),Timo Novotny labels his new project an experim...,0.054716,inLoops,Austria,...,80.0,English,Released,A Megacities remix.,Life in Loops (A Megacities RMX),6.4,4,1,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329039,469215,0,Documentary|History|TV Movie,tt5430190,en,The Day Hitler Died,The story of Hitler’s final hours told by peop...,0.0045,History Channel,United States of America,...,46.0,Deutsch,Released,"April 30th, 1945",The Day Hitler Died,7.0,1,1,1,2
329040,469216,0,,,de,Mobb Deep - Life of the Infamous: The Videos,,0.00045,Sony Music Entertainmant,United States of America,...,,English,Released,,Mobb Deep - Life of the Infamous: The Videos,9.0,1,1,1,1
329041,469217,0,Drama|Comedy,,en,#idiot,"In the modern day pursuit of fame and fortune,...",1.37217,none,Malaysia,...,11.0,English,Released,,#idiot,0.0,0,0,1,1
329042,469218,0,Drama|Comedy,tt6391664,en,Conspiracy P.I.E,Director Levi A. Taylor invites you on a nosta...,0.0096,none,United States of America,...,12.0,English,Released,,Conspiracy P.I.E,0.0,0,0,1,1


In [6]:
# Drop rows that are exact duplicates across every column.
# NOTE(review): this runs while 'genres' is still a plain string; once the column
# holds lists (next cell) a full-row drop_duplicates is expected to fail because
# lists are unhashable — keep this cell order.
df = df.drop_duplicates()

In [7]:
# Turn pipe-delimited genre strings ('Drama|Crime') into lists (['Drama', 'Crime']).
# NOTE(review): not idempotent — re-running on already-split lists will not give
# the same result; re-run from the load step instead.
df['genres'] = df['genres'].str.split('|')

In [8]:
# Parse release dates into datetime64; format='mixed' lets pandas infer the
# format of each value individually instead of assuming one format for the column.
df['release_date'] = pd.to_datetime(df['release_date'], format='mixed')

In [9]:
# Replace commas with periods
# 'popularity' arrives as text: swap decimal commas for periods and trim
# surrounding whitespace in one pass ...
df['popularity'] = df['popularity'].str.replace(',', '.').str.strip()

# ... then cast the cleaned column to float64.
df = df.astype({'popularity': 'float64'})

# Machine Learning Model  

##  Collect and Prepare Data 

In [10]:
# Modelling frame: only the target (genres) and the text feature (overview),
# keeping the first row for each distinct overview.
prediction_df = (
    df.loc[:, ['genres', 'overview']]
    .copy()
    .drop_duplicates(subset='overview')
)

In [11]:
# Discard rows whose overview is one of the known placeholder strings
# (exact matches only; missing values are handled by the dropna that follows).
prediction_df = prediction_df[
    ~prediction_df['overview'].isin(["No overview found.", "        ", " ", "//"])
]

In [12]:
# Training needs both the label and the text: drop rows missing either one.
prediction_df = prediction_df.dropna(subset=['genres', 'overview'])

In [13]:
# Preview the filtered genres/overview pairs.
prediction_df

Unnamed: 0,genres,overview
0,"[Drama, Crime]",Taisto Kasurinen is a Finnish coal miner whose...
1,"[Drama, Comedy]","An episode in the life of Nikander, a garbage ..."
2,"[Crime, Comedy]",It's Ted the Bellhop's first night on the job....
3,"[Action, Thriller, Crime]","While racing to a boxing match, Frank, Mike, J..."
4,[Documentary],Timo Novotny labels his new project an experim...
...,...,...
329037,[Comedy],Spanked onto this program is the outrageous be...
329039,"[Documentary, History, TV Movie]",The story of Hitler’s final hours told by peop...
329041,"[Drama, Comedy]","In the modern day pursuit of fame and fortune,..."
329042,"[Drama, Comedy]",Director Levi A. Taylor invites you on a nosta...


In [14]:
# Sanity check: both columns should report zero missing values after the dropna.
prediction_df.isnull().sum()

genres      0
overview    0
dtype: int64

In [15]:
# Flatten the per-movie genre lists into one Series with a single genre per row.
genres_list = prediction_df['genres'].explode()

# Distinct genre labels, in order of first appearance.
unique_genres = genres_list.unique()
unique_genres

array(['Drama', 'Crime', 'Comedy', 'Action', 'Thriller', 'Documentary',
       'Adventure', 'Science Fiction', 'Animation', 'Family', 'Romance',
       'Mystery', 'Music', 'Horror', 'Fantasy', 'War', 'History',
       'Western', 'Foreign', 'TV Movie'], dtype=object)

In [16]:
# Work on a copy so the text-cleaning steps leave prediction_df untouched.
df_predict = prediction_df.copy()

In [17]:
def preprocess_text(text):
    """Normalise an overview string before TF-IDF vectorisation.

    Lower-cases the text, strips HTML tags and URLs, and removes the '#' and
    '@' characters (the words attached to them are kept). No tokenisation,
    stop-word removal, lemmatising or stemming is applied.

    Args:
        text: the raw overview; must be a ``str``.

    Returns:
        The cleaned, lower-cased string.
    """
    processed_text = text.lower()

    # Remove HTML tags (non-greedy, so each tag is matched separately)
    processed_text = re.sub(r'<.*?>', '', processed_text)

    # Remove urls
    processed_text = re.sub(r'http\S+', '', processed_text)

    # Remove hashtag and @ symbols, keeping the word that follows them
    processed_text = re.sub(r'[#@]', '', processed_text)

    return processed_text

In [18]:
# Apply preprocessing to overview
# Apply preprocessing to overview
# The cleaned text goes into a new column; the raw 'overview' column is kept for now.
df_predict['overview_clean'] = df_predict['overview'].apply(preprocess_text)

In [19]:
# Trim both lexicographic extremes of the cleaned overviews.
# NOTE(review): 399 and 30 are unexplained magic numbers, presumably picked by
# eye to cut unusable text at either end of the sort order — confirm, and move
# them to named constants.
df_predict = df_predict.sort_values(by='overview_clean', ascending=False)

# Delete the first 399 rows (after the descending sort: the 399 largest strings)
df_predict = df_predict.iloc[399:]

df_predict = df_predict.sort_values(by='overview_clean', ascending=True)

# Delete the first 30 rows (after the ascending sort: the 30 smallest strings)
df_predict = df_predict.iloc[30:]

In [20]:
# Encode each movie's genre list as a multi-hot row; mlb.classes_ records which
# genre each column stands for. Rows follow the current order of df_predict,
# which has to stay aligned with the overview_clean Series used in the split.
mlb = MultiLabelBinarizer()
genres_binarized = mlb.fit_transform(df_predict['genres'])

In [21]:
# One-hot encode genres
# One-hot encode genres
# explode() yields one row per (movie, genre) and reset_index() keeps the movie's
# index label in an 'index' column; get_dummies() creates one indicator column
# per genre and the groupby-sum folds them back to one row per movie.
# NOTE(review): these indicator columns are only displayed — the training
# targets come from genres_binarized.
genres_df = df_predict['genres'].explode().reset_index()
genres_df = pd.get_dummies(genres_df['genres']).groupby(genres_df['index']).sum()

# Swap the raw genres/overview columns for the indicator columns (joined on index label).
df_predict = df_predict.drop(columns=['genres', 'overview']).join(genres_df)


In [22]:
# Preview: cleaned overview text plus one indicator column per genre.
df_predict

Unnamed: 0,overview_clean,Action,Adventure,Animation,Comedy,Crime,Documentary,Drama,Family,Fantasy,...,History,Horror,Music,Mystery,Romance,Science Fiction,TV Movie,Thriller,War,Western
105152,"""a feature-length deluge of incessant, brillia...",0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
297821,"""a feeling of listlessness and dissatisfaction...",0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
93524,"""a film about coffee"" is a love letter to, and...",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
215348,"""a film about musicologists who can’t dance, n...",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
287812,"""a film by juan-luis buñuel shot in 1964 docum...",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45124,ульяна тулина необычный человек — во взрослом ...,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
43532,уникальный синтез боевика и любовной истории. ...,1,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
44949,успешный бизнесмен антон - отец-одиночка. дела...,0,0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
44540,фантастико-приключенческий фильм по новеллам с...,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


## Select and Train the Model 

In [23]:
# Split data
# Split data
# 90/10 train/test split with a fixed seed. Features are the cleaned overview
# strings; targets are the multi-hot genre rows produced by MultiLabelBinarizer.
X_train, X_test, y_train, y_test = train_test_split(df_predict['overview_clean'], genres_binarized, test_size=0.1, random_state=42)

In [24]:
# Vectorize overviews
# Vectorize overviews
# The vocabulary (capped at 10,000 terms) and IDF weights are learned from the
# training split only; the test split is transformed with that fitted vectorizer.
tfidf = TfidfVectorizer(max_features=10000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

In [25]:
# Train model
# Train model
# MultiOutputClassifier fits one independent LogisticRegression per genre column.
# NOTE(review): LogisticRegression() runs with default settings — check for
# convergence warnings when this cell executes.
model = MultiOutputClassifier(LogisticRegression())
model.fit(X_train_tfidf, y_train)

## Evaluate the Model 

In [26]:
# Predict on the held-out split and report metrics.
# With multilabel targets accuracy_score is subset accuracy: a movie counts as
# correct only when every genre flag matches, so it is far stricter than the
# per-genre precision/recall printed by classification_report.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred, zero_division=0,  target_names=mlb.classes_))

Accuracy: 0.29
                 precision    recall  f1-score   support

         Action       0.73      0.33      0.46      1930
      Adventure       0.64      0.11      0.18       945
      Animation       0.86      0.35      0.50      1216
         Comedy       0.72      0.41      0.52      4166
          Crime       0.53      0.18      0.27      1116
    Documentary       0.86      0.67      0.75      2748
          Drama       0.69      0.59      0.64      6464
         Family       0.68      0.16      0.26       994
        Fantasy       0.56      0.08      0.14       622
        Foreign       0.00      0.00      0.00       532
        History       0.45      0.05      0.10       375
         Horror       0.78      0.44      0.56      1527
          Music       0.84      0.52      0.64      1236
        Mystery       0.51      0.08      0.14       627
        Romance       0.62      0.19      0.30      1851
Science Fiction       0.80      0.33      0.46       806
       TV Movie

## Deploy the Model 

In [27]:
# Example new movie overviews
# Example new movie overviews
# NOTE(review): no live cell reads this list; only the commented-out
# predict_genres_from_list helper refers to it.
new_overviews = [
    "A young boy discovers he has magical powers and attends a school for wizards.",
    "A team of explorers travel through a wormhole in space in an attempt to ensure humanity's survival.",
    "A group of friends reunite for a wedding, but complications arise when a past romance is rekindled.",
    "Three partisans bound by a strong friendship return home after the war, but the clash with everyday reality puts a strain on their bond.",
    'An insomniac office worker and a devil-may-care soap maker form an underground fight club that evolves into much more.',
    'A year after disposing of the body of a man they accidentally killed, a group of dumb teenagers are stalked by a bumbling serial killer.',
    'After a wealthy San Francisco banker is given an opportunity to participate in a mysterious game, his life is turned upside down as he begins to question if it might really be a concealed conspiracy to destroy him.',
    'A woman, accidentally caught in a dark deal.',
    "Follow the intergalactic adventures of Capt, Jean-Luc Picard and his loyal crew aboard the all-new USS Enterprise NCC-1701D, as they explore new worlds."
]


In [28]:
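# NOTE(review): dead code kept for reference; get_genres() in the next cell
# supersedes it. If it is revived, note that the body reads the global
# `new_overviews` instead of its own `overviews` parameter.
# def predict_genres_from_list(overviews):    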
# 
#     processed_overviews = [preprocess_text(overview) for overview in new_overviews]
#     
#     new_overviews_tfidf = tfidf.transform(processed_overviews)
#     
#     predicted_genres = model.predict(new_overviews_tfidf)
#     
#     predicted_genres_readable = mlb.inverse_transform(predicted_genres)
#     
#     if hasattr(model, 'predict_proba'):
#         predicted_probabilities = model.predict_proba(new_overviews_tfidf)
#     
#         for i, (overview, genres, probabilities) in enumerate(zip(processed_overviews, predicted_genres_readable, predicted_probabilities)):
#             if not genres: 
#                 print("No genres")
#                 max_prob_index = probabilities.argmax() 
#                 genre_with_max_prob = mlb.classes_[max_prob_index]  
#                 genres = [genre_with_max_prob]
#                 predicted_genres_readable[i] = genres  
#             
#             print(f"Overview: {overview}")
#             print(f"Predicted Genres: {', '.join(genres)}\n")
#     
#     else:
# 
#         for i, (overview, genres) in enumerate(zip(processed_overviews, predicted_genres_readable)):
#             if not genres:  
#                 max_prob_index = predicted_genres[i].argmax()
#                 genre_with_max_prob = mlb.classes_[max_prob_index]
#                 genres = [genre_with_max_prob]
#                 predicted_genres_readable[i] = genres  
#             
#             print(f"Overview: {overview}")
#             print(f"Predicted Genres: {', '.join(genres)}\n")


In [29]:
def get_genres(my_overview):
    """Predict the genres for a single movie overview.

    The overview is cleaned with ``preprocess_text``, vectorised with the
    fitted ``tfidf`` and classified with the fitted ``model``. When the model
    flags no genre at all, the single most probable genre is returned so the
    result is never empty.

    ``MultiOutputClassifier.predict_proba`` returns one
    ``(n_samples, n_classes)`` array per genre, not one row per sample. The
    fallback therefore collects the positive-class probability of every genre
    and takes the arg-max over genres. Taking ``argmax`` of the first array
    only ever yields index 0 or 1, i.e. always the first or second genre.

    Args:
        my_overview: raw overview text (``str``).

    Returns:
        list[str]: predicted genre names; always at least one element.
    """
    processed_overviews = [preprocess_text(my_overview)]
    features = tfidf.transform(processed_overviews)

    label_matrix = model.predict(features)
    genres = [str(genre) for genre in mlb.inverse_transform(label_matrix)[0]]
    if genres:
        return genres

    if hasattr(model, 'predict_proba'):
        per_genre_proba = model.predict_proba(features)
        scores = []
        for estimator, proba in zip(model.estimators_, per_genre_proba):
            classes = list(estimator.classes_)
            # Probability of the "has this genre" class. A genre that was never
            # positive in training has no class 1 and scores 0.
            scores.append(proba[0][classes.index(1)] if 1 in classes else 0.0)
    else:
        # No probabilities available: fall back to the raw 0/1 flags. They are
        # all zero here, so this resolves to the first genre.
        scores = list(label_matrix[0])

    best_index = max(range(len(scores)), key=scores.__getitem__)
    return [str(mlb.classes_[best_index])]

In [30]:
# Work on a copy so filling in genres does not modify df.
df_for_fill = df.copy()

In [31]:
# Keep the first row for each distinct overview, then discard rows whose
# overview is missing.
df_for_fill = (
    df_for_fill
    .drop_duplicates(subset='overview')
    .dropna(subset=['overview'])
)

In [32]:
# Discard rows whose overview is one of the known placeholder strings (exact matches only).
# NOTE(review): the training-side filter also removes "No overview found.";
# this one does not — confirm which behaviour is intended.
df_for_fill = df_for_fill[~df_for_fill['overview'].isin([" ", "        ", "//"])]

In [33]:
# Remove columns that are not carried into the exported table
# (presumably identifiers, free text and derived counts that are no longer needed — confirm).
df_for_fill = df_for_fill.drop(columns=['id', 'tagline', 'spoken_languages', 'imdb_id', 'original_title', 'production_companies_number', 'spoken_languages_number', 'production_countries_number'])

In [34]:
# Column dtypes and non-null counts after dropping the unused columns.
df_for_fill.info()

<class 'pandas.core.frame.DataFrame'>
Index: 269145 entries, 0 to 329043
Data columns (total 14 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   budget                269145 non-null  int64         
 1   genres                173454 non-null  object        
 2   original_language     268968 non-null  object        
 3   overview              269145 non-null  object        
 4   popularity            269145 non-null  float64       
 5   production_companies  269145 non-null  object        
 6   production_countries  269145 non-null  object        
 7   release_date          251637 non-null  datetime64[ns]
 8   revenue               269145 non-null  int64         
 9   runtime               269145 non-null  float64       
 10  status                267814 non-null  object        
 11  title                 269145 non-null  object        
 12  vote_average          269145 non-null  float64       
 13  vote

In [35]:
# Missing values per column; 'genres' is the gap the classifier is used to fill.
df_for_fill.isnull().sum()

budget                      0
genres                  95691
original_language         177
overview                    0
popularity                  0
production_companies        0
production_countries        0
release_date            17508
revenue                     0
runtime                     0
status                   1331
title                       0
vote_average                0
vote_count                  0
dtype: int64

In [36]:
# Preview the frame before the missing genres are filled in.
df_for_fill

Unnamed: 0,budget,genres,original_language,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,status,title,vote_average,vote_count
0,0,"[Drama, Crime]",fi,Taisto Kasurinen is a Finnish coal miner whose...,0.823904,Villealfa Filmproduction Oy,Finland,1988-10-21,0,69.0,Released,Ariel,7.1,40
1,0,"[Drama, Comedy]",fi,"An episode in the life of Nikander, a garbage ...",0.474450,Villealfa Filmproduction Oy,Finland,1986-10-16,0,76.0,Released,Shadows in Paradise,7.0,32
2,4000000,"[Crime, Comedy]",en,It's Ted the Bellhop's first night on the job....,1.698000,Miramax Films,United States of America,1995-12-25,4300000,98.0,Released,Four Rooms,6.5,485
3,0,"[Action, Thriller, Crime]",en,"While racing to a boxing match, Frank, Mike, J...",1.322870,Universal Pictures,Japan,1993-10-15,12136938,110.0,Released,Judgment Night,6.5,69
4,42000,[Documentary],en,Timo Novotny labels his new project an experim...,0.054716,inLoops,Austria,2006-01-01,0,80.0,Released,Life in Loops (A Megacities RMX),6.4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329037,0,[Comedy],en,Spanked onto this program is the outrageous be...,0.000300,none,none,2003-08-03,0,60.0,Released,The Tom Green Show: Early Exposure - Raw Meat ...,0.0,0
329039,0,"[Documentary, History, TV Movie]",en,The story of Hitler’s final hours told by peop...,0.004500,History Channel,United States of America,2016-01-21,0,46.0,Released,The Day Hitler Died,7.0,1
329041,0,"[Drama, Comedy]",en,"In the modern day pursuit of fame and fortune,...",1.372170,none,Malaysia,2017-02-08,0,11.0,Released,#idiot,0.0,0
329042,0,"[Drama, Comedy]",en,Director Levi A. Taylor invites you on a nosta...,0.009600,none,United States of America,2016-10-11,0,12.0,Released,Conspiracy P.I.E,0.0,0


## Use the Model to Fill Missing Values in Genres

In [37]:
# Replace missing genres with '' so the check below is a plain truthiness test
# (existing values are non-empty lists, missing ones become the empty string).
df_for_fill['genres'] = df_for_fill['genres'].fillna('')

# Keep existing genre lists; for rows without genres, predict them from the overview.
# get_genres is called once per such row (row-wise apply), so expect this cell to be slow.
df_for_fill['genres'] = df_for_fill.apply(lambda row: get_genres(row['overview']) if not row['genres'] else row['genres'], axis=1)


In [38]:
# Re-check missing values: 'genres' should now report 0.
df_for_fill.isnull().sum()

budget                      0
genres                      0
original_language         177
overview                    0
popularity                  0
production_companies        0
production_countries        0
release_date            17508
revenue                     0
runtime                     0
status                   1331
title                       0
vote_average                0
vote_count                  0
dtype: int64

In [39]:
# Preview the frame after the missing genres were filled in.
df_for_fill

Unnamed: 0,budget,genres,original_language,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,status,title,vote_average,vote_count
0,0,"[Drama, Crime]",fi,Taisto Kasurinen is a Finnish coal miner whose...,0.823904,Villealfa Filmproduction Oy,Finland,1988-10-21,0,69.0,Released,Ariel,7.1,40
1,0,"[Drama, Comedy]",fi,"An episode in the life of Nikander, a garbage ...",0.474450,Villealfa Filmproduction Oy,Finland,1986-10-16,0,76.0,Released,Shadows in Paradise,7.0,32
2,4000000,"[Crime, Comedy]",en,It's Ted the Bellhop's first night on the job....,1.698000,Miramax Films,United States of America,1995-12-25,4300000,98.0,Released,Four Rooms,6.5,485
3,0,"[Action, Thriller, Crime]",en,"While racing to a boxing match, Frank, Mike, J...",1.322870,Universal Pictures,Japan,1993-10-15,12136938,110.0,Released,Judgment Night,6.5,69
4,42000,[Documentary],en,Timo Novotny labels his new project an experim...,0.054716,inLoops,Austria,2006-01-01,0,80.0,Released,Life in Loops (A Megacities RMX),6.4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329037,0,[Comedy],en,Spanked onto this program is the outrageous be...,0.000300,none,none,2003-08-03,0,60.0,Released,The Tom Green Show: Early Exposure - Raw Meat ...,0.0,0
329039,0,"[Documentary, History, TV Movie]",en,The story of Hitler’s final hours told by peop...,0.004500,History Channel,United States of America,2016-01-21,0,46.0,Released,The Day Hitler Died,7.0,1
329041,0,"[Drama, Comedy]",en,"In the modern day pursuit of fame and fortune,...",1.372170,none,Malaysia,2017-02-08,0,11.0,Released,#idiot,0.0,0
329042,0,"[Drama, Comedy]",en,Director Levi A. Taylor invites you on a nosta...,0.009600,none,United States of America,2016-10-11,0,12.0,Released,Conspiracy P.I.E,0.0,0


In [40]:
# Separate copy for the export so df_for_fill keeps its current columns.
copy_df = df_for_fill.copy()

In [41]:
# Build one indicator column per genre: explode to one row per (movie, genre),
# one-hot encode, then sum back to one row per original index label.
# NOTE(review): this rebinds genres_df, a name already used in the training section.
genres_df = copy_df['genres'].explode().reset_index()
genres_df = pd.get_dummies(genres_df['genres']).groupby(genres_df['index']).sum()
# Attach the indicator columns next to the original columns (joined on index label).
copy_df = copy_df.join(genres_df)

In [42]:
# Preview the export frame: original columns followed by the genre indicator columns.
copy_df

Unnamed: 0,budget,genres,original_language,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,...,History,Horror,Music,Mystery,Romance,Science Fiction,TV Movie,Thriller,War,Western
0,0,"[Drama, Crime]",fi,Taisto Kasurinen is a Finnish coal miner whose...,0.823904,Villealfa Filmproduction Oy,Finland,1988-10-21,0,69.0,...,0,0,0,0,0,0,0,0,0,0
1,0,"[Drama, Comedy]",fi,"An episode in the life of Nikander, a garbage ...",0.474450,Villealfa Filmproduction Oy,Finland,1986-10-16,0,76.0,...,0,0,0,0,0,0,0,0,0,0
2,4000000,"[Crime, Comedy]",en,It's Ted the Bellhop's first night on the job....,1.698000,Miramax Films,United States of America,1995-12-25,4300000,98.0,...,0,0,0,0,0,0,0,0,0,0
3,0,"[Action, Thriller, Crime]",en,"While racing to a boxing match, Frank, Mike, J...",1.322870,Universal Pictures,Japan,1993-10-15,12136938,110.0,...,0,0,0,0,0,0,0,1,0,0
4,42000,[Documentary],en,Timo Novotny labels his new project an experim...,0.054716,inLoops,Austria,2006-01-01,0,80.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329037,0,[Comedy],en,Spanked onto this program is the outrageous be...,0.000300,none,none,2003-08-03,0,60.0,...,0,0,0,0,0,0,0,0,0,0
329039,0,"[Documentary, History, TV Movie]",en,The story of Hitler’s final hours told by peop...,0.004500,History Channel,United States of America,2016-01-21,0,46.0,...,1,0,0,0,0,0,1,0,0,0
329041,0,"[Drama, Comedy]",en,"In the modern day pursuit of fame and fortune,...",1.372170,none,Malaysia,2017-02-08,0,11.0,...,0,0,0,0,0,0,0,0,0,0
329042,0,"[Drama, Comedy]",en,Director Levi A. Taylor invites you on a nosta...,0.009600,none,United States of America,2016-10-11,0,12.0,...,0,0,0,0,0,0,0,0,0,0


In [43]:
# Persist the result to the working directory. With the default settings the index
# is written as the first, unnamed CSV column, and the list-valued 'genres'
# column is stored as its string representation.
copy_df.to_csv('MovieGenresDummies.csv')