In [1]:
import pandas as pd
f1 = pd.read_csv('movies_metadata.csv', low_memory=False)
f2 = pd.read_csv('credits.csv')

In [2]:
f1.shape

(45466, 24)

In [3]:
f2.shape

(45476, 3)

In [4]:
f1['budget'] = pd.to_numeric(f1['budget'], errors='coerce')
f1['revenue'] = pd.to_numeric(f1['revenue'], errors='coerce')
f1['id'] = pd.to_numeric(f1['id'], errors='coerce')

In [5]:
f1.dropna(subset=['id', 'budget', 'revenue'], inplace=True)

In [6]:
f1 = f1[(f1['budget'] > 1000) & (f1['revenue'] > 1000)]
f1['id'] = f1['id'].astype(int)

In [7]:
f1['id'] = pd.to_numeric(f1['id'], errors='coerce')
f1['vote_count'] = pd.to_numeric(f1['vote_count'], errors='coerce')
f1.dropna(inplace=True)
f1['id'] = f1['id'].astype(int)

In [8]:
f1.shape

(470, 24)

In [9]:
f2['id'] = pd.to_numeric(f2['id'], errors='coerce')
f2.dropna(subset=['id'], inplace=True)
f2['id'] = f2['id'].astype(int)

In [10]:
f2.shape

(45476, 3)

In [11]:
df = f1.merge(f2, on='id')

In [12]:
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,cast,crew
0,False,"{'id': 645, 'name': 'James Bond Collection', '...",58000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.mgm.com/view/movie/757/Goldeneye/,710,tt0113189,en,GoldenEye,James Bond must unmask the mysterious head of ...,...,130.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,No limits. No fears. No substitutes.,GoldenEye,False,6.6,1194.0,"[{'cast_id': 1, 'character': 'James Bond', 'cr...","[{'credit_id': '52fe426ec3a36847f801e14b', 'de..."
1,False,"{'id': 43563, 'name': 'Friday Collection', 'po...",3500000.0,"[{'id': 35, 'name': 'Comedy'}]",http://www.newline.com/properties/friday.html,10634,tt0113118,en,Friday,Craig and Smokey are two guys in Los Angeles h...,...,91.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,A lot can go down between thursday and saturda...,Friday,False,7.0,513.0,"[{'cast_id': 1, 'character': 'Craig Jones', 'c...","[{'credit_id': '52fe43999251416c75016ad5', 'de..."
2,False,"{'id': 10924, 'name': 'From Dusk Till Dawn Col...",19000000.0,"[{'id': 27, 'name': 'Horror'}, {'id': 28, 'nam...",http://www.miramax.com/movie/from-dusk-till-dawn/,755,tt0116367,en,From Dusk Till Dawn,Seth Gecko and his younger brother Richard are...,...,108.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,One night is all that stands between them and ...,From Dusk Till Dawn,False,6.9,1644.0,"[{'cast_id': 1, 'character': 'Seth Gecko', 'cr...","[{'credit_id': '52fe4271c3a36847f801f22d', 'de..."
3,False,"{'id': 439053, 'name': 'Brooklyn Cigar Store C...",2000000.0,"[{'id': 35, 'name': 'Comedy'}]",http://miramax.com/movie/blue-in-the-face/,5894,tt0112541,en,Blue in the Face,"Auggie runs a small tobacco shop in Brooklyn, ...",...,83.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Welcome to the planet Brooklyn.,Blue in the Face,False,6.8,28.0,"[{'cast_id': 22, 'character': 'Auggie Wren', '...","[{'credit_id': '52fe442ac3a36847f80861c1', 'de..."
4,False,"{'id': 286162, 'name': 'Power Rangers Collecti...",15000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.powerrangers.com/,9070,tt0113820,en,Mighty Morphin Power Rangers: The Movie,Power up with six incredible teens who out-man...,...,92.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Power Is On!,Mighty Morphin Power Rangers: The Movie,False,5.2,153.0,"[{'cast_id': 2, 'character': 'Kimberly Hart / ...","[{'credit_id': '52fe44d8c3a36847f80ad707', 'de..."


<h1> Parsing JSON ones</h1>

In [13]:
import ast
import numpy as np

def parse_json_list(x):
    if isinstance(x, list):
        return x
    
    if isinstance(x, str):
        try:
            return [i['name'] for i in ast.literal_eval(x)]
        except:
            return []
            
    return []

def extract_crew_info(x):

    director = np.nan
    producers = []
    composers = []
    
    if isinstance(x, list):
        crew_list = x
    elif isinstance(x, str):
        try:
            crew_list = ast.literal_eval(x)
        except:
            return pd.Series([np.nan, [], []])
    else:
        return pd.Series([np.nan, [], []])
        
    for i in crew_list:
        if i['job'] == 'Director':
            director = i['name']
        elif i['job'] == 'Producer':
            producers.append(i['name'])
        elif i['job'] == 'Original Music Composer':
            composers.append(i['name'])
            
    return pd.Series([director, producers, composers])

In [14]:
df[['director', 'producers', 'composers']] = df['crew'].apply(extract_crew_info)

In [15]:
df['genres'] = df['genres'].apply(parse_json_list)
df['top_cast'] = df['cast'].apply(parse_json_list).apply(lambda x: x[:3]) # keeping only top 3 cast actors

In [16]:
df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce')
df['release_month'] = df['release_date'].dt.month

In [17]:
keep_cols = [
    'title', 'budget', 'revenue', 
    'genres', 'top_cast', 
    'director', 'producers', 'composers',
    'vote_average', 'release_month', 'runtime', 'vote_count'
]
df = df[keep_cols].copy()

In [18]:
df.head()

Unnamed: 0,title,budget,revenue,genres,top_cast,director,producers,composers,vote_average,release_month,runtime,vote_count
0,GoldenEye,58000000.0,352194034.0,"[Adventure, Action, Thriller]","[Pierce Brosnan, Sean Bean, Izabella Scorupco]",Martin Campbell,"[Barbara Broccoli, Michael G. Wilson, Anthony ...",[Eric Serra],6.6,11,130.0,1194.0
1,Friday,3500000.0,28215918.0,[Comedy],"[Ice Cube, Chris Tucker, Nia Long]",F. Gary Gray,[],[],7.0,4,91.0,513.0
2,From Dusk Till Dawn,19000000.0,25836616.0,"[Horror, Action, Thriller, Crime]","[George Clooney, Quentin Tarantino, Harvey Kei...",Robert Rodriguez,"[Gianni Nunnari, Meir Teper]",[Graeme Revell],6.9,1,108.0,1644.0
3,Blue in the Face,2000000.0,1275000.0,[Comedy],"[Harvey Keitel, Lou Reed, Michael J. Fox]",Wayne Wang,"[Peter Newman, Diana Phillips, Hisami Kuroiwa,...",[],6.8,9,83.0,28.0
4,Mighty Morphin Power Rangers: The Movie,15000000.0,66000000.0,"[Action, Adventure, Science Fiction, Family, F...","[Amy Jo Johnson, Jason David Frank, David Yost]",Bryan Spicer,"[Suzanne Todd, Haim Saban]",[Graeme Revell],5.2,6,92.0,153.0


In [19]:
df['vote_count'] = df['vote_count'].fillna(0)

In [20]:
file = 'movies_cleaned.csv'
df.to_csv(file, index=False)

In [23]:
indian_hits = [
    # ==================== BOLLYWOOD (Hindi) ====================
    # --- SHAH RUKH KHAN ---
    {"title": "Jawan", "budget": 36000000, "revenue": 140000000, "runtime": 169, "genres": "['Action', 'Thriller']", "top_cast": "['Shah Rukh Khan', 'Nayanthara', 'Vijay Sethupathi']", "director": "Atlee", "composers": "['Anirudh Ravichander']", "vote_average": 7.5, "vote_count": 5000, "release_month": 9},
    {"title": "Pathaan", "budget": 30000000, "revenue": 130000000, "runtime": 146, "genres": "['Action', 'Adventure']", "top_cast": "['Shah Rukh Khan', 'Deepika Padukone']", "director": "Siddharth Anand", "composers": "['Vishal-Shekhar']", "vote_average": 7.0, "vote_count": 4500, "release_month": 1},
    {"title": "Chennai Express", "budget": 11000000, "revenue": 53000000, "runtime": 141, "genres": "['Action', 'Comedy']", "top_cast": "['Shah Rukh Khan', 'Deepika Padukone']", "director": "Rohit Shetty", "composers": "['Vishal-Shekhar']", "vote_average": 6.8, "vote_count": 3000, "release_month": 8},

    # --- SALMAN KHAN ---
    {"title": "Bajrangi Bhaijaan", "budget": 12000000, "revenue": 120000000, "runtime": 163, "genres": "['Drama', 'Comedy']", "top_cast": "['Salman Khan', 'Kareena Kapoor Khan', 'Nawazuddin Siddiqui']", "director": "Kabir Khan", "composers": "['Pritam']", "vote_average": 8.1, "vote_count": 12000, "release_month": 7},
    {"title": "Sultan", "budget": 11000000, "revenue": 90000000, "runtime": 170, "genres": "['Drama', 'Action']", "top_cast": "['Salman Khan', 'Anushka Sharma']", "director": "Ali Abbas Zafar", "composers": "['Vishal-Shekhar']", "vote_average": 7.2, "vote_count": 6000, "release_month": 7},
    {"title": "Tiger Zinda Hai", "budget": 25000000, "revenue": 85000000, "runtime": 161, "genres": "['Action', 'Thriller']", "top_cast": "['Salman Khan', 'Katrina Kaif']", "director": "Ali Abbas Zafar", "composers": "['Vishal-Shekhar']", "vote_average": 7.0, "vote_count": 5500, "release_month": 12},

    # --- AAMIR KHAN ---
    {"title": "Dangal", "budget": 9000000, "revenue": 240000000, "runtime": 161, "genres": "['Drama', 'Action']", "top_cast": "['Aamir Khan', 'Sakshi Tanwar']", "director": "Nitesh Tiwari", "composers": "['Pritam']", "vote_average": 8.3, "vote_count": 18000, "release_month": 12},
    {"title": "PK", "budget": 10000000, "revenue": 100000000, "runtime": 153, "genres": "['Comedy', 'Drama']", "top_cast": "['Aamir Khan', 'Anushka Sharma']", "director": "Rajkumar Hirani", "composers": "['Shantanu Moitra']", "vote_average": 8.1, "vote_count": 15000, "release_month": 12},
    {"title": "3 Idiots", "budget": 7000000, "revenue": 60000000, "runtime": 170, "genres": "['Comedy', 'Drama']", "top_cast": "['Aamir Khan', 'Kareena Kapoor Khan']", "director": "Rajkumar Hirani", "composers": "['Shantanu Moitra']", "vote_average": 8.4, "vote_count": 19000, "release_month": 12},

    # --- RANBIR KAPOOR ---
    {"title": "Animal", "budget": 12000000, "revenue": 110000000, "runtime": 201, "genres": "['Action', 'Crime']", "top_cast": "['Ranbir Kapoor', 'Rashmika Mandanna']", "director": "Sandeep Reddy Vanga", "composers": "['Pritam']", "vote_average": 7.1, "vote_count": 4200, "release_month": 12},
    {"title": "Sanju", "budget": 12000000, "revenue": 75000000, "runtime": 161, "genres": "['Drama', 'Biography']", "top_cast": "['Ranbir Kapoor', 'Paresh Rawal']", "director": "Rajkumar Hirani", "composers": "['A.R. Rahman']", "vote_average": 7.7, "vote_count": 5000, "release_month": 6},
    {"title": "Brahmastra", "budget": 50000000, "revenue": 54000000, "runtime": 167, "genres": "['Fantasy', 'Action']", "top_cast": "['Ranbir Kapoor', 'Alia Bhatt']", "director": "Ayan Mukerji", "composers": "['Pritam']", "vote_average": 6.0, "vote_count": 3000, "release_month": 9},

    # --- HRITHIK ROSHAN ---
    {"title": "War", "budget": 20000000, "revenue": 65000000, "runtime": 154, "genres": "['Action', 'Thriller']", "top_cast": "['Hrithik Roshan', 'Tiger Shroff']", "director": "Siddharth Anand", "composers": "['Vishal-Shekhar']", "vote_average": 6.5, "vote_count": 4000, "release_month": 10},
    {"title": "Krrish 3", "budget": 15000000, "revenue": 40000000, "runtime": 152, "genres": "['Action', 'Sci-Fi']", "top_cast": "['Hrithik Roshan', 'Priyanka Chopra']", "director": "Rakesh Roshan", "composers": "['Rajesh Roshan']", "vote_average": 5.5, "vote_count": 2000, "release_month": 11},
    {"title": "Fighter", "budget": 30000000, "revenue": 45000000, "runtime": 166, "genres": "['Action', 'Thriller']", "top_cast": "['Hrithik Roshan', 'Deepika Padukone']", "director": "Siddharth Anand", "composers": "['Vishal-Shekhar']", "vote_average": 7.0, "vote_count": 3000, "release_month": 1},

    # ==================== KOLLYWOOD (Tamil) ====================
    # --- THALAPATHY VIJAY ---
    {"title": "Leo", "budget": 35000000, "revenue": 74000000, "runtime": 164, "genres": "['Action', 'Thriller']", "top_cast": "['Thalapathy Vijay', 'Sanjay Dutt']", "director": "Lokesh Kanagaraj", "composers": "['Anirudh Ravichander']", "vote_average": 7.6, "vote_count": 4000, "release_month": 10},
    {"title": "Master", "budget": 18000000, "revenue": 36000000, "runtime": 179, "genres": "['Action', 'Thriller']", "top_cast": "['Thalapathy Vijay', 'Vijay Sethupathi']", "director": "Lokesh Kanagaraj", "composers": "['Anirudh Ravichander']", "vote_average": 7.8, "vote_count": 3500, "release_month": 1},
    {"title": "Mersal", "budget": 16000000, "revenue": 38000000, "runtime": 170, "genres": "['Action', 'Thriller']", "top_cast": "['Thalapathy Vijay', 'Samantha Ruth Prabhu']", "director": "Atlee", "composers": "['A.R. Rahman']", "vote_average": 7.5, "vote_count": 3200, "release_month": 10},

    # --- RAJINIKANTH ---
    {"title": "Jailer", "budget": 24000000, "revenue": 78000000, "runtime": 168, "genres": "['Action', 'Comedy']", "top_cast": "['Rajinikanth', 'Mohanlal']", "director": "Nelson Dilipkumar", "composers": "['Anirudh Ravichander']", "vote_average": 7.4, "vote_count": 3800, "release_month": 8},
    {"title": "2.0", "budget": 60000000, "revenue": 95000000, "runtime": 147, "genres": "['Sci-Fi', 'Action']", "top_cast": "['Rajinikanth', 'Akshay Kumar']", "director": "S. Shankar", "composers": "['A.R. Rahman']", "vote_average": 6.5, "vote_count": 2500, "release_month": 11},
    {"title": "Kabali", "budget": 12000000, "revenue": 40000000, "runtime": 153, "genres": "['Action', 'Crime']", "top_cast": "['Rajinikanth', 'Radhika Apte']", "director": "Pa. Ranjith", "composers": "['Santhosh Narayanan']", "vote_average": 6.7, "vote_count": 2100, "release_month": 7},

    # --- AJITH KUMAR (Thala) ---
    {"title": "Viswasam", "budget": 12000000, "revenue": 26000000, "runtime": 156, "genres": "['Action', 'Drama']", "top_cast": "['Ajith Kumar', 'Nayanthara']", "director": "Siva", "composers": "['D. Imman']", "vote_average": 7.0, "vote_count": 1500, "release_month": 1},
    {"title": "Thunivu", "budget": 16000000, "revenue": 25000000, "runtime": 146, "genres": "['Action', 'Heist']", "top_cast": "['Ajith Kumar', 'Manju Warrier']", "director": "H. Vinoth", "composers": "['Ghibran']", "vote_average": 6.8, "vote_count": 1800, "release_month": 1},
    {"title": "Valimai", "budget": 18000000, "revenue": 20000000, "runtime": 179, "genres": "['Action', 'Thriller']", "top_cast": "['Ajith Kumar', 'Huma Qureshi']", "director": "H. Vinoth", "composers": "['Yuvan Shankar Raja']", "vote_average": 6.2, "vote_count": 1600, "release_month": 2},

    # --- KAMAL HAASAN ---
    {"title": "Vikram", "budget": 15000000, "revenue": 60000000, "runtime": 174, "genres": "['Action', 'Thriller']", "top_cast": "['Kamal Haasan', 'Vijay Sethupathi', 'Fahadh Faasil']", "director": "Lokesh Kanagaraj", "composers": "['Anirudh Ravichander']", "vote_average": 8.4, "vote_count": 8000, "release_month": 6},
    {"title": "Indian 2", "budget": 30000000, "revenue": 18000000, "runtime": 180, "genres": "['Action', 'Drama']", "top_cast": "['Kamal Haasan', 'Siddharth']", "director": "S. Shankar", "composers": "['Anirudh Ravichander']", "vote_average": 5.0, "vote_count": 1000, "release_month": 7},
    {"title": "Vishwaroopam", "budget": 11000000, "revenue": 25000000, "runtime": 148, "genres": "['Action', 'Thriller']", "top_cast": "['Kamal Haasan', 'Pooja Kumar']", "director": "Kamal Haasan", "composers": "['Shankar-Ehsaan-Loy']", "vote_average": 7.9, "vote_count": 4000, "release_month": 1},

    # ==================== TOLLYWOOD (Telugu) ====================
    # --- PRABHAS ---
    {"title": "Baahubali 2", "budget": 30000000, "revenue": 220000000, "runtime": 167, "genres": "['Action', 'Fantasy']", "top_cast": "['Prabhas', 'Rana Daggubati']", "director": "S.S. Rajamouli", "composers": "['M.M. Keeravani']", "vote_average": 8.2, "vote_count": 12000, "release_month": 4},
    {"title": "Salaar: Part 1", "budget": 32000000, "revenue": 85000000, "runtime": 175, "genres": "['Action', 'Thriller']", "top_cast": "['Prabhas', 'Prithviraj Sukumaran']", "director": "Prashanth Neel", "composers": "['Ravi Basrur']", "vote_average": 6.8, "vote_count": 3000, "release_month": 12},
    {"title": "Kalki 2898 AD", "budget": 72000000, "revenue": 140000000, "runtime": 180, "genres": "['Sci-Fi', 'Action']", "top_cast": "['Prabhas', 'Amitabh Bachchan', 'Deepika Padukone']", "director": "Nag Ashwin", "composers": "['Santhosh Narayanan']", "vote_average": 7.8, "vote_count": 5000, "release_month": 6},

    # --- ALLU ARJUN ---
    {"title": "Pushpa 2: The Rule", "budget": 60000000, "revenue": 210000000, "runtime": 200, "genres": "['Action', 'Crime']", "top_cast": "['Allu Arjun', 'Rashmika Mandanna']", "director": "Sukumar", "composers": "['Devi Sri Prasad']", "vote_average": 8.5, "vote_count": 15000, "release_month": 12},
    {"title": "Pushpa: The Rise", "budget": 20000000, "revenue": 45000000, "runtime": 179, "genres": "['Action', 'Drama']", "top_cast": "['Allu Arjun', 'Fahadh Faasil']", "director": "Sukumar", "composers": "['Devi Sri Prasad']", "vote_average": 7.6, "vote_count": 6000, "release_month": 12},
    {"title": "Ala Vaikunthapurramuloo", "budget": 12000000, "revenue": 35000000, "runtime": 165, "genres": "['Action', 'Comedy']", "top_cast": "['Allu Arjun', 'Pooja Hegde']", "director": "Trivikram Srinivas", "composers": "['Thaman S']", "vote_average": 7.3, "vote_count": 2800, "release_month": 1},

    # --- MAHESH BABU ---
    {"title": "Sarileru Neekevvaru", "budget": 9000000, "revenue": 31000000, "runtime": 169, "genres": "['Action', 'Comedy']", "top_cast": "['Mahesh Babu', 'Rashmika Mandanna']", "director": "Anil Ravipudi", "composers": "['Devi Sri Prasad']", "vote_average": 6.8, "vote_count": 2000, "release_month": 1},
    {"title": "Sarkaru Vaari Paata", "budget": 7000000, "revenue": 22000000, "runtime": 162, "genres": "['Action', 'Drama']", "top_cast": "['Mahesh Babu', 'Keerthy Suresh']", "director": "Parasuram", "composers": "['Thaman S']", "vote_average": 6.2, "vote_count": 1500, "release_month": 5},
    {"title": "Guntur Kaaram", "budget": 24000000, "revenue": 21000000, "runtime": 159, "genres": "['Action', 'Drama']", "top_cast": "['Mahesh Babu', 'Sreeleela']", "director": "Trivikram Srinivas", "composers": "['Thaman S']", "vote_average": 5.8, "vote_count": 1200, "release_month": 1},

    # --- JR. NTR ---
    {"title": "RRR", "budget": 65000000, "revenue": 160000000, "runtime": 187, "genres": "['Action', 'Drama']", "top_cast": "['N.T. Rama Rao Jr.', 'Ram Charan']", "director": "S.S. Rajamouli", "composers": "['M.M. Keeravani']", "vote_average": 8.0, "vote_count": 15000, "release_month": 3},
    {"title": "Aravinda Sametha", "budget": 11000000, "revenue": 20000000, "runtime": 162, "genres": "['Action', 'Drama']", "top_cast": "['N.T. Rama Rao Jr.', 'Pooja Hegde']", "director": "Trivikram Srinivas", "composers": "['Thaman S']", "vote_average": 7.5, "vote_count": 2200, "release_month": 10},
    {"title": "Janatha Garage", "budget": 6000000, "revenue": 16000000, "runtime": 162, "genres": "['Action', 'Drama']", "top_cast": "['N.T. Rama Rao Jr.', 'Mohanlal']", "director": "Koratala Siva", "composers": "['Devi Sri Prasad']", "vote_average": 7.3, "vote_count": 2500, "release_month": 9},

    # --- RAM CHARAN ---
    {"title": "Rangasthalam", "budget": 8000000, "revenue": 28000000, "runtime": 170, "genres": "['Action', 'Drama']", "top_cast": "['Ram Charan', 'Samantha Ruth Prabhu']", "director": "Sukumar", "composers": "['Devi Sri Prasad']", "vote_average": 8.2, "vote_count": 3500, "release_month": 3},
    {"title": "Magadheera", "budget": 5000000, "revenue": 18000000, "runtime": 166, "genres": "['Action', 'Fantasy']", "top_cast": "['Ram Charan', 'Kajal Aggarwal']", "director": "S.S. Rajamouli", "composers": "['M.M. Keeravani']", "vote_average": 7.8, "vote_count": 3000, "release_month": 7},

    # ==================== KANNADA (Sandalwood) ====================
    # --- YASH ---
    {"title": "KGF: Chapter 2", "budget": 12000000, "revenue": 150000000, "runtime": 168, "genres": "['Action', 'Drama']", "top_cast": "['Yash', 'Sanjay Dutt']", "director": "Prashanth Neel", "composers": "['Ravi Basrur']", "vote_average": 8.4, "vote_count": 8000, "release_month": 4},
    {"title": "KGF: Chapter 1", "budget": 10000000, "revenue": 30000000, "runtime": 156, "genres": "['Action', 'Drama']", "top_cast": "['Yash', 'Srinidhi Shetty']", "director": "Prashanth Neel", "composers": "['Ravi Basrur']", "vote_average": 8.2, "vote_count": 5000, "release_month": 12},
    
    # --- RISHAB SHETTY ---
    {"title": "Kantara", "budget": 2000000, "revenue": 50000000, "runtime": 148, "genres": "['Action', 'Thriller']", "top_cast": "['Rishab Shetty', 'Kishore']", "director": "Rishab Shetty", "composers": "['B. Ajaneesh Loknath']", "vote_average": 8.3, "vote_count": 7000, "release_month": 9},
]

In [24]:
import os
if os.path.exists(file):
    df = pd.read_csv(file)
    new_df = pd.DataFrame(indian_hits)
    df = pd.concat([df, new_df], ignore_index=True)
    df.to_csv(file, index=False)

In [25]:
import matplotlib.pyplot as plt
import seaborn as sns

In [26]:
df_clean = pd.read_csv('movies_cleaned.csv')