# Building a Movie Recommendation System

## Importing The Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from warnings import filterwarnings
filterwarnings('ignore')

# Loading The Dataset

In [27]:
movies = pd.read_csv('latest_movies_2024.csv', parse_dates=['release_date'])

In [28]:
movies.shape

(4704, 11)

In [29]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4704 entries, 0 to 4703
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    4704 non-null   object 
 1   id            4697 non-null   object 
 2   title         4697 non-null   object 
 3   overview      3873 non-null   object 
 4   release_date  4664 non-null   object 
 5   popularity    4687 non-null   object 
 6   vote_average  4687 non-null   object 
 7   vote_count    4687 non-null   object 
 8   Unnamed: 8    11 non-null     float64
 9   Unnamed: 9    1 non-null      float64
 10  Unnamed: 10   1 non-null      float64
dtypes: float64(3), object(8)
memory usage: 404.4+ KB


In [30]:
movies.head()

Unnamed: 0.1,Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,0,957452,The Crow,Soulmates Eric and Shelly are brutally murdere...,2024-08-21,4065.642,5.314,287,,,
1,1,519182,Despicable Me 4,"Gru and Lucy and their girls—Margo, Edith and ...",2024-06-20,1743.127,7.169,1670,,,
2,2,365177,Borderlands,"Returning to her home planet, an infamous boun...",2024-08-07,1689.252,5.898,566,,,
3,3,917496,Beetlejuice Beetlejuice,"After a family tragedy, three generations of t...",2024-09-04,1434.179,7.122,641,,,
4,4,646097,Rebel Ridge,A former Marine confronts corruption in a smal...,2024-08-27,1107.009,7.04,633,,,


In [31]:
movies.drop(columns=['Unnamed: 0', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10'], inplace=True)

In [32]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4704 entries, 0 to 4703
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            4697 non-null   object
 1   title         4697 non-null   object
 2   overview      3873 non-null   object
 3   release_date  4664 non-null   object
 4   popularity    4687 non-null   object
 5   vote_average  4687 non-null   object
 6   vote_count    4687 non-null   object
dtypes: object(7)
memory usage: 257.4+ KB


In [33]:
movies.duplicated().sum()

75

In [35]:
movies[movies.duplicated(keep=False)]

Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
59,1051891,Thelma,When 93-year-old Thelma Post gets duped by a p...,2024-06-21,193.644,7.106,109
60,1051891,Thelma,When 93-year-old Thelma Post gets duped by a p...,2024-06-21,193.644,7.106,109
85,814889,Never Let Go,As an evil takes over the world beyond their f...,2024-09-18,95.913,4.7,3
106,814889,Never Let Go,As an evil takes over the world beyond their f...,2024-09-18,95.913,4.7,3
199,1340571,Lord of the Wolves,When his housemate dies under mysterious circu...,2024-09-20,21.136,0,0
...,...,...,...,...,...,...,...
4692,1359978,Dolors,,2024-09-21,1.4,0,0
4693,1359976,Do It For The Gram,A professor at a prestigious college faces lif...,2024-09-20,1.4,0,0
4694,1359963,Harsh Look,A look into the Boy's average day.,2024-09-21,1.4,0,0
4695,1359954,FEAR,a story about a teenagers who got bullied in s...,2024-09-05,1.4,0,0


In [37]:
movies.drop_duplicates(inplace=True)

In [38]:
movies.duplicated().sum()

0

In [39]:
movies.columns

Index(['id', 'title', 'overview', 'release_date', 'popularity', 'vote_average',
       'vote_count'],
      dtype='object')

In [40]:
movies[['title', 'overview', 'release_date', 'popularity', 'vote_average',
       'vote_count']].duplicated().sum()

0

In [42]:
movies['release_date'].unique()

array(['2024-08-21', '2024-06-20', '2024-08-07', '2024-09-04',
       '2024-08-27', '2024-09-10', '2024-09-11', '2024-08-15',
       '2024-08-13', '2024-04-30', '2024-04-01', '2024-09-07',
       '2024-07-10', '2024-04-23', '2024-08-22', '1988-03-30',
       '2024-07-03', '2024-09-12', '2024-05-31', '2024-04-11',
       '2024-07-31', '2024-03-15', '2024-08-16', '2024-03-14',
       '2024-08-02', '2024-09-05', '2024-05-30', '2024-08-23',
       '2024-09-06', '2023-06-28', '2024-08-01', '2024-03-19',
       '2024-08-28', '2009-02-05', '2024-08-20', '2024-03-22',
       '2024-07-19', '2003-12-17', '2024-04-19', '2001-12-18',
       '2019-10-01', '2024-08-29', '2024-04-24', '2024-05-15',
       '2024-01-18', '2024-01-24', '2024-06-21', '2024-04-26',
       '2024-07-04', '2024-09-19', '2024-08-18', '2023-05-31',
       '2024-07-26', '2024-09-20', '2023-11-03', '2024-04-16',
       '1994-06-24', '2024-04-03', '2024-05-16', '2021-10-13',
       '2024-08-30', '2024-04-04', '2024-03-08', '2004-

In [41]:
movies.head()

Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
0,957452,The Crow,Soulmates Eric and Shelly are brutally murdere...,2024-08-21,4065.642,5.314,287
1,519182,Despicable Me 4,"Gru and Lucy and their girls—Margo, Edith and ...",2024-06-20,1743.127,7.169,1670
2,365177,Borderlands,"Returning to her home planet, an infamous boun...",2024-08-07,1689.252,5.898,566
3,917496,Beetlejuice Beetlejuice,"After a family tragedy, three generations of t...",2024-09-04,1434.179,7.122,641
4,646097,Rebel Ridge,A former Marine confronts corruption in a smal...,2024-08-27,1107.009,7.04,633


In [43]:
pd.DataFrame({'count': movies.isna().sum(), 'percent': movies.isna().sum()/len(movies)})

Unnamed: 0,count,percent
id,1,0.000216
title,1,0.000216
overview,809,0.174768
release_date,34,0.007345
popularity,11,0.002376
vote_average,11,0.002376
vote_count,11,0.002376


In [44]:
movies[movies['title'].isna()]

Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
2460,,,,,,,


In [22]:
movies.iloc[2460]

id              NaN
title           NaN
overview        NaN
release_date    NaN
popularity      NaN
vote_average    NaN
vote_count      NaN
Name: 2460, dtype: object

In [45]:
movies.drop(movies[movies['title'].isna()].index, inplace=True)

In [46]:
pd.DataFrame({'count': movies.isna().sum(), 'percent': movies.isna().sum()/len(movies)})

Unnamed: 0,count,percent
id,0,0.0
title,0,0.0
overview,808,0.174589
release_date,33,0.007131
popularity,10,0.002161
vote_average,10,0.002161
vote_count,10,0.002161


In [47]:
movies[movies['overview'].isna()]

Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
94,1087421,Odio el verano,,2024-08-23,86.451,6.5,2
191,1230477,Given The Movie : Hiiragi Mix,,2024-01-27,37.15,0,0
229,997086,My Next Life as a Villainess: All Routes Lead ...,,2023-12-08,40.545,7.7,5
297,1352344,Anima Kronos,,2024-09-21,15.28,0,0
308,1352843,Olli,,2024-09-22,23.653,0,0
...,...,...,...,...,...,...,...
4666,1360236,Toms romantiska resa,,2024-09-22,1.4,0,0
4671,1360180,Shelpek,,2024-09-23,1.4,0,0
4675,1360101,Der Irland-Krimi: Gnadentod,,2024-09-19,1.4,0,0
4685,1359989,"Jean-Louis Aubert, le chant des possibles",,2024-09-23,1.4,8,1


In [48]:
movies['overview'].fillna("", inplace=True)

In [49]:
movies.isna().sum()

id               0
title            0
overview         0
release_date    33
popularity      10
vote_average    10
vote_count      10
dtype: int64

## Top Popular Movies

In [52]:
movies.popularity.dtype

dtype('O')

In [56]:
movies.popularity.unique()

array(['4065.642', '1743.127', '1689.252', ..., '0.006', '0.001',
       ' a play of the seen and not seen'], dtype=object)

In [58]:
movies[movies.popularity == ' a play of the seen and not seen'].to_dict()

{'id': {4596: '1244588'},
 'title': {4596: 'Is All Around'},
 'overview': {4596: 'A pictorial sense of space presented through frames'},
 'release_date': {4596: ' an illusion between two-dimensionality and three-dimensionality'},
 'popularity': {4596: ' a play of the seen and not seen'},
 'vote_average': {4596: ' a curiosity to the ‘left out’. Is All Around is an abstract impression of Zoey Benschop’s artistic process while making her 2022 work ‘Raamwerk’.'},
 'vote_count': {4596: '2024-09-21'}}

In [67]:
movies[movies.id =='1244588']['overview']

4596    A pictorial sense of space presented through f...
Name: overview, dtype: object

In [68]:
movies[movies.id =='1244588']['overview'] = '''A pictorial sense of space presented through frames, 
an illusion between two-dimensionality and three-dimensionality, a play of the seen and not seen, a curiosity to the ‘left out’. 
Is All Around is an abstract impression of Zoey Benschop’s artistic process while making her 2022 work ‘Raamwerk’.'''

In [70]:
movies[movies.id =='1244588'].to_dict()

{'id': {4596: '1244588'},
 'title': {4596: 'Is All Around'},
 'overview': {4596: 'A pictorial sense of space presented through frames, \nan illusion between two-dimensionality and three-dimensionality, a play of the seen and not seen, a curiosity to the ‘left out’. \nIs All Around is an abstract impression of Zoey Benschop’s artistic process while making her 2022 work ‘Raamwerk’.'},
 'release_date': {4596: ' an illusion between two-dimensionality and three-dimensionality'},
 'popularity': {4596: ' a play of the seen and not seen'},
 'vote_average': {4596: ' a curiosity to the ‘left out’. Is All Around is an abstract impression of Zoey Benschop’s artistic process while making her 2022 work ‘Raamwerk’.'},
 'vote_count': {4596: '2024-09-21'}}

In [87]:
movies[movies.id =='1244588']['popularity']

4596     a play of the seen and not seen
Name: popularity, dtype: object

In [99]:
print(movies.loc[movies['id'] == '1244588', 'popularity'].values)

[' a play of the seen and not seen']


In [100]:
movies.loc[
    (movies['id'] == '1244588') & (movies['popularity'] == " a play of the seen and not seen"),
    'popularity'
] = np.nan

In [101]:
movies[movies.id =='1244588']

Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
4596,1244588,Is All Around,A pictorial sense of space presented through f...,an illusion between two-dimensionality and th...,,a curiosity to the ‘left out’. Is All Around ...,2024-09-21


In [104]:
movies[movies.id =='1244588']['vote_average'].values

array([' a curiosity to the ‘left out’. Is All Around is an abstract impression of Zoey Benschop’s artistic process while making her 2022 work ‘Raamwerk’.'],
      dtype=object)

In [108]:
movies.loc[
    (movies['id'] == '1244588') & (movies['vote_average'] == ''' a curiosity to the ‘left out’. 
    Is All Around is an abstract impression of Zoey Benschop’s artistic process while making her 2022 work ‘Raamwerk’.'''),
    'vote_average'] = np.nan

In [114]:
movies.loc[movies['id'] =='1244588','vote_average'] = np.nan

In [115]:
movies.loc[movies['id'] =='1244588','vote_average']

4596    NaN
Name: vote_average, dtype: object

In [116]:
movies[movies.id =='1244588'].to_dict()

{'id': {4596: '1244588'},
 'title': {4596: 'Is All Around'},
 'overview': {4596: 'A pictorial sense of space presented through frames, \nan illusion between two-dimensionality and three-dimensionality, a play of the seen and not seen, a curiosity to the ‘left out’. \nIs All Around is an abstract impression of Zoey Benschop’s artistic process while making her 2022 work ‘Raamwerk’.'},
 'release_date': {4596: ' an illusion between two-dimensionality and three-dimensionality'},
 'popularity': {4596: nan},
 'vote_average': {4596: nan},
 'vote_count': {4596: '2024-09-21'}}

In [119]:
movies.loc[(movies.id =='1244588'), 'release_date'] = '2024-09-21'

In [122]:
movies.loc[(movies.id =='1244588'), 'vote_count'] = np.nan

In [123]:
movies[movies.id =='1244588'].to_dict()

{'id': {4596: '1244588'},
 'title': {4596: 'Is All Around'},
 'overview': {4596: 'A pictorial sense of space presented through frames, \nan illusion between two-dimensionality and three-dimensionality, a play of the seen and not seen, a curiosity to the ‘left out’. \nIs All Around is an abstract impression of Zoey Benschop’s artistic process while making her 2022 work ‘Raamwerk’.'},
 'release_date': {4596: '2024-09-21'},
 'popularity': {4596: nan},
 'vote_average': {4596: nan},
 'vote_count': {4596: nan}}

In [127]:
movies['popularity'].unique().tolist()

['4065.642',
 '1743.127',
 '1689.252',
 '1434.179',
 '1107.009',
 '1266.199',
 '790.983',
 '782.071',
 '752.785',
 '773.751',
 '814.208',
 '640.237',
 '794.177',
 '768.78',
 '884.199',
 '770.783',
 '660.174',
 '761.713',
 '685.708',
 '541.325',
 '655.268',
 '496.178',
 '546.316',
 '500.909',
 '521.851',
 '548.164',
 '517.109',
 '466.158',
 '537.666',
 '525.65',
 '460.661',
 '466.638',
 '351.64',
 '395.189',
 '444.023',
 '428.462',
 '351.287',
 '344.934',
 '371.659',
 '331.123',
 '322.769',
 '351.12',
 '337.745',
 '254.234',
 '324.17',
 '265.186',
 '275.311',
 '283.379',
 '264.842',
 '292.162',
 '198.343',
 '223.664',
 '188.202',
 '184.341',
 '233.185',
 '185.686',
 '204.963',
 '226.303',
 '181.877',
 '193.644',
 '208.177',
 '190.072',
 '203.382',
 '140.737',
 '201.566',
 '173.788',
 '133.576',
 '182.907',
 '149.984',
 '149.049',
 '124.062',
 '131.119',
 '149.509',
 '126.251',
 '125.831',
 '113.221',
 '156.552',
 '109.352',
 '137.888',
 '108.337',
 '114.309',
 '117.67',
 '127.092',
 '13

In [128]:
movies[movies['popularity'] == '2024-09-18']

Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
1081,1325425,Home Free,Three estranged adult sisters return home for ...,his final wish to have one last perfect weeke...,2024-09-18,5.976,0


In [129]:
movies[movies['id'] == '1325425'].to_dict()

{'id': {1081: '1325425'},
 'title': {1081: 'Home Free'},
 'overview': {1081: 'Three estranged adult sisters return home for their parents anniversary to learn their father is dying'},
 'release_date': {1081: ' his final wish to have one last perfect weekend sends everyone into a whirlwind of buried family secrets and conflict.'},
 'popularity': {1081: '2024-09-18'},
 'vote_average': {1081: '5.976'},
 'vote_count': {1081: '0'}}

In [130]:
movies.loc[movies['id'] == '1325425', 'overview'] = '''Three estranged adult sisters return home for their 
parents anniversary to learn their father is dying, his final wish to have one last perfect weekend sends 
everyone into a whirlwind of buried family secrets and conflict.'''

In [135]:
movies.loc[movies['id'] == '1325425', 'release_date'] = '2024-09-18'

In [136]:
movies.loc[movies['id'] == '1325425', 'popularity'] = np.nan

In [137]:
movies[movies['id'] == '1325425'].to_dict()

{'id': {1081: '1325425'},
 'title': {1081: 'Home Free'},
 'overview': {1081: 'Three estranged adult sisters return home for their \nparents anniversary to learn their father is dying, his final wish to have one last perfect weekend sends \neveryone into a whirlwind of buried family secrets and conflict.'},
 'release_date': {1081: '2024-09-18'},
 'popularity': {1081: nan},
 'vote_average': {1081: '5.976'},
 'vote_count': {1081: '0'}}

In [144]:
movies[movies['popularity'] == '2024-09-09']

Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
1558,1354818,Several Successful Situations,Simultaneous & Successive,"I enjoy religion, I appreciate belief systems ...",2024-09-09,4.515,0
1561,1354818,Several Successful Situations,Simultaneous & Successive,"I enjoy religion, I appreciate belief systems ...",2024-09-09,3.098,0


In [140]:
movies[movies['popularity'] == '2024-09-09'].to_dict()

{'id': {1558: '1354818', 1561: '1354818'},
 'title': {1558: 'Several Successful Situations',
  1561: 'Several Successful Situations'},
 'overview': {1558: ' Simultaneous & Successive',
  1561: ' Simultaneous & Successive'},
 'release_date': {1558: "I enjoy religion, I appreciate belief systems and how they offer structure to people's lives. I also appreciate how spirituality manifests itself in Asian cultures as this almost earthbound presence guiding people through every day life and when they need an extra bit of help they need only ask whichever deity holds dominion over their desire.  Here is an experimental film I made with videos from my iPhone. Shot across Taiwan and South Korea.  An experimental film I made with videos from my iPhone. Shot across Taiwan and Korea. My aim was to explore success in how it pertains to every day life, the satisfaction of small moments, spirituality, superstition, and daily rituals.",
  1561: "I enjoy religion, I appreciate belief systems and how th

In [150]:
movies.drop(1561, inplace=True)

In [151]:
movies[movies['popularity'] == '2024-09-09']

Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
1558,1354818,Several Successful Situations,Simultaneous & Successive,"I enjoy religion, I appreciate belief systems ...",2024-09-09,4.515,0


In [154]:
movies[movies['id'] == '1354818'].to_dict()

{'id': {1558: '1354818'},
 'title': {1558: 'Several Successful Situations'},
 'overview': {1558: ' Simultaneous & Successive'},
 'release_date': {1558: "I enjoy religion, I appreciate belief systems and how they offer structure to people's lives. I also appreciate how spirituality manifests itself in Asian cultures as this almost earthbound presence guiding people through every day life and when they need an extra bit of help they need only ask whichever deity holds dominion over their desire.  Here is an experimental film I made with videos from my iPhone. Shot across Taiwan and South Korea.  An experimental film I made with videos from my iPhone. Shot across Taiwan and Korea. My aim was to explore success in how it pertains to every day life, the satisfaction of small moments, spirituality, superstition, and daily rituals."},
 'popularity': {1558: '2024-09-09'},
 'vote_average': {1558: '4.515'},
 'vote_count': {1558: '0'}}

In [159]:
movies.loc[1558, 'overview'] = '''Simultaneous & Successive. An experimental film I made with videos from my iPhone. 
Shot across Taiwan and Korea. My aim was to explore success in how it pertains to every day life, 
the satisfaction of small moments, spirituality, superstition, and daily rituals.'''

In [163]:
movies.loc[1558, 'release_date'] = '2024-09-09'
movies.loc[1558, 'popularity'] = np.nan

In [164]:
movies[movies['id'] == '1354818'].to_dict()

{'id': {1558: '1354818'},
 'title': {1558: 'Several Successful Situations'},
 'overview': {1558: 'Simultaneous & Successive. An experimental film I made with videos from my iPhone. \nShot across Taiwan and Korea. My aim was to explore success in how it pertains to every day life, \nthe satisfaction of small moments, spirituality, superstition, and daily rituals.'},
 'release_date': {1558: '2024-09-09'},
 'popularity': {1558: nan},
 'vote_average': {1558: '4.515'},
 'vote_count': {1558: '0'}}

In [172]:
movies[movies['popularity']== '2024-08-29'].to_dict()

{'id': {1793: '749116'},
 'title': {1793: 'Cidade'},
 'overview': {1793: ' Campo'},
 'release_date': {1793: 'Two tales of migration. In the first,\u202fafter a tailings dam disaster floods her hometown, rural worker Joana (55) moves to São Paulo\u202fto find her sister Tania, who lives with her grandson Jaime. Joana enters the universe of insecurity, replying to an application for house cleaning. She bonds with her colleagues, and their struggle for better conditions gives Joana’s life a new meaning. Her relationship with young Jaime brings back old memories. In the second part, after the death of her estranged father, Flavia (32) moves to her farm with her wife Mara. The couple suffer a shock of reality when facing the harshness of rural life. The contact with the abandoned house reveals to Flavia unknown aspects of her father. She begins to suspect that there is something supernatural in the woods.'},
 'popularity': {1793: '2024-08-29'},
 'vote_average': {1793: '3.913'},
 'vote_coun

In [174]:
movies.loc[1793, 'title'] = 'Cidade; Campo'

In [175]:
movies.loc[1793, 'overview'] = '''Two stories of migration, memories and ghosts. After a disaster floods her land, Joana flees to São Paulo and tries to start her life over again. Meanwhile, after her father's death, Flavia moves to his farm with her wife Mara.'''

In [176]:
movies.loc[1793, 'popularity'] = np.nan
movies.loc[1793, 'release_date'] = '2024-08-29'

In [178]:
movies.loc[1793].to_dict()

{'id': '749116',
 'title': 'Cidade; Campo',
 'overview': "Two stories of migration, memories and ghosts. After a disaster floods her land, Joana flees to São Paulo and tries to start her life over again. Meanwhile, after her father's death, Flavia moves to his farm with her wife Mara.",
 'release_date': '2024-08-29',
 'popularity': nan,
 'vote_average': '3.913',
 'vote_count': '7'}

In [193]:
def is_date(value):
    try:
        pd.to_datetime(value, errors='raise')  # Try to parse as a date
        return True
    except:
        return False

In [195]:
date_mask = movies['popularity'].apply(is_date)

In [198]:
movies[date_mask]['popularity']

932            NaN
933            NaN
1081           NaN
1558           NaN
1793           NaN
2073    2024-09-15
2114           NaN
2115           NaN
2458    2024-09-11
2459           NaN
2464           NaN
2654    2024-08-21
2937    2024-09-10
3372           NaN
3374           NaN
3881    2022-11-05
4326           NaN
4329           NaN
4428    2024-08-16
4596           NaN
Name: popularity, dtype: object

In [201]:
movies[movies['popularity']== '2024-09-15'].to_dict()

{'id': {2073: '1357084'},
 'title': {2073: 'Taxi Diary'},
 'overview': {2073: 'A diary entry'},
 'release_date': {2073: ' a moment of introspection while transitioning from one environment to the next.'},
 'popularity': {2073: '2024-09-15'},
 'vote_average': {2073: '2.993'},
 'vote_count': {2073: '0'}}

In [203]:
# The movie is not found in internet. There is a similar movie title Taxi Driver which is from 1976 or so and 
# definitely not from 2024. Thus deleting this one.

movies.drop(2703, inplace=True)

In [204]:
movies[movies['popularity']=='2024-09-11'].to_dict()

{'id': {2458: '1337726'},
 'title': {2458: 'Chileans of the North'},
 'overview': {2458: 'The story of the Chilean refugees who came to Sheffield and Rotherham in the 1970s'},
 'release_date': {2458: ' why they came and what had happened in Chile.'},
 'popularity': {2458: '2024-09-11'},
 'vote_average': {2458: '2.077'},
 'vote_count': {2458: '0'}}

In [206]:
movies.loc[2458, 'overview'] = 'The story of the Chilean refugees who came to Sheffield and Rotherham in the 1970s, why they came and what had happened in Chile.'

In [209]:
movies.loc[2458, 'popularity'] = np.nan
movies.loc[2458, 'release_date'] = '2024-09-11'

In [211]:
movies[movies['popularity']== '2024-08-21'].to_dict()

{'id': {2654: '1324764'},
 'title': {2654: 'Конец фильма'},
 'overview': {2654: 'Slava and Katya come to the forest for a picnic. This is an important day for Slava'},
 'release_date': {2654: ' he proposes marriage to Katya. But this fabulous walk will have an unexpected ending.'},
 'popularity': {2654: '2024-08-21'},
 'vote_average': {2654: '1.55'},
 'vote_count': {2654: '0'}}

In [212]:
movies.loc[2654, 'title'] = 'End of the Film (Конец фильма)'
movies.loc[2654, 'overview'] = 'Slava and Katya come to the forest for a picnic. This is an important day for Slava; he proposes marriage to Katya. But this fabulous walk will have an unexpected ending.'
movies.loc[2654, 'popularity'] = np.nan
movies.loc[2654, 'release_date'] = '2024-08-21'

In [213]:
movies.loc[2654]

id                                                        1324764
title                              End of the Film (Конец фильма)
overview        Slava and Katya come to the forest for a picni...
release_date                                           2024-08-21
popularity                                                    NaN
vote_average                                                 1.55
vote_count                                                      0
Name: 2654, dtype: object

In [214]:
movies[movies['popularity']== '2024-09-10'].to_dict()

{'id': {2937: '1358591'},
 'title': {2937: '2 Super 2 Bad'},
 'overview': {2937: 'Young Sophomore Fogell is forced to spend his school day without his phone when he discovers the best thing in the world'},
 'release_date': {2937: ' BEER.'},
 'popularity': {2937: '2024-09-10'},
 'vote_average': {2937: '2.109'},
 'vote_count': {2937: '0'}}

In [215]:
movies.loc[2937, 'overview'] = 'Young Sophomore Fogell is forced to spend his school day without his phone when he discovers the best thing in the world; BEER.'
movies.loc[2937, 'popularity'] = np.nan
movies.loc[2937, 'release_date'] = '2024-09-10'

In [216]:
movies.loc[2937]

id                                                        1358591
title                                               2 Super 2 Bad
overview        Young Sophomore Fogell is forced to spend his ...
release_date                                           2024-09-10
popularity                                                    NaN
vote_average                                                2.109
vote_count                                                      0
Name: 2937, dtype: object

In [219]:
movies[movies.popularity=='2022-11-05'].to_dict()

{'id': {3881: '993558'},
 'title': {3881: 'Smalltown Boys'},
 'overview': {3881: 'A young man seems to be dreaming of a reality different from the business he is about to take over and the married life that goes with it'},
 'release_date': {3881: ' he is drawn to the spicy life of a queer vaudeville troupe performing in his village.'},
 'popularity': {3881: '2022-11-05'},
 'vote_average': {3881: '0.815'},
 'vote_count': {3881: '5'}}

In [223]:
movies.loc[3881, 'overview'] = 'A young man appears to be imagining a world distinct from the business he is set to take control of and the married life that comes with it. He gets attracted to the vibrant life of a queer vaudeville troupe performing in his village.'
movies.loc[3881, 'popularity'] = np.nan
movies.loc[3881, 'release_date'] = '2022-11-05'

In [225]:
movies[movies.popularity == '2024-08-16'].to_dict()

{'id': {4428: '1327929'},
 'title': {4428: 'demarcations'},
 'overview': {4428: ' you have black eyes (cheshmeh siya daree)'},
 'release_date': {4428: 'Drawing its title from a song by Afghan singer Ahmad Zahir, "you have black eyes" is a stream-of-consciousness montage depicting collective bodies in a constant state of disruption, movement, processing, and grieving. It celebrates the body as a historical, domestic site of resistance through dance and movement including found footage of ants carrying flowers, rubab player Ustad Beltoon interrupted, and Hazara throat singing across an imaginary landscape.'},
 'popularity': {4428: '2024-08-16'},
 'vote_average': {4428: '0.948'},
 'vote_count': {4428: '0'}}

In [226]:
movies.loc[4428, 'title'] = 'demarcations; you have black eyes (cheshmeh siya daree)'
movies.loc[4428, 'popularity'] = np.nan
movies.loc[4428, 'release_date'] = '2024-08-16'
movies.loc[4428, 'overview'] = 'Drawing its title from a song by Afghan singer Ahmad Zahir, "you have black eyes" is a stream-of-consciousness montage depicting collective bodies in a constant state of disruption, movement, processing, and grieving. It celebrates the body as a historical, domestic site of resistance through dance and movement including found footage of ants carrying flowers, rubab player Ustad Beltoon interrupted, and Hazara throat singing across an imaginary landscape.'

In [228]:
movies[movies.popularity == '2024-09-15']

Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
2073,1357084,Taxi Diary,A diary entry,a moment of introspection while transitioning...,2024-09-15,2.993,0


In [229]:
movies.drop(2073, inplace=True)

In [230]:
movies['popularity'] = movies['popularity'].astype('float')

In [231]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4625 entries, 0 to 4703
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            4625 non-null   object 
 1   title         4625 non-null   object 
 2   overview      4625 non-null   object 
 3   release_date  4592 non-null   object 
 4   popularity    4606 non-null   float64
 5   vote_average  4614 non-null   object 
 6   vote_count    4614 non-null   object 
dtypes: float64(1), object(6)
memory usage: 289.1+ KB


In [234]:
movies['vote_average'].fillna(0, inplace=True)
movies['vote_count'].fillna(0, inplace=True)
movies['vote_average'] = movies['vote_average'].astype('float')
movies['vote_count'] = movies['vote_count'].astype('int')

In [235]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4625 entries, 0 to 4703
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            4625 non-null   object 
 1   title         4625 non-null   object 
 2   overview      4625 non-null   object 
 3   release_date  4592 non-null   object 
 4   popularity    4606 non-null   float64
 5   vote_average  4625 non-null   float64
 6   vote_count    4625 non-null   int32  
dtypes: float64(2), int32(1), object(4)
memory usage: 271.0+ KB


In [236]:
# movies.to_csv('cleaned_latest_movies.csv')