# Date Night Movie

In this assignment we are going to use pandas to figure out - What's the best **date-night movie**?

This assignment is going to use
- Joining
- Groupby
- Sorting


In [1]:
import os
import pandas as pd

##### Read in the movie data: `pd.read_table`

In [2]:
def get_movie_data():
    
    unames = ['user_id','gender','age','occupation','zip']
    users = pd.read_table(os.path.join('data','users.dat'), 
                          sep='::', header=None, names=unames, encoding='latin-1')
    
    rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
    ratings = pd.read_table(os.path.join('data', 'ratings.dat'), 
                            sep='::', header=None, names=rnames, encoding='latin-1')
    
    mnames = ['movie_id', 'title','genres']
    movies = pd.read_table(os.path.join('data', 'movies.dat'), 
                           sep='::', header=None, names=mnames, encoding='latin-1')

    return users, ratings, movies

In [3]:
users, ratings, movies = get_movie_data()

  users = pd.read_table(os.path.join('data','users.dat'),
  ratings = pd.read_table(os.path.join('data', 'ratings.dat'),
  movies = pd.read_table(os.path.join('data', 'movies.dat'),


In [4]:
print(users.head())

   user_id gender  age  occupation    zip
0        1      F    1          10  48067
1        2      M   56          16  70072
2        3      M   25          15  55117
3        4      M   45           7  02460
4        5      M   25          20  55455


In [5]:
print(ratings.head())

   user_id  movie_id  rating  timestamp
0        1      1193       5  978300760
1        1       661       3  978302109
2        1       914       3  978301968
3        1      3408       4  978300275
4        1      2355       5  978824291


In [6]:
print(movies.head())

   movie_id                               title                        genres
0         1                    Toy Story (1995)   Animation|Children's|Comedy
1         2                      Jumanji (1995)  Adventure|Children's|Fantasy
2         3             Grumpier Old Men (1995)                Comedy|Romance
3         4            Waiting to Exhale (1995)                  Comedy|Drama
4         5  Father of the Bride Part II (1995)                        Comedy


##### Clean up the `movies`

- Get the `year`
- Shorten the `title`


In [7]:
tmp = movies.title.str.extract('(.*) \(([0-9]+)\)')
tmp.apply(lambda x:x[0] if len(x) > 0 else None)
tmp.apply(lambda x: x[0][:40] if len(x) > 0 else None)

0    Toy Story
1         1995
dtype: object

In [8]:
movies['year'] = tmp[1]
movies['short_title'] = tmp[0]

In [9]:
print(movies.head())

   movie_id                               title                        genres  \
0         1                    Toy Story (1995)   Animation|Children's|Comedy   
1         2                      Jumanji (1995)  Adventure|Children's|Fantasy   
2         3             Grumpier Old Men (1995)                Comedy|Romance   
3         4            Waiting to Exhale (1995)                  Comedy|Drama   
4         5  Father of the Bride Part II (1995)                        Comedy   

   year                  short_title  
0  1995                    Toy Story  
1  1995                      Jumanji  
2  1995             Grumpier Old Men  
3  1995            Waiting to Exhale  
4  1995  Father of the Bride Part II  


##### Join the tables with `pd.merge` (20 pts)

In [10]:
birlestirme=pd.merge(ratings,movies,how='outer', on='movie_id')
birlestirme_son=pd.merge(birlestirme,users, on='user_id')
birlestirme_son

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,year,short_title,gender,age,occupation,zip
0,1.0,1193,5.0,978300760.0,One Flew Over the Cuckoo's Nest (1975),Drama,1975,One Flew Over the Cuckoo's Nest,F,1,10,48067
1,1.0,661,3.0,978302109.0,James and the Giant Peach (1996),Animation|Children's|Musical,1996,James and the Giant Peach,F,1,10,48067
2,1.0,914,3.0,978301968.0,My Fair Lady (1964),Musical|Romance,1964,My Fair Lady,F,1,10,48067
3,1.0,3408,4.0,978300275.0,Erin Brockovich (2000),Drama,2000,Erin Brockovich,F,1,10,48067
4,1.0,2355,5.0,978824291.0,"Bug's Life, A (1998)",Animation|Children's|Comedy,1998,"Bug's Life, A",F,1,10,48067
...,...,...,...,...,...,...,...,...,...,...,...,...
1000204,4211.0,3791,2.0,965319075.0,Footloose (1984),Drama,1984,Footloose,M,45,5,77662
1000205,4211.0,3806,3.0,965319138.0,MacKenna's Gold (1969),Western,1969,MacKenna's Gold,M,45,5,77662
1000206,4211.0,3840,4.0,965319197.0,Pumpkinhead (1988),Horror,1988,Pumpkinhead,M,45,5,77662
1000207,4211.0,3766,2.0,965319138.0,Missing in Action (1984),Action|War,1984,Missing in Action,M,45,5,77662


##### What's the highest rated movie? (20 pts))


In [11]:
birlestirme_son.sort_values(by=['rating'],ascending=False).head(20)



Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,year,short_title,gender,age,occupation,zip
0,1.0,1193,5.0,978300760.0,One Flew Over the Cuckoo's Nest (1975),Drama,1975,One Flew Over the Cuckoo's Nest,F,1,10,48067
327573,4344.0,1366,5.0,966272853.0,"Crucible, The (1996)",Drama,1996,"Crucible, The",M,25,1,44240
327567,4344.0,3655,5.0,965233731.0,Blow-Out (La Grande Bouffe) (1973),Drama,1973,Blow-Out (La Grande Bouffe),M,25,1,44240
327564,4344.0,3201,5.0,965236536.0,Five Easy Pieces (1970),Drama,1970,Five Easy Pieces,M,25,1,44240
830521,2995.0,2033,5.0,970696865.0,"Black Cauldron, The (1985)",Animation|Children's,1985,"Black Cauldron, The",M,25,15,97333
327559,4344.0,1228,5.0,965234657.0,Raging Bull (1980),Drama,1980,Raging Bull,M,25,1,44240
327554,4344.0,1173,5.0,965242065.0,"Cook the Thief His Wife & Her Lover, The (1989)",Drama,1989,"Cook the Thief His Wife & Her Lover, The",M,25,1,44240
830530,2995.0,1262,5.0,970684869.0,"Great Escape, The (1963)",Adventure|War,1963,"Great Escape, The",M,25,15,97333
830539,2995.0,1587,5.0,970691672.0,Conan the Barbarian (1982),Action|Adventure,1982,Conan the Barbarian,M,25,15,97333
830543,2995.0,1396,5.0,970683817.0,Sneakers (1992),Crime|Drama|Sci-Fi,1992,Sneakers,M,25,15,97333


In [12]:
birlestirme_son.duplicated().sum()

0

night###### What is a good rated movie for date ? (60 pts)

- Hint - highly rated movie by 
    - both partners (might be the same gender or not),
    - based on genre preferences,
    - age group can also be combined

In [13]:
birlestirme_son['genres'].value_counts()

Comedy                       116883
Drama                        111423
Comedy|Romance                42712
Comedy|Drama                  42245
Drama|Romance                 29170
                              ...  
Drama|Romance|Western            29
Children's|Fantasy               27
Comedy|Film-Noir|Thriller         5
Film-Noir|Horror                  2
Fantasy                           1
Name: genres, Length: 301, dtype: int64

In [14]:
y=birlestirme_son['rating'].max()
y

5.0

In [15]:
retur=birlestirme_son[birlestirme_son['rating']==y]
retur

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,year,short_title,gender,age,occupation,zip
0,1.0,1193,5.0,978300760.0,One Flew Over the Cuckoo's Nest (1975),Drama,1975,One Flew Over the Cuckoo's Nest,F,1,10,48067
4,1.0,2355,5.0,978824291.0,"Bug's Life, A (1998)",Animation|Children's|Comedy,1998,"Bug's Life, A",F,1,10,48067
6,1.0,1287,5.0,978302039.0,Ben-Hur (1959),Action|Adventure|Drama,1959,Ben-Hur,F,1,10,48067
7,1.0,2804,5.0,978300719.0,"Christmas Story, A (1983)",Comedy|Drama,1983,"Christmas Story, A",F,1,10,48067
10,1.0,595,5.0,978824268.0,Beauty and the Beast (1991),Animation|Children's|Musical,1991,Beauty and the Beast,F,1,10,48067
...,...,...,...,...,...,...,...,...,...,...,...,...
1000128,2982.0,1248,5.0,970936849.0,Touch of Evil (1958),Crime|Film-Noir|Thriller,1958,Touch of Evil,M,25,4,54902
1000131,2982.0,3683,5.0,970936885.0,Blood Simple (1984),Drama|Film-Noir,1984,Blood Simple,M,25,4,54902
1000132,2982.0,3706,5.0,970936919.0,Angel Heart (1987),Film-Noir|Mystery|Thriller,1987,Angel Heart,M,25,4,54902
1000172,3893.0,3718,5.0,965794040.0,American Pimp (1999),Documentary,1999,American Pimp,M,25,6,79401


In [16]:
return1=retur.sort_values(by=['rating'],ascending=False)
return1

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,year,short_title,gender,age,occupation,zip
0,1.0,1193,5.0,978300760.0,One Flew Over the Cuckoo's Nest (1975),Drama,1975,One Flew Over the Cuckoo's Nest,F,1,10,48067
694740,5700.0,1393,5.0,958588450.0,Jerry Maguire (1996),Drama|Romance,1996,Jerry Maguire,F,18,14,55426
694707,5691.0,1017,5.0,958599228.0,Swiss Family Robinson (1960),Adventure|Children's,1960,Swiss Family Robinson,M,35,14,66013
694713,5691.0,2615,5.0,958599666.0,My Science Project (1985),Adventure|Sci-Fi,1985,My Science Project,M,35,14,66013
694716,5691.0,362,5.0,958599014.0,"Jungle Book, The (1994)",Adventure|Children's|Romance,1994,"Jungle Book, The",M,35,14,66013
...,...,...,...,...,...,...,...,...,...,...,...,...
356614,4708.0,1104,5.0,963935986.0,"Streetcar Named Desire, A (1951)",Drama,1951,"Streetcar Named Desire, A",M,25,0,60647
356615,4708.0,953,5.0,963525983.0,It's a Wonderful Life (1946),Drama,1946,It's a Wonderful Life,M,25,0,60647
356616,4708.0,954,5.0,963525553.0,Mr. Smith Goes to Washington (1939),Drama,1939,Mr. Smith Goes to Washington,M,25,0,60647
356618,4708.0,1633,5.0,963513815.0,Ulee's Gold (1997),Drama,1997,Ulee's Gold,M,25,0,60647


In [17]:
return2=return1['genres'].value_counts().sort_values(ascending=False)
return2

Drama                                  29815
Comedy                                 22719
Comedy|Drama                           10998
Comedy|Romance                          8091
Drama|Romance                           6176
                                       ...  
Comedy|Mystery|Romance                     3
Drama|Romance|Western                      1
Action|Adventure|Comedy|War                1
Children's|Fantasy                         1
Action|Adventure|Children's|Fantasy        1
Name: genres, Length: 296, dtype: int64

In [18]:
return2.index[0]

'Drama'

In [19]:
return3=return1[return1['genres']==return2.index[0]]
return3

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,year,short_title,gender,age,occupation,zip
0,1.0,1193,5.0,978300760.0,One Flew Over the Cuckoo's Nest (1975),Drama,1975,One Flew Over the Cuckoo's Nest,F,1,10,48067
694719,5700.0,3408,5.0,958588215.0,Erin Brockovich (2000),Drama,2000,Erin Brockovich,F,18,14,55426
694733,5700.0,318,5.0,958588429.0,"Shawshank Redemption, The (1994)",Drama,1994,"Shawshank Redemption, The",F,18,14,55426
694940,5718.0,150,5.0,958509442.0,Apollo 13 (1995),Drama,1995,Apollo 13,F,35,14,38018
694954,5718.0,515,5.0,958509528.0,"Remains of the Day, The (1993)",Drama,1993,"Remains of the Day, The",F,35,14,38018
...,...,...,...,...,...,...,...,...,...,...,...,...
356614,4708.0,1104,5.0,963935986.0,"Streetcar Named Desire, A (1951)",Drama,1951,"Streetcar Named Desire, A",M,25,0,60647
356615,4708.0,953,5.0,963525983.0,It's a Wonderful Life (1946),Drama,1946,It's a Wonderful Life,M,25,0,60647
356616,4708.0,954,5.0,963525553.0,Mr. Smith Goes to Washington (1939),Drama,1939,Mr. Smith Goes to Washington,M,25,0,60647
356618,4708.0,1633,5.0,963513815.0,Ulee's Gold (1997),Drama,1997,Ulee's Gold,M,25,0,60647


In [20]:
return3['gender'].max()

'M'

In [21]:
return4=return3[return3['gender']==return3['gender'].max()]
return4

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,year,short_title,gender,age,occupation,zip
694826,5703.0,1228,5.0,958575531.0,Raging Bull (1980),Drama,1980,Raging Bull,M,56,1,14068
694831,5714.0,3408,5.0,958511738.0,Erin Brockovich (2000),Drama,2000,Erin Brockovich,M,35,2,96753
694589,5691.0,3408,5.0,958597365.0,Erin Brockovich (2000),Drama,2000,Erin Brockovich,M,35,14,66013
695781,5776.0,2959,5.0,958171170.0,Fight Club (1999),Drama,1999,Fight Club,M,18,12,97205
695627,5766.0,2336,5.0,958177673.0,Elizabeth (1998),Drama,1998,Elizabeth,M,56,14,96744
...,...,...,...,...,...,...,...,...,...,...,...,...
356614,4708.0,1104,5.0,963935986.0,"Streetcar Named Desire, A (1951)",Drama,1951,"Streetcar Named Desire, A",M,25,0,60647
356615,4708.0,953,5.0,963525983.0,It's a Wonderful Life (1946),Drama,1946,It's a Wonderful Life,M,25,0,60647
356616,4708.0,954,5.0,963525553.0,Mr. Smith Goes to Washington (1939),Drama,1939,Mr. Smith Goes to Washington,M,25,0,60647
356618,4708.0,1633,5.0,963513815.0,Ulee's Gold (1997),Drama,1997,Ulee's Gold,M,25,0,60647


In [22]:
return4['age'].max()

56

In [23]:
return5=return4[return4['age']==return4['age'].max()]
return5

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,year,short_title,gender,age,occupation,zip
694826,5703.0,1228,5.0,958575531.0,Raging Bull (1980),Drama,1980,Raging Bull,M,56,1,14068
695627,5766.0,2336,5.0,958177673.0,Elizabeth (1998),Drama,1998,Elizabeth,M,56,14,96744
695645,5766.0,1271,5.0,958178232.0,Fried Green Tomatoes (1991),Drama,1991,Fried Green Tomatoes,M,56,14,96744
695668,5766.0,1280,5.0,958177026.0,Raise the Red Lantern (1991),Drama,1991,Raise the Red Lantern,M,56,14,96744
695669,5766.0,3179,5.0,958177703.0,Angela's Ashes (1999),Drama,1999,Angela's Ashes,M,56,14,96744
...,...,...,...,...,...,...,...,...,...,...,...,...
356217,4694.0,3468,5.0,963603010.0,"Hustler, The (1961)",Drama,1961,"Hustler, The",M,56,7,40505
356222,4694.0,1225,5.0,963603063.0,Amadeus (1984),Drama,1984,Amadeus,M,56,7,40505
356247,4694.0,1104,5.0,963603338.0,"Streetcar Named Desire, A (1951)",Drama,1951,"Streetcar Named Desire, A",M,56,7,40505
356248,4694.0,1203,5.0,963603394.0,12 Angry Men (1957),Drama,1957,12 Angry Men,M,56,7,40505


In [24]:
#En meslehetli film
return6=return5['title'].max()
return6



'World of Apu, The (Apur Sansar) (1959)'

In [25]:
#En meslehetli film
[return5[return5['title']==return6]][0]

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,year,short_title,gender,age,occupation,zip
414312,5517.0,670,5.0,959702082.0,"World of Apu, The (Apur Sansar) (1959)",Drama,1959,"World of Apu, The (Apur Sansar)",M,56,0,80210
