In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import sys
sys.path.append("../")
import src.Resources as src

In [2]:
movies = pd.read_csv('../Data/Movies_clean.csv', index_col= 0)  
books = pd.read_csv('../Data/Books_clean.csv', index_col= 0)  

In [3]:
movies["Image"][3]

'https://m.media-amazon.com/images/S/sash/4FyxwxECzL-U1J8.png'

In [4]:
movies["Directors"]

0          John Francis Daley| Jonathan Goldstein 
1                                  Chad Stahelski 
2                                 Jeremy Garelick 
3                                   James Cameron 
4                                  Kyle Newacheck 
                            ...                   
130080             Jessica Kitrick| Lewis Lovhaug 
130081                              Tristan Price 
130082                              Aman Sachdeva 
130083                            Riccardo Ghione 
130084                               Edward Conna 
Name: Directors, Length: 130085, dtype: object

In [5]:
movies.columns = ["Title","Image","Genre","Description","Rating","Votes","Directors","Stars"]
books.columns = ["Author","Image","Description","Rating","Votes","Title","Genre"]

In [6]:
movies["Stars"].fillna("Nostars",inplace=True)
movies.drop_duplicates(inplace=True)
movies = movies.drop_duplicates(subset="Title")
movies = movies.reset_index()
movies = movies.drop("index",axis=1)

books["Description"].fillna("No description",inplace=True)
books.drop_duplicates(inplace=True)
books = books.drop_duplicates(subset="Title")
books = books.reset_index()
books = books.drop("index",axis=1)

In [7]:
director = movies["Directors"].apply(lambda x: x.split("|"))
m_genre = movies["Genre"].apply(lambda x: x.split("|"))
stars = movies["Stars"].apply(lambda x: x.split("|"))
b_genre = books["Genre"].apply(lambda x: x.split("|"))

In [8]:
n_directors = len(director.explode().unique())
movie_genres = len(m_genre.explode().unique())
movie_stars = len(stars.explode().unique())
book_genre = len(b_genre.explode().unique())
n_authors = books["Author"].nunique()
n_titles = books["Title"].nunique()
n_titles_m = movies["Title"].nunique()

In [9]:
Bignumbers = pd.DataFrame([n_authors], columns=["Book_Authors"])
Bignumbers["Movie_directors"] = n_directors
Bignumbers["Movie_stars"] = movie_stars
Bignumbers["Book_titles"] = n_titles
Bignumbers["Movie_titles"] = n_titles_m
Bignumbers["Book_genres"] = book_genre
Bignumbers["Movie_genres"] = movie_genres

In [10]:
Bignumbers

Unnamed: 0,Book_Authors,Movie_directors,Movie_stars,Book_titles,Movie_titles,Book_genres,Movie_genres
0,5219,41338,129173,12527,77900,414,28


In [11]:
movies_1 = movies[movies["Votes"]>1000]
books_1 = books[books["Votes"]>1000]

In [12]:
max_m =movies_1["Rating"].max()
min_m = movies_1["Rating"].min()
median_m = movies_1["Rating"].median()
mean_m = movies_1["Rating"].mean()

In [13]:
max_b = books_1["Rating"].max()
min_b = books_1["Rating"].min()
median_b = books_1["Rating"].median()
mean_b = books_1["Rating"].mean()

In [14]:
metric = ["Max","Min","Median","Mean","Max","Min","Median","Mean"]
rating = [max_m,min_m,median_m,mean_m,max_b,min_b,median_b,mean_b]
type = ["Movie","Movie","Movie","Movie","Book","Book","Book","Book"]


In [15]:
Data = pd.DataFrame({"Metric": metric, "Rating": rating, "Type": type})

In [16]:
fig = px.bar(Data, x="Metric",y="Rating",color="Type",barmode="group")
fig.show()

In [17]:
books['Genre'] = books['Genre'].apply(src.map_genre)
movies["Genre"] = movies["Genre"].apply(src.map_movie_genre)

In [18]:
books.dropna(inplace=True)
movies.dropna(inplace=True)
books["Genre"] = books["Genre"].apply(lambda x: x.split("|"))
movies["Genre"] = movies["Genre"].apply(lambda x : x.split("|"))

In [19]:
genre_books = books["Genre"].explode().value_counts().reset_index()
genre_movies = movies["Genre"].explode().value_counts().reset_index()

In [20]:
genre_movies.value_counts()

index        Genre
Action       28785    1
Adult        3        1
War          3353     1
Thriller     7824     1
Sports       2189     1
Sci-Fi       3960     1
Romance      8750     1
Mystery      4419     1
Music        7803     1
Horror       6736     1
History      4265     1
Film-Noir    806      1
Fantasy      3806     1
Family       4838     1
Drama        33514    1
Documentary  8458     1
Crime        11769    1
Comedy       18865    1
Biography    4279     1
Animation    4973     1
Adventure    12178    1
Western      3533     1
dtype: int64

In [21]:
genre_books["Type"]= "Book"
genre_movies["Type"] = "Movie"

In [22]:
genre_total = pd.concat([genre_books,genre_movies])

In [23]:
df = genre_total

fig = px.bar(
    genre_total,
    x = "index",
    y = "Genre",
    color = "Type",
    barmode="group",
    color_discrete_map = {"Book": "#44CF6C", "Movie": "#0E7C7B"})

fig.update_layout(
    plot_bgcolor = "#E1D5D7",
    paper_bgcolor = "#F7EBEC",
    font = dict(family = "Arial", color = "#00072D"))
    
fig.show()

In [24]:
movies_2 = movies[movies["Votes"]>80000]
books_2 = books[books["Votes"]>80000]

In [25]:
movies_2.head(1)

Unnamed: 0,Title,Image,Genre,Description,Rating,Votes,Directors,Stars
1,John Wick 4,https://m.media-amazon.com/images/M/MV5BMDExZG...,"[Crime, Thriller, Action]",John Wick uncovers a path to defeating The Hig...,8.3,106008,Chad Stahelski,Keanu Reeves| Laurence Fishburne| George Geor...


In [26]:
books_2.head(1)

Unnamed: 0,Author,Image,Description,Rating,Votes,Title,Genre
0,William Shakespeare,https://images-na.ssl-images-amazon.com/images...,"In Romeo and Juliet, Shakespeare creates a vio...",7.48,2462752,Romeo and Juliet,[Romance]


In [27]:
books_exploded = pd.DataFrame({"Image":books_2["Image"],"Title": books_2["Title"],"Author":books_2["Author"], "Rating": books_2["Rating"],"Description":books_2["Description"],"Genre":books_2["Genre"]})
movies_exploded = pd.DataFrame({"Image":movies_2["Image"],"Title": movies_2["Title"],"Directors":movies_2["Directors"], "Rating": movies_2["Rating"],"Description": movies_2["Description"],"Genre":movies_2["Genre"]})


In [28]:
books_exploded = books_exploded.explode("Genre").reset_index(drop=True)
movies_exploded = movies_exploded.explode("Genre").reset_index(drop=True)

In [29]:
def best_books(genre):
    books_exploded = pd.read_csv("../Data/Books_exploded.csv",index_col=0)
    
    books_genre = books_exploded[books_exploded["Genre"] == genre ]
    top_10_gen = books_genre.sort_values(by="Rating",ascending=False).head(10)
    top_10_gen.drop("Genre",axis=1,inplace=True)
    return top_10_gen

In [30]:
best_books("Thriller")

Unnamed: 0,Image,Title,Author,Rating,Description
3536,https://images-na.ssl-images-amazon.com/images...,Paint It All Red,S.T. Abby,9.16,"Hush, little baby, don't say a word... Will L..."
3528,https://images-na.ssl-images-amazon.com/images...,All the Lies,S.T. Abby,9.08,"To understand the monsters in the world, you h..."
203,https://images-na.ssl-images-amazon.com/images...,The Green Mile,Stephen King,8.94,"At Cold Mountain Penitentiary, along the lonel..."
3515,https://images-na.ssl-images-amazon.com/images...,Scarlet Angel,S.T. Abby,8.84,"To kill a monster, you have to be twice as mon..."
121,https://images-na.ssl-images-amazon.com/images...,The Godfather,Mario Puzo,8.76,The Godfather—the epic tale of crime and betra...
294,https://images-na.ssl-images-amazon.com/images...,Verity,Colleen Hoover,8.72,Lowen Ashleigh is a struggling writer on the b...
3285,https://images-na.ssl-images-amazon.com/images...,Troubled Blood,Robert Galbraith,8.72,Private Detective Cormoran Strike is visiting ...
3524,https://images-na.ssl-images-amazon.com/images...,Firekeeper's Daughter,Angeline Boulley,8.7,"As a biracial, unenrolled tribal member and th..."
371,https://images-na.ssl-images-amazon.com/images...,Different Seasons,Stephen King,8.7,This Book is in Good Condition. Used Copy With...
3225,https://images-na.ssl-images-amazon.com/images...,A Good Girl's Guide to Murder,Holly Jackson,8.7,"The case is closed. Five years ago, schoolgirl..."


In [31]:
def best_movies(genre):
    movies_exploded = pd.read_csv("../Data/Movies_exploded.csv",index_col=0)
    
    movies_genre = movies_exploded[movies_exploded["Genre"] == genre ]
    
    top_10_gen = movies_genre.sort_values(by="Rating",ascending=False).head(10)
    top_10_gen.drop("Genre",axis=1,inplace=True)
    return top_10_gen

In [32]:
best_movies("Thriller")

Unnamed: 0,Image,Title,Directors,Rating,Description
5316,https://m.media-amazon.com/images/S/sash/4Fyxw...,El silencio de los corderos,Jonathan Demme,8.6,A young F.B.I. cadet must receive the help of ...
5307,https://m.media-amazon.com/images/S/sash/4Fyxw...,Infiltrados,Martin Scorsese,8.5,An undercover cop and a mole in the police att...
6802,https://m.media-amazon.com/images/S/sash/4Fyxw...,La ventana indiscreta,Alfred Hitchcock,8.5,A wheelchair-bound photographer spies on his n...
6720,https://m.media-amazon.com/images/S/sash/4Fyxw...,Psicosis,Alfred Hitchcock,8.5,"A Phoenix secretary embezzles $40,000 from her..."
5833,https://m.media-amazon.com/images/S/sash/4Fyxw...,Parásitos,Bong Joon Ho,8.5,Greed and class discrimination threaten the ne...
260,https://m.media-amazon.com/images/S/sash/4Fyxw...,El caballero oscuro: La leyenda renace,Christopher Nolan,8.4,"Eight years after the Joker's reign of chaos, ..."
6077,https://m.media-amazon.com/images/S/sash/4Fyxw...,La vida de los otros,Florian Henckel von Donnersmarck,8.4,"In 1984 East Berlin, an agent of the secret po..."
5940,https://m.media-amazon.com/images/S/sash/4Fyxw...,Masacre (Ven y mira),Elem Klimov,8.4,"After finding an old rifle, a young boy joins ..."
6722,https://m.media-amazon.com/images/S/sash/4Fyxw...,Memento,Christopher Nolan,8.4,A man with short-term memory loss attempts to ...
5351,https://m.media-amazon.com/images/S/sash/4Fyxw...,Reservoir Dogs,Quentin Tarantino,8.3,When a simple jewelry heist goes horribly wron...


In [33]:
best_books("Thriller")

Unnamed: 0,Image,Title,Author,Rating,Description
3536,https://images-na.ssl-images-amazon.com/images...,Paint It All Red,S.T. Abby,9.16,"Hush, little baby, don't say a word... Will L..."
3528,https://images-na.ssl-images-amazon.com/images...,All the Lies,S.T. Abby,9.08,"To understand the monsters in the world, you h..."
203,https://images-na.ssl-images-amazon.com/images...,The Green Mile,Stephen King,8.94,"At Cold Mountain Penitentiary, along the lonel..."
3515,https://images-na.ssl-images-amazon.com/images...,Scarlet Angel,S.T. Abby,8.84,"To kill a monster, you have to be twice as mon..."
121,https://images-na.ssl-images-amazon.com/images...,The Godfather,Mario Puzo,8.76,The Godfather—the epic tale of crime and betra...
294,https://images-na.ssl-images-amazon.com/images...,Verity,Colleen Hoover,8.72,Lowen Ashleigh is a struggling writer on the b...
3285,https://images-na.ssl-images-amazon.com/images...,Troubled Blood,Robert Galbraith,8.72,Private Detective Cormoran Strike is visiting ...
3524,https://images-na.ssl-images-amazon.com/images...,Firekeeper's Daughter,Angeline Boulley,8.7,"As a biracial, unenrolled tribal member and th..."
371,https://images-na.ssl-images-amazon.com/images...,Different Seasons,Stephen King,8.7,This Book is in Good Condition. Used Copy With...
3225,https://images-na.ssl-images-amazon.com/images...,A Good Girl's Guide to Murder,Holly Jackson,8.7,"The case is closed. Five years ago, schoolgirl..."


In [34]:
books_exploded.to_csv("../Data/Books_exploded.csv")
movies_exploded.to_csv("../Data/Movies_exploded.csv")
genre_total.to_csv("../Data/genre_total.csv")