In [7]:
# Mengimpor modul zipfile 
import zipfile

# Import pandas for loading and manipulating tabular data
import pandas as pd

# Mengimpor modul plotly.express untuk visualisasi 
import plotly.express as px

# Mengimpor modul matplotlib.pyplot untuk visualisasi data grafik statis
import matplotlib.pyplot as plt


In [8]:
# Helper that unpacks a zip archive
def ekstrakzip(file_path, extract_to='.'):
    """Extract every member of a zip archive into a directory.

    Args:
        file_path: Path to the zip archive; it is opened in read mode.
        extract_to: Directory that receives the extracted members.
            Defaults to the current working directory.

    Returns:
        None. The only effect is the files written under ``extract_to``.
    """
    archive = zipfile.ZipFile(file_path, mode='r')
    try:
        archive.extractall(path=extract_to)
    finally:
        # Always release the file handle, even if extraction fails.
        archive.close()

# Unpack the archive into its own folder
zip_file_path = 'film.zip'  # path to the zip archive
ekstrakzip(zip_file_path, extract_to='./extracted')

# Locations of the extracted data files
movies_file_path = './extracted/movies.dat'
ratings_file_path = './extracted/ratings.dat'

# Column names assigned to each file (both are read with header=None)
movies_columns = ['ID', 'Title', 'Genre']
ratings_columns = ['User', 'ID', 'Ratings', 'Timestamp']

# Fields are separated by '::'; the python engine is used for this
# multi-character separator.
movies_df = pd.read_csv(
    movies_file_path,
    sep='::',
    names=movies_columns,
    header=None,
    engine='python',
)
ratings_df = pd.read_csv(
    ratings_file_path,
    sep='::',
    names=ratings_columns,
    header=None,
    engine='python',
)

# Preview the first rows of each frame, movies first and then ratings
for caption, frame in (
    ("Data movies.dat:", movies_df),
    ("Data ratings.dat:", ratings_df),
):
    print(caption)
    print(frame.head())


Data movies.dat:
   ID                                              Title              Genre
0   8      Edison Kinetoscopic Record of a Sneeze (1894)  Documentary|Short
1  10                La sortie des usines Lumière (1895)  Documentary|Short
2  12                      The Arrival of a Train (1896)  Documentary|Short
3  25  The Oxford and Cambridge University Boat Race ...                NaN
4  91                         Le manoir du diable (1896)       Short|Horror
Data ratings.dat:
   User       ID  Ratings   Timestamp
0     1   114508        8  1381006850
1     2   499549        9  1376753198
2     2  1305591        8  1376742507
3     2  1428538        1  1371307089
4     3    75314        1  1595468524


In [9]:
# Join movies with their ratings on the shared 'ID' column
# (pandas' default join type is used, as no `how` is given)
merged_df = movies_df.merge(ratings_df, on='ID')

# Preview the first 5 rows of the joined frame
merged_preview = merged_df.head()
print("Data digabung berdasarkan ID yang sama:")
print(merged_preview)

Data digabung berdasarkan ID yang sama:
   ID                                              Title              Genre  \
0   8      Edison Kinetoscopic Record of a Sneeze (1894)  Documentary|Short   
1  10                La sortie des usines Lumière (1895)  Documentary|Short   
2  12                      The Arrival of a Train (1896)  Documentary|Short   
3  25  The Oxford and Cambridge University Boat Race ...                NaN   
4  91                         Le manoir du diable (1896)       Short|Horror   

    User  Ratings   Timestamp  
0  42898        5  1396981211  
1  70577       10  1412878553  
2  69535       10  1439248579  
3  37628        8  1488189899  
4   5814        6  1385233195  


In [10]:
# Count how many times each rating value occurs
rating_counts = ratings_df['Ratings'].value_counts()

# Express each count as a percentage of all rows in ratings_df
total_rows = len(ratings_df)
rating_percentages = rating_counts.div(total_rows).mul(100)

# Build the pie chart: one slice per rating value, sized by its percentage
fig = px.pie(
    names=rating_percentages.index,
    values=rating_percentages.values,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title="Persentase Ratings",
)

# Render the chart
fig.show()

In [11]:
# Show 15 film titles that received a rating of 10, with repeated titles removed
is_perfect_score = merged_df['Ratings'].eq(10)
top_rated_movies = (
    merged_df.loc[is_perfect_score]
    .drop_duplicates(subset=['Title'])
    .head(15)
)
print(top_rated_movies.loc[:, ['Title']])

                                  Title
1   La sortie des usines Lumière (1895)
2         The Arrival of a Train (1896)
16            A Trip to the Moon (1902)
49            Dough and Dynamite (1914)
53            His New Profession (1914)
54                  Laughing Gas (1914)
55               The New Janitor (1914)
59                  The Rounders (1914)
67                  The Champion (1915)
69           L'héroïsme de Paddy (1915)
70            A Jitney Elopement (1915)
72                     The Tramp (1915)
73                  Les vampires (1915)
77             Behind the Screen (1916)
80                 The Blacklist (1916)
