## Fetching API

## Percobaan Fetching 1 pages


In [6]:
import requests
import csv

def fetch_movie_data_csv():
    page = 1
    all_movie = []

    url = f"https://api.themoviedb.org/3/movie/popular?api_key=c5ddc71e3a32f3641f3d21d493f8906b&language=ID&page={page}"
    response = requests.get(url)
    
    # Periksa apakah respons sukses (kode 200)
    if response.status_code != 200:
        print(f"Error: Gagal mengambil data di halaman {page}, status code: {response.status_code}")
        return
    
    try:
        result = response.json()
        
        # Pastikan kunci 'results' ada
        if "results" not in result:
            print(f"Error: Respons di halaman {page} tidak memiliki kunci 'results'")
            return

        for movie in result['results']:
            all_movie.append([
                movie["id"],
                movie["original_title"],
                ", ".join(str(genre_id) for genre_id in movie.get("genre_ids", [])),
                movie.get("vote_average", "N/A"),
            ])
    
    except requests.exceptions.JSONDecodeError:
        print(f"Error: Gagal mengurai JSON di halaman {page}")
        return

    # Simpan ke file CSV jika ada data
    if all_movie:
        with open("movie_data.csv", "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["id", "original_title", "genres_id", "vote_average"])
            writer.writerows(all_movie)
        print("Data berhasil disimpan ke movie_data.csv!")
    else:
        print("Tidak ada data yang disimpan.")

fetch_movie_data_csv()


Data berhasil disimpan ke movie_data.csv!


## Fetching All Pages

In [12]:
import requests
import csv
import time

def fetch_movie_data_csv():
    page = 1
    all_movie = []
    has_next_page = True

    while has_next_page:
        url = f"https://api.themoviedb.org/3/movie/popular?api_key=c5ddc71e3a32f3641f3d21d493f8906b&language=ID&page={page}"
        response = requests.get(url)
        
        if response.status_code != 200:
            print(f"Error: Gagal mengambil data di halaman {page}, status code: {response.status_code}")
            break
        
        try:
            result = response.json()
            
            if "results" not in result:
                print(f"Error: Respons di halaman {page} tidak memiliki kunci 'results'")
                break

            for movie in result['results']:
                all_movie.append([
                    movie["id"],
                    movie["original_title"],
                    ", ".join(str(genre_id) for genre_id in movie.get("genre_ids", [])),
                    movie.get("vote_average", "N/A"),
                ])

            total_pages = result.get("total_pages", 1)
            if page >= total_pages:
                has_next_page = False
            else:
                page += 1
        
        except requests.exceptions.JSONDecodeError:
            print(f"Error: Gagal mengurai JSON di halaman {page}")
            break
        
        time.sleep(1)

    if all_movie:
        with open("movie_data.csv", "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["id", "original_title", "genres_id", "vote_average"])
            writer.writerows(all_movie)
        print("Data berhasil disimpan ke movie_data.csv!")
    else:
        print("Tidak ada data yang disimpan.")

fetch_movie_data_csv()


Error: Gagal mengambil data di halaman 501, status code: 400
Data berhasil disimpan ke movie_data.csv!


## Import Dataset

In [10]:
import pandas as pd
df = pd.read_csv("movie_data.csv")

In [11]:
df

Unnamed: 0,id,original_title,genres_id,vote_average
0,1125899,Cleaner,"28, 53",6.750
1,1229730,Carjackers,"28, 12",7.026
2,822119,Captain America: Brave New World,"28, 53, 878",6.119
3,1261050,De lydløse,"28, 18",6.200
4,1197306,A Working Man,"28, 80, 53",6.900
...,...,...,...,...
9993,11712,椿三十郎,"18, 28, 35",7.900
9994,763164,Apex,"28, 53, 878",5.200
9995,14177,Beauty Shop,"35, 10749",6.300
9996,9396,Crocodile Dundee II,"12, 35",5.835


In [13]:
data = df.copy()

## Preprocessing Data

In [14]:
print(f"Shape of The Anime Dataset : {df.shape}")
print(f"\nGlimpse of The Dataset :")
df.head().style.set_properties()

Shape of The Anime Dataset : (9998, 4)

Glimpse of The Dataset :


Unnamed: 0,id,original_title,genres_id,vote_average
0,1125899,Cleaner,"28, 53",6.75
1,1229730,Carjackers,"28, 12",7.026
2,822119,Captain America: Brave New World,"28, 53, 878",6.119
3,1261050,De lydløse,"28, 18",6.2
4,1197306,A Working Man,"28, 80, 53",6.9


In [15]:
print(f"Informations About Anime Dataset :\n")
print(df.info())

Informations About Anime Dataset :

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9998 entries, 0 to 9997
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              9998 non-null   int64  
 1   original_title  9998 non-null   object 
 2   genres_id       9915 non-null   object 
 3   vote_average    9998 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 312.6+ KB
None


In [16]:
print(f"Summary of The Anime Dataset :")
df.describe().T.style.set_properties()

Summary of The Anime Dataset :


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
id,9998.0,394361.177536,427810.22868,5.0,14306.0,243939.0,687259.0,1454308.0
vote_average,9998.0,6.222385,1.564017,0.0,5.8,6.5,7.11375,10.0


In [17]:
df.describe(include=object).T.style.set_properties()

Unnamed: 0,count,unique,top,freq
original_title,9998,8139,Tabu,6
genres_id,9915,1912,18,672


In [18]:
print("Null Values of Anime Dataset :")
df.isna().sum().to_frame().T.style.set_properties()

Null Values of Anime Dataset :


Unnamed: 0,id,original_title,genres_id,vote_average
0,0,0,83,0


In [19]:
print("After Dropping, Null Values of Anime Dataset :")
df.dropna(axis = 0, inplace = True)
df.isna().sum().to_frame().T.style.set_properties()

After Dropping, Null Values of Anime Dataset :


Unnamed: 0,id,original_title,genres_id,vote_average
0,0,0,0,0


In [20]:
import numpy as np


data["score"].replace(to_replace=-1, value=np.nan, inplace=True)
data.dropna(subset=["score"], inplace=True)

KeyError: 'score'

## Modelling


In [None]:
# %% Imports and Initial Setup
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import sigmoid_kernel
import re
from rapidfuzz import process, fuzz
import io

In [None]:
def text_cleaning(text):
    text = re.sub(r'&quot;', '', text)
    text = re.sub(r'.hack//', '', text)
    text = re.sub(r'&#039;', '', text)
    text = re.sub(r'A&#039;s', '', text)
    text = re.sub(r'I&#039;', 'I\'', text)
    text = re.sub(r'&amp;', 'and', text)
    return text

data["title"] = data["title"].apply(text_cleaning)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# TF-IDF Vectorizer untuk genre
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['genres'].fillna(''))

# Hitung Cosine Similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Fungsi rekomendasi berdasarkan genre
def recommend_anime(title, top_n=10):
    if title not in df['title'].values:
        return f"Title '{title}' not found in the dataset."
    
    index = df[df['title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    
    anime_indices = [i[0] for i in sim_scores]
    return df.iloc[anime_indices].sort_values(by='score', ascending=False)

# Contoh rekomendasi
print(recommend_anime("Trigun"))

    mal_id                                      title  \
1        5            Cowboy Bebop: Tengoku no Tobira   
60      81     Kidou Senshi Gundam: Dai 08 MS Shoutai   
76      97                                 Last Exile   
71      92                   Kidou Shinseiki Gundam X   
49      68                                  Black Cat   
63      84  Kidou Senshi Gundam 0083: Stardust Memory   
35      54                          Appleseed (Movie)   
90     111                              Corrector Yui   
88     109                           Bakuretsu Tenshi   
37      56                                    Avenger   

                                       genres  score  members   type  episodes  
1                              Action, Sci-Fi   8.38   393783  Movie       1.0  
60  Action, Adventure, Drama, Romance, Sci-Fi   8.00    88768    OVA      12.0  
76                  Action, Adventure, Sci-Fi   7.78   180477     TV      26.0  
71           Action, Adventure, Drama, Sci-Fi   

In [None]:
from flask import Flask, request, jsonify
app = Flask(__name__)

@app.route('/recommend', methods=['GET'])
def recommend():
    title = request.args.get('title')
    if not title:
        return jsonify({"error": "Masukkan parameter title"}), 400

    recommendations = recommend_anime(title)
    if not recommendations:
        return jsonify({"error": "Anime tidak ditemukan"}), 404

    return jsonify(recommendations)

if __name__ == '__main__':
    app.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
