<a href="https://www.kaggle.com/code/mustafagull/film-oneri?scriptVersionId=178068530" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import scipy.sparse as sp
from sklearn.preprocessing import MinMaxScaler
import re
import time
# Start the wall-clock timer that is reported at the end of the run, and
# create the scaler applied to the numeric features further down.
start_time = time.time()
scaler = MinMaxScaler()

# Load the dataset.
df = pd.read_csv('/kaggle/input/top5000-movies/tmdb_5000_movies.csv')

# Parse the release date and expose its year / month / day as own columns.
df['release_date'] = pd.to_datetime(df['release_date'])
df['release_year'] = df['release_date'].dt.year
df['release_month'] = df['release_date'].dt.month
df['release_day'] = df['release_date'].dt.day

# Text columns that feed the TF-IDF representation.
text_columns = [
    'genres',
    'keywords',
    'original_language',
    'original_title',
    'overview',
    'production_companies',
    'production_countries',
    'spoken_languages',
    'status',
    'tagline',
    'title',
]

# Missing text becomes an empty string so the join below never sees NaN.
df[text_columns] = df[text_columns].fillna('')

# Join the text columns of each row with single spaces and drop every
# character that is neither a word character nor whitespace.
df['combined_text'] = df[text_columns].apply(
    lambda row: re.sub(r'[^\w\s]', '', ' '.join(row)),
    axis=1,
)

# Show the combined text of the first row as a sanity check.
display(df['combined_text'].loc[0])

# TF-IDF vectorisation of the combined text (English stop words removed).
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_text'])

# Numeric columns: min-max scale first, then replace the values that are
# still missing with 0.
numerical_columns = [
    'budget',
    'revenue',
    'popularity',
    'runtime',
    'vote_average',
    'vote_count',
    'release_year',
]
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])
df[numerical_columns] = df[numerical_columns].fillna(0)

numerical_data = df[numerical_columns].to_numpy()

# Stack the numeric features next to the TF-IDF features, column-wise.
combined_data = sp.hstack([numerical_data.astype(float), tfidf_matrix])

# Pairwise cosine similarity between all rows of the combined features.
cosine_sim = cosine_similarity(combined_data)

# Öneri yapmak için fonksiyon
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the films most similar to ``title`` together with their scores.

    Args:
        title: Exact value to look up in ``df['title']``. When several rows
            carry the same title, the first one is used.
        cosine_sim: Pairwise similarity matrix indexed by row position of
            ``df``. Defaults to the module-level matrix.
        top_n: Maximum number of recommendations to return.

    Returns:
        A tuple ``(titles, scores)``. ``titles`` is the selection of
        ``df['title']`` ordered from most to least similar, and ``scores``
        is the list of the corresponding similarity values.

    Raises:
        IndexError: If no row of ``df`` has the requested title.
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f'top_n must be non-negative, got {top_n}')

    # Work with row positions rather than index labels: ``cosine_sim`` and
    # ``.iloc`` are both positional, so this stays correct even when ``df``
    # does not carry a default 0..n-1 index.
    positions = (df['title'] == title).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f'No film titled {title!r} in the dataset')
    idx = int(positions[0])

    # Leave the query film out explicitly. Dropping "whatever sorts first"
    # is only right when the film's self-similarity is the strict maximum;
    # with ties the film itself could be recommended and a real match lost.
    sim_scores = [pair for pair in enumerate(cosine_sim[idx]) if pair[0] != idx]

    # Stable sort, highest similarity first; ties keep their row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    sim_scores = sim_scores[:top_n]

    movie_indices = [position for position, _ in sim_scores]
    scores = [score for _, score in sim_scores]
    return df['title'].iloc[movie_indices], scores



# Example usage: look up one film and print its recommendations.
movie_title = 'Borat: Cultural Learnings of America for Make Benefit Glorious Nation of Kazakhstan'

recommendations, scores = get_recommendations(movie_title)

# One "title: score" line per recommended film.
for movie, score in zip(recommendations.tolist(), scores):
    print(f'{movie}: {score}')

# Report the wall-clock time elapsed since the timer was started.
end_time = time.time()
execution_time = end_time - start_time
print(f"Kodun çalışma süresi: {execution_time} saniye")


'id 28 name Action id 12 name Adventure id 14 name Fantasy id 878 name Science Fiction id 1463 name culture clash id 2964 name future id 3386 name space war id 3388 name space colony id 3679 name society id 3801 name space travel id 9685 name futuristic id 9840 name romance id 9882 name space id 9951 name alien id 10148 name tribe id 10158 name alien planet id 10987 name cgi id 11399 name marine id 13065 name soldier id 14643 name battle id 14720 name love affair id 165431 name anti war id 193554 name power relations id 206690 name mind and soul id 209714 name 3d en Avatar In the 22nd century a paraplegic Marine is dispatched to the moon Pandora on a unique mission but becomes torn between following orders and protecting an alien civilization name Ingenious Film Partners id 289 name Twentieth Century Fox Film Corporation id 306 name Dune Entertainment id 444 name Lightstorm Entertainment id 574 iso_3166_1 US name United States of America iso_3166_1 GB name United Kingdom iso_639_1 en n

Straight Outta Compton: 0.6431939978254294
Room: 0.6361452254385236
Lincoln: 0.6332970158854709
Brooklyn's Finest: 0.6328268229545316
The Big Short: 0.631384958484745
The Revenant: 0.6312468984810327
Gone Girl: 0.6302399781401382
Spotlight: 0.6300355387676989
Chronicle: 0.6296875151571922
Zodiac: 0.6292782875357819
Kodun çalışma süresi: 3.1935513019561768 saniye
