In [None]:
import numpy as np
import pandas as pd
import streamlit as st
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel

In [None]:
# Load the pretrained BERT tokenizer and encoder.
# Both are created from the same checkpoint name so they cannot drift apart.
BERT_CHECKPOINT = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(BERT_CHECKPOINT)
model = AutoModel.from_pretrained(BERT_CHECKPOINT)

# Compute a BERT embedding for a piece of text
def get_bert_vector(text):
    """Embed ``text`` by mean-pooling BERT's last hidden layer.

    Args:
        text: Input passed straight to the module-level ``tokenizer``
            (a string, or a list of strings). Inputs longer than 512 tokens
            are truncated.

    Returns:
        A NumPy array with one row per input text, i.e. shape
        ``(1, hidden_size)`` when a single string is given.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Inference only: skip autograd bookkeeping to save time and memory.
    with torch.no_grad():
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state
    # Weight each token by its attention mask so padding positions (present
    # when a batch of texts of different lengths is passed) do not dilute the
    # average. For a single string the mask is all ones, so this is a plain mean.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1)
    pooled = (hidden * mask).sum(dim=1) / token_counts
    return pooled.detach().numpy()

In [None]:
# Load the movie dataset
# st.cache is deprecated; st.cache_data is its replacement for serializable
# data such as DataFrames.
@st.cache_data
def load_data():
    """Read the movie table from ``movies.csv`` in the working directory.

    Returns:
        pandas.DataFrame: The parsed CSV contents.
    """
    return pd.read_csv("movies.csv")

data = load_data()

# Embed every movie description.
# Missing descriptions are replaced with an empty string because the tokenizer
# rejects NaN values. Each call returns a (1, hidden) row; the rows are stacked
# into one 2-D (n_movies, hidden) matrix, which is the shape
# cosine_similarity requires (a list of 2-D arrays would be read as 3-D).
movie_descriptions = data['description'].fillna("").astype(str)
movie_vectors = np.vstack([get_bert_vector(desc) for desc in movie_descriptions])

In [None]:
# Streamlit interface
st.title("Movie Recommender")

user_input = st.text_area("Enter a movie description:")
num_movies = st.slider("Number of recommendations:", 1, 10, 5)

if st.button("Recommend"):
    if not user_input.strip():
        # Nothing meaningful to embed; ask for input instead of ranking
        # movies against an empty query.
        st.warning("Please enter a movie description first.")
    else:
        user_vector = get_bert_vector(user_input)
        # cosine_similarity needs 2-D inputs. np.vstack yields an
        # (n_movies, hidden) matrix whether movie_vectors is a list of
        # (1, hidden) rows or already a 2-D array.
        movie_matrix = np.vstack(movie_vectors)
        similarities = cosine_similarity(user_vector, movie_matrix).flatten()
        # Indices of the num_movies highest scores, best match first.
        recommended_indices = similarities.argsort()[-num_movies:][::-1]

        st.subheader("Recommended Movies:")
        for index in recommended_indices:
            st.write(data.iloc[index]['movie_title'])