In [7]:
import streamlit as st
import pandas as pd

# Load the MovieLens dataset
movies_df = pd.read_csv("movies.csv")
ratings_df = pd.read_csv("ratings.csv")

# Merge movies and ratings dataframes to get movie ratings
movie_ratings_df = pd.merge(movies_df, ratings_df, on='movieId')

# Group movie ratings by movieId and calculate average rating and number of ratings
movie_ratings_grouped = movie_ratings_df.groupby('movieId').agg({'rating': ['mean', 'count']})

# Filter out movies with fewer than a certain number of ratings to avoid recommending obscure movies
min_ratings_count = 50
popular_movies = movie_ratings_grouped[movie_ratings_grouped['rating']['count'] >= min_ratings_count]

# Calculate the average rating and number of ratings for each movie
popular_movies['average_rating'] = popular_movies['rating']['mean']
popular_movies['num_ratings'] = popular_movies['rating']['count']

# Merge back with the movies dataframe to get movie information
popular_movies = popular_movies.merge(movies_df, left_on='movieId', right_on='movieId')

# Create a list of movie titles and genres for recommendation
movie_titles = popular_movies['title'].tolist()
movie_genres = popular_movies['genres'].tolist()

# Preprocess movie genres to extract actors and directors
movie_actors = []
movie_directors = []

for genre in movie_genres:
    parts = genre.split('|')
    movie_actors.append(parts[1].strip() if len(parts) > 1 else "")
    movie_directors.append(parts[2].strip() if len(parts) > 2 else "")

# Combine all movie information into a single list for similarity comparison
movie_info = list(zip(movie_titles, movie_genres, movie_actors, movie_directors))

# Function to calculate similarity between user query and movie information
def calculate_similarity(query, movie_info):
    # Implement your similarity metric here
    # You can use techniques like cosine similarity, Jaccard index, or any other suitable method
    # For simplicity, let's just use a basic string similarity comparison (case-insensitive)

    # Extract the relevant text attributes from the movie_info tuple
    movie_titles, movie_genres, movie_actors, movie_directors = zip(*movie_info)

    # Concatenate all the relevant text attributes into a single string for each movie
    movie_text = [title.lower() + " " + genre.lower() + " " + actor.lower() + " " + director.lower()
                  for title, genre, actor, director in movie_info]

    query = query.lower()
    similarity_scores = [sum(word in movie_text_item for word in query.split()) for movie_text_item in movie_text]
    return similarity_scores

# Function to get movie recommendations based on user query
def get_movie_recommendations(user_query, movie_info, num_recommendations=5):
    similarity_scores = calculate_similarity(user_query, movie_info)

    # Get indices of movies with highest similarity scores
    top_indices = sorted(range(len(similarity_scores)), key=lambda i: similarity_scores[i], reverse=True)

    # Get top recommended movie titles
    recommended_movies = [movie_titles[idx] for idx in top_indices[:num_recommendations]]
    return recommended_movies

# Streamlit app
def main():
    st.title("Movie Recommendations Chatbot")
    st.write("Enter your movie preferences, and the chatbot will suggest some movie recommendations!")

    user_query = st.text_input("Enter your movie preferences:")

    if user_query:
        recommended_movies = get_movie_recommendations(user_query, movie_info)
        st.write("Recommended Movies:")
        for idx, movie in enumerate(recommended_movies, 1):
            st.write(f"{idx}. {movie}")

if __name__ == "__main__":
    main()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  popular_movies['average_rating'] = popular_movies['rating']['mean']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  popular_movies['num_ratings'] = popular_movies['rating']['count']
  popular_movies = popular_movies.merge(movies_df, left_on='movieId', right_on='movieId')
