In [None]:
import numpy as npy#numerical python
import pandas as pand#pandas to load dataset
from sklearn.metrics import mean_squared_error as msr#to calculate mean
from sklearn.model_selection import train_test_split as ts#train 80 to 20
from sklearn.preprocessing import StandardScaler as sclr#scaler
from scipy.sparse import csr_matrix as cmat#calculate confusion matrix
from sklearn.metrics.pairwise import cosine_similarity as cos_sim#find simlarity
from ipywidgets import interact, IntSlider, FloatSlider, Button, Output, VBox, HBox#hkbox
from IPython.display import display as dsp#GUI
import random #random pick

# Seed both RNGs so the synthetic ratings generated below are reproducible.
random.seed(42)
npy.random.seed(42)

#**************************************
#  Data Loading and Initial Setup
#**************************************

# Single definition of the dataset path: the error message below is built from
# it, so the reported file name can never drift from the file actually read.
MOVIES_CSV_PATH = 'movie4.csv'

try:
    movies_df = pand.read_csv(MOVIES_CSV_PATH)
except FileNotFoundError:
    print(f"Error: {MOVIES_CSV_PATH} not found. Please ensure the file is in the same directory.")
    # Nothing below can work without the dataset, so stop here.
    exit()

num_usrs = 25  # Number of synthetic users generated for the demonstration

if 'movies_df' in locals():
    # Distinct movie identifiers taken from the 'sno' column of the dataset.
    movie_news = movies_df['sno'].unique()
    usr_ids = range(1, num_usrs + 1)

    # random.sample needs a sequence; build it once instead of once per user.
    movie_pool = list(movie_news)

    usr_mov_rat_data = []
    for usr in usr_ids:
        # Draw 10-30 ratings per user, capped at the catalogue size because
        # random.sample raises ValueError when asked for more items than exist.
        num_rat = min(random.randint(10, 30), len(movie_pool))
        rate_mov = random.sample(movie_pool, num_rat)
        for movie in rate_mov:
            rating = random.randint(1, 5)
            # Random offset of up to 3600*22*60 seconds (55 days) before "now".
            timestamp = pand.Timestamp('now').timestamp() - random.randint(0, 3600*22*60)
            usr_mov_rat_data.append({'usr_id': usr, 'movie_new': movie, 'rating': rating, 'timestamp': timestamp})

    # Explicit columns keep the schema stable even if no rating was generated.
    usr_movie_rate = pand.DataFrame(usr_mov_rat_data, columns=['usr_id', 'movie_new', 'rating', 'timestamp'])

    # Content table: one row per movie, renamed to the column names used by the
    # recommender functions ('movie_new', 'rating_content').
    mov_cont_df = movies_df[['sno', 'year', 'name', 'Rating', 'hour']].copy()
    mov_cont_df.rename(columns={'sno': 'movie_new', 'Rating': 'rating_content'}, inplace=True)
    # Non-numeric or missing values are coerced to NaN and then replaced by 0.
    mov_cont_df['year'] = pand.to_numeric(mov_cont_df['year'], errors='coerce').fillna(0)
    mov_cont_df['rating_content'] = pand.to_numeric(mov_cont_df['rating_content'], errors='coerce').fillna(0)
    mov_cont_df['hour'] = pand.to_numeric(mov_cont_df['hour'], errors='coerce').fillna(0)

    #**************************************
    # Data Preprocessing Functions
    #**************************************

    def preprocess_content_data(mov_cont_df):
        """Return an encoded, standardised copy of the movie content table.

        The input frame is not modified. The 'name' column, when present, is
        replaced by one-hot indicator columns; otherwise a warning is printed.
        Whichever of 'year', 'rating_content' and 'hour' are present are
        standardised with the scaler imported as ``sclr``.

        Args:
            mov_cont_df: DataFrame of per-movie content features.

        Returns:
            A new DataFrame with the encoded and scaled columns.
        """
        # NOTE(review): 'name' looks like a per-movie title (get_movie_title
        # reads it as such); if so, one-hot encoding it adds one indicator
        # column per movie -- confirm this is intended.
        mov_cont_df = mov_cont_df.copy()
        categorical_features = ['name']
        if 'name' in mov_cont_df.columns:
            mov_cont_df = pand.get_dummies(mov_cont_df, columns=categorical_features, dummy_na=False)
        else:
            print("Warning: 'name' column not found. Content-based filtering might be less effective.")
        # Scale only the numeric columns that exist, so a frame missing one of
        # them no longer fails with KeyError.
        numerical_features = [
            col for col in ('year', 'rating_content', 'hour') if col in mov_cont_df.columns
        ]
        # The scaler cannot be fitted on zero rows, so skip it for empty input.
        if numerical_features and not mov_cont_df.empty:
            scaler = sclr()
            mov_cont_df[numerical_features] = scaler.fit_transform(mov_cont_df[numerical_features])
        return mov_cont_df

    # Encode and scale the content table once; this processed frame is the one
    # handed to the recommendation and evaluation calls further down.
    mov_cont_df_processed = preprocess_content_data(mov_cont_df)

    #**************************************
    # Recommendation Algorithm Functions
    #**************************************

    def content_based_filtering(usr_id, movie_new, mov_cont_df, usr_movie_rate):
        """Predict a rating from the content similarity of already-rated movies.

        The prediction is the similarity-weighted mean of the user's ratings,
        using the cosine similarity between the target movie's feature row and
        each rated movie's feature row.

        Args:
            usr_id: Identifier matched against ``usr_movie_rate['usr_id']``.
            movie_new: Identifier of the movie to predict, matched against
                ``mov_cont_df['movie_new']``.
            mov_cont_df: Feature table with a 'movie_new' column; all remaining
                columns (except a literal 'name' column) are cast to float.
            usr_movie_rate: Ratings table with 'usr_id', 'movie_new', 'rating'.

        Returns:
            The predicted rating, or None when the movie has no feature row,
            the user has no usable ratings, or no rated movie is positively
            similar to the target.
        """
        if movie_new not in mov_cont_df['movie_new'].values:
            return None

        usr_rating = usr_movie_rate[usr_movie_rate['usr_id'] == usr_id]
        if usr_rating.empty:
            return None

        col_drp = [col for col in ['name'] if col in mov_cont_df.columns]
        mov_cont_mat = mov_cont_df.set_index('movie_new').drop(columns=col_drp, errors='ignore').astype(float)
        # Keep one feature row per movie so label lookups below stay 1:1.
        mov_cont_mat = mov_cont_mat[~mov_cont_mat.index.duplicated()]

        # Use only rated movies that have a feature row, instead of giving up
        # on the whole prediction when a single one is missing.
        usr_rating = usr_rating[usr_rating['movie_new'].isin(mov_cont_mat.index)]
        if usr_rating.empty:
            return None

        # Compare the target row against the rated rows only; this yields the
        # same values as the matching entries of the full pairwise matrix
        # without building an N x N matrix on every call.
        target_vec = cmat(mov_cont_mat.values[mov_cont_mat.index == movie_new])
        rated_mat = cmat(mov_cont_mat.loc[usr_rating['movie_new'].values].values)
        simlrts = cos_sim(target_vec, rated_mat).ravel()
        usr_ratings = usr_rating['rating'].values

        # Scaled features make negative similarities possible. Negative weights
        # can push a weighted mean outside the rating range or make the
        # denominator vanish, so only positively similar movies contribute
        # (the same rule collaborative_filtering applies to users).
        positive = simlrts > 0
        similarity_sum = simlrts[positive].sum()
        if similarity_sum == 0:
            return None
        predicted_rating = npy.dot(simlrts[positive], usr_ratings[positive]) / similarity_sum
        return predicted_rating

    def collaborative_filtering(usr_id, movie_new, usr_movie_rate):
        """Predict a rating with user-based collaborative filtering.

        User similarity is the cosine similarity between rows of the
        user x movie rating matrix (unrated cells counted as 0). The prediction
        is the similarity-weighted mean of the ratings that *other* positively
        similar users actually gave to ``movie_new``.

        Args:
            usr_id: Identifier of the user to predict for.
            movie_new: Identifier of the movie to predict.
            usr_movie_rate: Ratings table with 'usr_id', 'movie_new', 'rating'.

        Returns:
            The predicted rating, or None when the user or movie is unknown or
            no positively similar user has rated the movie.
        """
        if usr_movie_rate.empty:
            return None

        # Keep NaN for "not rated" here; it is needed to tell real ratings
        # apart from missing ones when picking the neighbours below.
        ratings_matrix = usr_movie_rate.pivot_table(index='usr_id', columns='movie_new', values='rating')
        if usr_id not in ratings_matrix.index or movie_new not in ratings_matrix.columns:
            return None

        # Zero-filled copy is used for the similarity computation only.
        filled = ratings_matrix.fillna(0)
        sims = cos_sim(filled.loc[[usr_id]].values, filled.values)[0]
        usr_similarity = pand.Series(sims, index=ratings_matrix.index)

        movie_ratings = ratings_matrix[movie_new]
        # Neighbours must be positively similar, must have rated the movie
        # (filling with 0 first would turn "no rating" into a rating of 0 and
        # drag every prediction down), and must not be the target user.
        usable = (usr_similarity > 0) & movie_ratings.notna() & (ratings_matrix.index != usr_id)
        if not usable.any():
            return None

        weights = usr_similarity[usable]
        similarity_sum = npy.sum(weights)
        if similarity_sum == 0:
            return None
        weighted_sum = npy.sum(weights * movie_ratings[usable])
        predicted_rating = weighted_sum / similarity_sum
        return predicted_rating

    def hybr_recom(usr_id, movie_new, usr_movie_rate, mov_cont_df, content_weight=0.5, col_weig=0.5):
        """Blend the content-based and collaborative predictions.

        Args:
            usr_id: Identifier of the user to predict for.
            movie_new: Identifier of the movie to predict.
            usr_movie_rate: Ratings table passed to both predictors.
            mov_cont_df: Content feature table passed to the content predictor.
            content_weight: Relative weight of the content-based prediction.
            col_weig: Relative weight of the collaborative prediction.

        Returns:
            The weighted average of the two predictions; the single available
            prediction when only one predictor returns a value; None when
            neither does.
        """
        content_rating = content_based_filtering(usr_id, movie_new, mov_cont_df, usr_movie_rate)
        collaborative_rating = collaborative_filtering(usr_id, movie_new, usr_movie_rate)

        # With one side missing, fall back to the other (which may be None too).
        if content_rating is None:
            return collaborative_rating
        if collaborative_rating is None:
            return content_rating

        # Normalise by the weight sum so the result stays on the rating scale
        # for any pair of weights (e.g. 0.3/0.3 would otherwise return 0.6x
        # the average). Weights that already sum to 1 give the same result.
        total_weight = content_weight + col_weig
        if total_weight <= 0:
            # No usable weighting: treat both predictors equally.
            return (content_rating + collaborative_rating) / 2
        combined_rating = ((content_weight * content_rating) + (col_weig * collaborative_rating)) / total_weight
        return combined_rating

    def get_top_n_recommendations(usr_id, n=5, current_content_weight=0.5, current_col_weig=0.5):
        """Return up to ``n`` (movie id, predicted rating) pairs for a user.

        Every movie in ``movie_news`` that the user has not rated is scored
        with ``hybr_recom``; movies without a prediction are skipped and the
        rest are returned in descending order of predicted rating.
        """
        already_rated = usr_movie_rate.loc[usr_movie_rate['usr_id'] == usr_id, 'movie_new'].unique()

        scored = []
        for candidate in movie_news:
            if candidate in already_rated:
                continue
            estimate = hybr_recom(
                usr_id,
                candidate,
                usr_movie_rate,
                mov_cont_df_processed,
                current_content_weight,
                current_col_weig,
            )
            if estimate is None:
                continue
            scored.append((candidate, estimate))

        # sorted() is stable, like list.sort(), so ties keep catalogue order.
        ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
        return ranked[:n]

    def evaluate_hybrid_model(usr_movie_rate, mov_cont_df, content_weight=0.5, col_weig=0.5):
        """Return the RMSE of the hybrid model on a held-out 20% of the ratings.

        The ratings are split 80/20 with a fixed random state; each held-out
        rating is predicted from the training part only. Rows for which no
        prediction is available are ignored. Returns ``float('inf')`` when no
        row could be predicted at all.
        """
        train_df, test_df = ts(usr_movie_rate, test_size=0.2, random_state=42)

        actual_ratings = []
        pred = []
        for _, held_out in test_df.iterrows():
            estimate = hybr_recom(
                held_out['usr_id'],
                held_out['movie_new'],
                train_df,
                mov_cont_df,
                content_weight,
                col_weig,
            )
            if estimate is None:
                continue
            actual_ratings.append(held_out['rating'])
            pred.append(estimate)

        if len(pred) == 0:
            return float('inf')
        return npy.sqrt(msr(actual_ratings, pred))

    def get_movie_title(movie_new):
        """Return the 'name' of the first movie whose 'sno' equals ``movie_new``.

        Returns the string "Movie not found" when no row matches.
        """
        matching_names = movies_df.loc[movies_df['sno'] == movie_new, 'name']
        if matching_names.empty:
            return "Movie not found"
        return matching_names.iloc[0]

    #**************************************
    # Interactive Elements using ipywidgets
    #**************************************

    output = Output()

    # --- Single-rating prediction panel ---
    prediction_output = Output()
    predict_button = Button(description="Get Predicted Rating")
    usr_id_slider = IntSlider(value=1, min=1, max=num_usrs, description='usr ID:')
    movie_sno_slider = IntSlider(
        value=movies_df['sno'].iloc[0],
        min=movies_df['sno'].min(),
        max=movies_df['sno'].max(),
        description='Movie SNO:',
    )

    def on_predict_button_clicked(b):
        """Print the hybrid prediction for the selected user and movie."""
        with prediction_output:
            prediction_output.clear_output()
            movie_sno = movie_sno_slider.value
            predicted_rating = hybr_recom(
                usr_id_slider.value,
                movie_sno,
                usr_movie_rate,
                mov_cont_df_processed,
                cont_wt_slider.value,
                col_weig_slider.value,
            )
            if predicted_rating is None:
                print("Could not predict rating.")
                return
            print(f"Predicted Rating for Movie SNO {movie_sno}: {predicted_rating:.2f}")

    predict_button.on_click(on_predict_button_clicked)

    prediction_controls = VBox([
        usr_id_slider,
        movie_sno_slider,
        predict_button,
        prediction_output,
    ])

    # --- Top-N recommendation panel ---
    recommendation_output = Output()
    recommend_button = Button(description="Get Top Recommendations")
    recommend_usr_slider = IntSlider(value=1, min=1, max=num_usrs, description='usr ID:')
    num_recommendations_slider = IntSlider(value=5, min=1, max=10, description='Num. Recommendations:')

    def on_recommend_button_clicked(b):
        """Print the top recommendations for the selected user."""
        with recommendation_output:
            recommendation_output.clear_output()
            usr_id = recommend_usr_slider.value
            n = num_recommendations_slider.value
            top_recommendations = get_top_n_recommendations(
                usr_id, n, cont_wt_slider.value, col_weig_slider.value
            )
            if not top_recommendations:
                print("No recommendations could be generated.")
                return
            print(f"Top {n} Recommendations for usr {usr_id}:")
            for movie_new, rating in top_recommendations:
                movie_title = get_movie_title(movie_new)
                print(f"- {movie_title} (Predicted Rating: {rating:.2f})")

    recommend_button.on_click(on_recommend_button_clicked)

    recommendation_controls = VBox([
        recommend_usr_slider,
        num_recommendations_slider,
        recommend_button,
        recommendation_output,
    ])

    # --- Weight sliders and model evaluation panel ---
    # The two sliders are also read by the prediction and recommendation
    # callbacks each time their buttons are clicked.
    evaluation_output = Output()
    evaluate_button = Button(description="The evluated Model Curr_weights")
    cont_wt_slider = FloatSlider(value=0.3, min=0.0, max=0.8, step=0.01, description='the  Weight:')
    col_weig_slider = FloatSlider(value=0.3, min=0.0, max=0.8, step=0.1, description='Collaborative value:')

    def on_evaluate_button_clicked(b):
        """Print the RMSE of the hybrid model for the current slider weights."""
        with evaluation_output:
            evaluation_output.clear_output()
            rmse = evaluate_hybrid_model(
                usr_movie_rate,
                mov_cont_df_processed,
                cont_wt_slider.value,
                col_weig_slider.value,
            )
            print(f"the rmse  with the cur_weight: {rmse:.4f}")

    evaluate_button.on_click(on_evaluate_button_clicked)

    evaluation_controls = VBox([
        cont_wt_slider,
        col_weig_slider,
        evaluate_button,
        evaluation_output,
    ])

    # Render the three panels, each under its own printed heading.
    print("## Interactive Movie Recommendation System")
    for heading, panel in (
        ("\n### Get Predicted Rating", prediction_controls),
        ("\n### Get Top N Recommendations", recommendation_controls),
        ("\n### Adjust Weights and Evaluate Model", evaluation_controls),
    ):
        print(heading)
        dsp(panel)


else:
    # Name the file that is actually loaded above ('movie4.csv').
    print("Error: 'movies_df' was not loaded. Please ensure 'movie4.csv' is in the correct directory and the loading was successful.")

## Interactive Movie Recommendation System

### Get Predicted Rating


VBox(children=(IntSlider(value=1, description='usr ID:', max=25, min=1), IntSlider(value=1, description='Movie…


### Get Top N Recommendations


VBox(children=(IntSlider(value=1, description='usr ID:', max=25, min=1), IntSlider(value=5, description='Num. …


### Adjust Weights and Evaluate Model


VBox(children=(FloatSlider(value=0.3, description='the  Weight:', max=0.8, step=0.01), FloatSlider(value=0.3, …