<a href="https://colab.research.google.com/github/PujanPandey07/Movie-Recommender-system-using-NLP-and-ML/blob/main/finalsystem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Code to create a simple UI-based movie recommender system using Streamlit

## The trained logistic regression model, the trained TF-IDF vectorizer, and the dataset with vector embeddings are saved in my Google Drive

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive. The Streamlit app written later in this notebook
# loads its dataset and models from '/content/drive/My Drive/colab_data/'.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.47.0-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.0-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m61.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m80.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [None]:
!pip install thefuzz

Collecting thefuzz
  Downloading thefuzz-0.22.1-py3-none-any.whl.metadata (3.9 kB)
Collecting rapidfuzz<4.0.0,>=3.0.0 (from thefuzz)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading thefuzz-0.22.1-py3-none-any.whl (8.2 kB)
Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, thefuzz
Successfully installed rapidfuzz-3.13.0 thefuzz-0.22.1


In [None]:
# Install required packages
!pip install -q streamlit thefuzz scikit-learn pandas numpy joblib
!npm install -g localtunnel

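# Write the Streamlit app source to app.py (the embedded source must not contain a triple single quote, which would end the string early)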
with open('app.py', 'w') as f:
    f.write('''
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from thefuzz import process
from sklearn.feature_extraction.text import TfidfVectorizer
from joblib import load
import warnings

# Configuration
# Silence every Python warning for the lifetime of the app process.
warnings.filterwarnings('ignore')
# Browser tab title and a full-width page layout.
st.set_page_config(page_title="Movie Recommender", layout="wide")



# Load Data and Models
@st.cache_data
def _read_dataset(path):
    """Read the pickled dataset at ``path`` and downcast its embedding columns.

    Only successful reads are memoized: any exception propagates to the caller
    instead of being turned into a cached ``None``.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only point this at files you created yourself.
    frame = load(path)
    # Optimize memory usage: store every embedding as float32.
    for col in ('plot_vector', 'genre_vector', 'metadata_vector', 'review_vector'):
        if col in frame:
            # np.asarray also accepts list-valued cells, which have no .astype().
            frame[col] = frame[col].apply(lambda vec: np.asarray(vec, dtype=np.float32))
    return frame


def load_data():
    """Return the movie dataset, or ``None`` when it cannot be loaded.

    The error is reported in the UI. The failure itself is not cached, so the
    load is retried on the next rerun (for example after Drive is mounted).
    """
    try:
        return _read_dataset('/content/drive/My Drive/colab_data/finaldataset.pkl')
    except Exception as e:
        st.error(f"Error loading dataset: {str(e)}")
        return None

@st.cache_resource
def _read_models(model_path, vectorizer_path):
    """Load the pickled classifier and vectorizer.

    Only successful loads are memoized: any exception propagates to the caller
    instead of being turned into a cached ``(None, None)``.
    """
    # NOTE: joblib.load unpickles the files, which can execute arbitrary code.
    # Only point this at files you created yourself.
    return load(model_path), load(vectorizer_path)


def load_models():
    """Return ``(model, vectorizer)``, or ``(None, None)`` when loading fails.

    The error is reported in the UI. The failure itself is not cached, so the
    load is retried on the next rerun.
    """
    try:
        return _read_models(
            '/content/drive/My Drive/colab_data/logistic_model.pkl',
            '/content/drive/My Drive/colab_data/tfidf_vectorizer.pkl',
        )
    except Exception as e:
        st.error(f"Error loading models: {str(e)}")
        return None, None

# Initialize components
# Both loaders report failures through st.error and return None placeholders,
# so the values below must be checked for None before use.
dataset = load_data()
model, vectorizer = load_models()

# Recommendation Logic
# Per-feature multipliers applied to each embedding's cosine similarity
# before the similarities are summed into one score.
weights = dict(
    plot_vector=0.3,
    genre_vector=0.2,
    metadata_vector=0.2,
    review_vector=0.3,
)

def compute_weighted_similarity(query_vector, candidate_vectors, weights):
    """Blend per-feature cosine similarities into one score per candidate.

    Args:
        query_vector: mapping of feature name to the query's embedding array.
        candidate_vectors: object indexable by feature name whose entries
            expose ``.values`` holding one embedding per candidate.
        weights: mapping of feature name to its multiplier.

    Returns:
        The element-wise sum over features of ``weight * cosine similarity``.
    """
    def _weighted(feature):
        # Shape the query as a single row so it is compared with every candidate row.
        query_row = query_vector[feature].reshape(1, -1)
        candidate_matrix = np.stack(candidate_vectors[feature].values)
        return weights[feature] * cosine_similarity(query_row, candidate_matrix).flatten()

    return np.sum([_weighted(feature) for feature in query_vector], axis=0)

def _reviews_to_text(reviews):
    """Flatten one cleaned_reviews cell to a single string ("" for anything unusable)."""
    if isinstance(reviews, list):
        return " ".join(reviews)
    if isinstance(reviews, str):
        return reviews
    return ""


def get_recommendations(query_title, dataset, top_n=20):
    """Recommend movies similar to the closest fuzzy match of ``query_title``.

    Candidates are ranked by the weighted embedding similarity to the matched
    movie. The ``top_n`` best are then re-ranked with
    ``0.8 * similarity_score + 0.2 * sentiment_score``, where the sentiment
    score is column 1 of ``model.predict_proba`` on the vectorized reviews
    (presumably the positive class - confirm against the trained model).

    Args:
        query_title: title typed by the user.
        dataset: DataFrame with ``title_x``, ``cleaned_reviews`` and the four
            ``*_vector`` columns. It is not modified.
        top_n: number of candidates kept after the similarity ranking.

    Returns:
        ``(matched_title, recommendations)`` on success, where
        ``recommendations`` is a DataFrame sorted by ``final_score``
        (descending) that excludes the matched title itself.
        ``(None, message)`` when no title matches well enough.
    """
    # Compare on string titles everywhere, so a non-string entry in the column
    # cannot make the row lookup below come back empty.
    titles = dataset['title_x'].astype(str)

    result = process.extractOne(query_title, titles.tolist(), scorer=process.fuzz.ratio)
    if not result:
        return None, "No matching movies found"

    closest_title, score = result[0], result[1]

    if score < 80:
        return None, f"Movie not found. Did you mean: '{closest_title}'?"

    # closest_title is one of the offered titles, so an exact match always exists.
    is_match = titles == closest_title
    query_row = dataset[is_match].iloc[0]

    vector_columns = ['plot_vector', 'genre_vector', 'metadata_vector', 'review_vector']
    query_vector = {col: query_row[col] for col in vector_columns}
    candidate_vectors = dataset[vector_columns]

    similarity_scores = compute_weighted_similarity(query_vector, candidate_vectors, weights)
    # assign() returns a new frame, leaving the caller's dataset untouched.
    scored = dataset.assign(similarity_score=similarity_scores)

    top_movies = scored[~is_match].nlargest(top_n, 'similarity_score').copy()
    top_movies['review_text'] = top_movies['cleaned_reviews'].apply(_reviews_to_text)

    if top_movies.empty:
        # Nothing to score (e.g. a single-movie dataset); skip the vectorizer and model.
        sentiment = np.zeros(0, dtype=float)
    else:
        review_vectors = vectorizer.transform(top_movies['review_text'])
        sentiment = model.predict_proba(review_vectors)[:, 1]

    top_movies['sentiment_score'] = sentiment
    top_movies['final_score'] = top_movies['sentiment_score']*0.2 + top_movies['similarity_score']*0.8

    return closest_title, top_movies.sort_values('final_score', ascending=False)
# Streamlit UI
st.title("🎬 Hybrid-Movie Recommender")

# Sidebar settings
with st.sidebar:
    st.header("Settings")
    top_n = st.slider("Number of recommendations", 5, 20, 10)
    show_details = st.checkbox("Show detailed scores", True)

# Main input
query_title = st.text_input("Enter a movie you like:", placeholder="E.g. Inception")

if query_title:
    if dataset is None or model is None or vectorizer is None:
        st.error("Failed to load required data or models. Please check your files.")
    else:
        closest_title, recommendations = get_recommendations(query_title, dataset, top_n)

        # On failure get_recommendations returns (None, message): the title slot
        # is None and the second slot carries the text to show the user.
        if closest_title is None:
            st.warning(recommendations)
        else:
            st.success(f"Found: **{closest_title}**")

            # Display recommendations
            st.subheader(f"Top {top_n} Recommendations")
            cols = ['title_x', 'similarity_score', 'sentiment_score', 'final_score'] if show_details else ['title_x']

            st.dataframe(
                recommendations[cols].rename(columns={'title_x': 'Title'}),
                column_config={
                    "similarity_score": st.column_config.NumberColumn(format="%.3f"),
                    "sentiment_score": st.column_config.NumberColumn(format="%.3f"),
                    "final_score": st.column_config.NumberColumn(format="%.3f")
                },
                hide_index=True,
                use_container_width=True
            )

            # Visualize top recommendations.
            # `recommendations` still carries the original column names (the rename
            # above only affects the displayed copy), so index by 'title_x' and plot
            # the score the list is ranked by.
            st.bar_chart(
                recommendations.head(5).set_index('title_x')[['final_score']],
                color="#FF4B4B"
            )
''')

# Run the app
# Start Streamlit in the background; `&>` sends both stdout and stderr to /content/logs.txt.
!streamlit run app.py &>/content/logs.txt &
# Tunnel local port 8501 to a public loca.lt URL. The run command above sets no port,
# so 8501 is presumably Streamlit's default - confirm. The tunnel is started right away,
# without waiting for Streamlit to finish starting.
!npx localtunnel --port 8501

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K
changed 22 packages in 1s
[1G[0K⠇[1G[0K
[1G[0K⠇[1G[0K3 packages are looking for funding
[1G[0K⠇[1G[0K  run `npm fund` for details
[1G[0K⠇[1G[0K[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0Kyour url is: https://tiny-animals-sing.loca.lt
/tools/node/lib/node_modules/localtunnel/bin/lt.js:81
    throw err;
    ^

Error: connection refused: localtunnel.me:10407 (check your firewall settings)
    at Socket.<anonymous> (/tools/node/lib/node_modules/[4mlocaltunnel[24m/lib/TunnelCluster.js:52:11)
[90m    at Socket.emit (node:events:524:28)[39m
[90m    at emitErrorNT (node:internal/streams/destroy:169:8)[39m
[90m    at emitErrorCloseNT (node:internal/streams/destroy:128:3)[39m
[90m    at process.processTicksAndRejections (node:internal/process/task_queues:82:21)[39m

Node.js v20.19.0
[1G[0K⠙[1G[0K

In [None]:
# Print the loca.lt tunnel password (per the endpoint name); the recorded output is an IP address.
!curl https://loca.lt/mytunnelpassword

34.32.137.201