In [None]:
!pip install streamlit pyngrok pandas scikit-learn nltk -q


In [None]:
# ✅ 1. Install dependencies
!pip install streamlit pyngrok pandas scikit-learn nltk -q

# ✅ 2. Download NLTK resources
import nltk
nltk.download('punkt')
nltk.download('stopwords')

# ✅ 3. Kill any running streamlit or ngrok processes
!pkill -f streamlit || true
!pkill ngrok || true

# ✅ 4. Setup ngrok token
# The token is a credential and must not be stored in the notebook. It is read
# from the NGROK_AUTHTOKEN environment variable, falling back to an interactive
# prompt whose input is not echoed or saved in the cell output.
# NOTE: the token that used to be hardcoded here should be treated as leaked
# and revoked in the ngrok dashboard.
import os
from getpass import getpass

from pyngrok import conf, ngrok

conf.get_default().auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")

# ✅ 5. Write the Streamlit app to app.py
# The template contains emoji, so the file encoding is pinned to UTF-8 instead
# of relying on the platform default. Backslashes inside the (non-raw) template
# are doubled so that app.py receives a single backslash.
with open("app.py", "w", encoding="utf-8") as f:
    f.write('''
import streamlit as st
import pandas as pd
import nltk
import re
import os
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

DATA_FILE = "moviesss.txt"

stemmer = PorterStemmer()
_STOPWORDS = None


def _english_stopwords():
    """Return the English stopword set, reading it from NLTK only once."""
    global _STOPWORDS
    if _STOPWORDS is None:
        _STOPWORDS = frozenset(stopwords.words('english'))
    return _STOPWORDS


def _tokenize(text):
    """Split text into word tokens, fetching the tokenizer models if absent."""
    try:
        return word_tokenize(text)
    except LookupError:
        # Older NLTK releases read 'punkt'; newer ones read 'punkt_tab'.
        nltk.download('punkt')
        nltk.download('punkt_tab')
        return word_tokenize(text)


def preprocess_text(text):
    """Normalise a genre string for TF-IDF.

    Lowercases the text, turns every non-alphanumeric character into a space
    (so separators such as the pipe keep genres apart instead of gluing them
    into a single token), drops English stopwords and stems what is left.
    Missing values (NaN/None) give an empty string.
    """
    if pd.isna(text):
        return ""
    text = re.sub(r'[^a-zA-Z0-9\\s]', ' ', str(text).lower())
    stop_words = _english_stopwords()
    return " ".join(stemmer.stem(word) for word in _tokenize(text) if word not in stop_words)


@st.cache_data()
def load_data():
    """Load moviesss.txt from the working directory.

    The file is read as tab-separated first and re-read as comma-separated if
    that yields a single column. The first columns whose names contain
    'title' and 'genres' are renamed to 'title' and 'genress'. Rows missing
    either value are dropped and the index is reset, so row labels equal row
    positions (the TF-IDF matrix is indexed by position).

    Returns the DataFrame with an added 'processed_genress' column, or None
    when the file is missing, unreadable or lacks the required columns.
    """
    try:
        if DATA_FILE not in os.listdir():
            return None
        try:
            df = pd.read_csv(DATA_FILE, sep='\\t')
            if len(df.columns) == 1:
                df = pd.read_csv(DATA_FILE, sep=',')
        except Exception as e:
            st.error(f"Error reading the file: {e}")
            return None

        title_col = next((col for col in df.columns if 'title' in str(col).lower()), None)
        genres_col = next((col for col in df.columns if 'genres' in str(col).lower()), None)
        if not (title_col and genres_col):
            st.error("No 'title' and 'genres' columns found.")
            return None

        df = df.rename(columns={title_col: 'title', genres_col: 'genress'})
        df = df.dropna(subset=['title', 'genress']).reset_index(drop=True)
        df['processed_genress'] = df['genress'].apply(preprocess_text)
        return df
    except Exception as e:
        st.error(f"Failed to load file: {e}")
    return None


def recommend_movies(title, df, tfidf_matrix, top_n=5):
    """Return the top_n movies most similar to the first title match.

    title is matched as a literal, case-insensitive substring. Rows of
    tfidf_matrix must be in the same order as the rows of df. Returns a
    DataFrame with 'title' and 'genress' columns, or None when the query is
    empty or matches nothing.
    """
    query = "" if title is None else str(title).strip()
    if not query:
        return None
    # regex=False: user input such as "(" must not be parsed as a pattern.
    mask = df['title'].astype(str).str.contains(query, case=False, regex=False)
    positions = mask.to_numpy().nonzero()[0]
    if len(positions) == 0:
        return None
    # Work with the row position, not the index label: the matrix is positional.
    pos = int(positions[0])
    sim_scores = cosine_similarity(tfidf_matrix[pos:pos + 1], tfidf_matrix)[0]
    # Stable sort on the negated scores: descending similarity, ties by position.
    order = (-sim_scores).argsort(kind='stable')
    # Remove the query movie explicitly; with ties it need not be ranked first.
    order = order[order != pos][:top_n]
    return df.iloc[order][['title', 'genress']]


st.set_page_config(layout="wide")
st.title("🎬 Movie Recommender")

df = load_data()
if df is None:
    st.error("No valid movie file found.")
else:
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['processed_genress'])

    user_title = st.text_input("Enter a movie title:", "The Matrix")
    if st.button("Recommend"):
        results = recommend_movies(user_title, df, tfidf_matrix)
        if results is not None:
            st.write("### Recommendations:")
            for _, row in results.iterrows():
                st.markdown(f"**{row['title']}**")
                st.write(f"*genres:* {row['genress'].replace('|', ', ')}")
        else:
            st.warning("No matches found.")

    if st.checkbox("Show Dataset"):
        st.dataframe(df[['title', 'genress']])
''')

# ✅ 6. Start Streamlit with ngrok and print the public URL
import socket
import subprocess, threading, time

STREAMLIT_PORT = 8501


def run_app():
    """Run the Streamlit server for app.py; blocks until the server exits."""
    subprocess.run(["streamlit", "run", "app.py", "--server.headless=true", f"--server.port={STREAMLIT_PORT}"])


def wait_for_port(port, timeout=60.0, poll_interval=0.5):
    """Return True once localhost:port accepts TCP connections, False on timeout."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection(("localhost", port), timeout=poll_interval):
                return True
        except OSError:
            time.sleep(poll_interval)
    return False


# subprocess.run blocks, so the server is driven from a background thread.
thread = threading.Thread(target=run_app)
thread.start()

# Poll the port instead of sleeping a fixed time: start-up time varies, and a
# tunnel opened before the server listens serves errors at first.
if not wait_for_port(STREAMLIT_PORT):
    print(f"⚠️ Streamlit is not listening on port {STREAMLIT_PORT} yet; the URL below may not respond.")
public_url = ngrok.connect(STREAMLIT_PORT)
print("👇 Your Streamlit app is live at:")
print(public_url)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


^C
👇 Your Streamlit app is live at:
NgrokTunnel: "https://a1db30cc3955.ngrok-free.app" -> "http://localhost:8501"


In [None]:
from pathlib import Path

from google.colab import files

# Colab does not overwrite an existing file: a re-upload of "moviesss.txt" is
# saved under a new name such as "moviesss (1).txt", while the app only reads
# "moviesss.txt". The uploaded bytes are therefore also written to the
# canonical name so the most recent upload is the one that gets used.
uploaded = files.upload()
for uploaded_name, content in uploaded.items():
    if uploaded_name.lower() == "moviesss.txt":
        Path("moviesss.txt").write_bytes(content)


Saving moviesss.txt to moviesss (1).txt


In [None]:
import streamlit as st
import pandas as pd
import nltk
import re
import os
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

stemmer = PorterStemmer()

# Make sure nltk resources are downloaded.
# Older NLTK releases load the tokenizer models from 'punkt'; newer releases
# look for 'punkt_tab', so both are checked alongside the stopword corpus.
for resource_path, package in (
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('corpora/stopwords', 'stopwords'),
):
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package)

_STOPWORDS = None


def _english_stopwords():
    """Return the English stopword set, reading it from NLTK only once."""
    global _STOPWORDS
    if _STOPWORDS is None:
        _STOPWORDS = frozenset(stopwords.words('english'))
    return _STOPWORDS


def preprocess_text(text):
    """Normalise a genre string for TF-IDF.

    Lowercases the text, turns every non-alphanumeric character into a space
    (so separators such as "|" keep genres apart instead of gluing them into a
    single token), drops English stopwords and stems what is left.

    Args:
        text: Raw cell value; anything `pd.isna` treats as missing is accepted.

    Returns:
        The space-joined stemmed tokens, or "" for a missing value.
    """
    if pd.isna(text):
        return ""
    text = str(text).lower()
    # Substitute a space, not the empty string, so adjacent words stay separate.
    text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
    tokens = word_tokenize(text)
    # The stopword list is loaded once instead of once per token.
    stop_words = _english_stopwords()
    return " ".join(stemmer.stem(word) for word in tokens if word not in stop_words)

@st.cache_data
def load_data():
    """Load the movie table from `moviesss.txt` in the working directory.

    The file name is matched case-insensitively. The file is read as
    tab-separated first and re-read as comma-separated if that yields a single
    column. The first columns whose names contain "title" and "genres" are
    renamed to 'title' and 'genress'. Rows missing either value are dropped and
    the index is reset, so row labels equal row positions (recommend_movies
    indexes the TF-IDF matrix by position).

    Returns:
        The DataFrame with an added 'processed_genress' column, or None when
        the file is missing, unreadable or lacks the required columns. Each
        failure is reported through a Streamlit message.
    """
    try:
        path = next((name for name in os.listdir() if name.lower() == "moviesss.txt"), None)
        if path is None:
            st.warning("No file named 'moviesss.txt' found.")
            return None

        try:
            df = pd.read_csv(path, sep='\t', engine='python')
            if len(df.columns) == 1:
                df = pd.read_csv(path, sep=',', engine='python')
        except Exception as e:
            st.error(f"Error reading the file: {e}")
            return None

        # str(col): column labels are not guaranteed to be strings.
        title_col = next((col for col in df.columns if 'title' in str(col).lower()), None)
        genres_col = next((col for col in df.columns if 'genres' in str(col).lower()), None)
        if not (title_col and genres_col):
            st.error("The file must have 'title' and 'genres' columns.")
            return None

        df = df.rename(columns={title_col: 'title', genres_col: 'genress'})
        # Reset the index so labels stay aligned with TF-IDF row positions.
        df = df.dropna(subset=['title', 'genress']).reset_index(drop=True)
        df['processed_genress'] = df['genress'].apply(preprocess_text)
        return df
    except Exception as e:
        st.error(f"Failed to load file: {e}")
    return None

def recommend_movies(title, df, tfidf_matrix, top_n=5):
    """Return the movies most similar to the first title that matches `title`.

    Args:
        title: Text to look for; matched as a literal, case-insensitive
            substring of the 'title' column.
        df: DataFrame with 'title' and 'genress' columns.
        tfidf_matrix: TF-IDF matrix whose rows are in the same order as the
            rows of `df`.
        top_n: Maximum number of recommendations.

    Returns:
        A DataFrame with 'title' and 'genress' columns holding up to `top_n`
        rows, or None when the query is empty or matches no title.
    """
    query = "" if title is None else str(title).strip()
    if not query:
        return None
    # regex=False: user input such as "(" must not be parsed as a pattern.
    mask = df['title'].astype(str).str.contains(query, case=False, regex=False)
    positions = mask.to_numpy().nonzero()[0]
    if len(positions) == 0:
        return None
    # Work with the row position, not the index label: the matrix is positional
    # and the labels may have gaps after rows were dropped.
    pos = int(positions[0])
    sim_scores = cosine_similarity(tfidf_matrix[pos:pos + 1], tfidf_matrix)[0]
    # Stable sort on the negated scores: descending similarity, ties by position.
    order = (-sim_scores).argsort(kind='stable')
    # Remove the query movie explicitly; with ties it need not be ranked first.
    order = order[order != pos][:top_n]
    return df.iloc[order][['title', 'genress']]

# Streamlit UI: page chrome first, then the recommender widgets when data exists.
st.set_page_config(page_title="Movie Recommender", layout="wide")
st.title("🎬 Movie Recommender System")

df = load_data()
if df is not None:
    # Vectorise the pre-processed genre strings.
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['processed_genress'])

    user_title = st.text_input("Enter a movie title:", "The Matrix")

    if st.button("Recommend"):
        results = recommend_movies(user_title, df, tfidf_matrix)
        nothing_found = results is None or results.empty
        if nothing_found:
            st.warning("❌ No matches found for that title.")
        else:
            st.write("### 🎥 Recommendations:")
            # One bold title line followed by its comma-separated genres.
            for movie_title, movie_genres in results[['title', 'genress']].itertuples(index=False, name=None):
                st.markdown(f"**{movie_title}**")
                st.write(f"*Genres:* {movie_genres.replace('|', ', ')}")

    if st.checkbox("🔍 Show Dataset"):
        st.dataframe(df[['title', 'genress']])
else:
    st.info("Please upload a valid movie file named `moviesss.txt` with 'title' and 'genres' columns.")


2025-07-25 18:51:05.339 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-07-25 18:51:05.344 No runtime found, using MemoryCacheStorageManager


In [None]:
!pip install streamlit pyngrok pandas scikit-learn nltk chardet -q


In [None]:
# 💥 Delete old/corrupt NLTK punkt if any
# Removes only the 'punkt' tokenizer directory under /root/nltk_data; other
# NLTK data in that directory tree is left in place.
!rm -rf /root/nltk_data/tokenizers/punkt

# 📥 Download clean nltk data
# 'punkt_tab' provides the tokenizer data and 'stopwords' the stopword lists.
import nltk
nltk.download('punkt_tab')
nltk.download('stopwords')

# 🔫 Kill any streamlit/ngrok processes (if any)
# `|| true` keeps the cell from failing when no matching process exists.
!pkill -f streamlit || true
!pkill ngrok || true

# 📤 Upload your moviesss.txt file
# Colab does not overwrite an existing file: a re-upload of "moviesss.txt" is
# saved under a new name such as "moviesss (13).txt", while the app only reads
# "moviesss.txt". The uploaded bytes are therefore also written to the
# canonical name so the most recent upload is the one that gets used.
from pathlib import Path

from google.colab import files

uploaded = files.upload()
for uploaded_name, content in uploaded.items():
    if uploaded_name.lower() == "moviesss.txt":
        Path("moviesss.txt").write_bytes(content)


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


^C


Saving moviesss.txt to moviesss (13).txt


In [None]:
# Write the Streamlit app to app.py.
# The template contains emoji, so the file encoding is pinned to UTF-8 instead
# of relying on the platform default. Backslashes inside the (non-raw) template
# are doubled so that app.py receives a single backslash.
with open("app.py", "w", encoding="utf-8") as f:
    f.write('''
import streamlit as st
import pandas as pd
import nltk
import re
import os
import chardet
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Ensure NLTK data is present
try:
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    nltk.download('punkt_tab')
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

DATA_FILE = "moviesss.txt"

stemmer = PorterStemmer()
_STOPWORDS = None


def _english_stopwords():
    """Return the English stopword set, reading it from NLTK only once."""
    global _STOPWORDS
    if _STOPWORDS is None:
        _STOPWORDS = frozenset(stopwords.words('english'))
    return _STOPWORDS


def preprocess_text(text):
    """Normalise a genre string for TF-IDF.

    Lowercases the text, turns every non-alphanumeric character into a space
    (so separators such as the pipe keep genres apart instead of gluing them
    into a single token), drops English stopwords and stems what is left.
    Missing values (NaN/None) give an empty string.
    """
    if pd.isna(text):
        return ""
    text = re.sub(r'[^a-zA-Z0-9\\s]', ' ', str(text).lower())
    stop_words = _english_stopwords()
    return " ".join(stemmer.stem(word) for word in word_tokenize(text) if word not in stop_words)


@st.cache_data()
def load_data():
    """Load moviesss.txt (name matched case-insensitively) from the working directory.

    The encoding is guessed with chardet from the first 10000 bytes. The file
    is read as tab-separated first and re-read as comma-separated if that
    yields a single column. The first columns whose names contain 'title' and
    'genres' are renamed to 'title' and 'genress'. Rows missing either value
    are dropped and the index is reset, so row labels equal row positions
    (the TF-IDF matrix is indexed by position).

    Returns the DataFrame with an added 'processed_genress' column, or None
    when the file is missing, unreadable or lacks the required columns.
    """
    try:
        path = next((name for name in os.listdir() if name.lower() == DATA_FILE), None)
        if path is None:
            st.warning("No file named 'moviesss.txt' found.")
            return None

        with open(path, 'rb') as rawdata:
            encoding = chardet.detect(rawdata.read(10000))['encoding']
        # Only a sample is inspected, so an ASCII guess can be wrong for later
        # bytes; UTF-8 decodes every ASCII file and is the safer choice.
        if encoding is None or encoding.lower() == 'ascii':
            encoding = 'utf-8'

        df = pd.read_csv(path, sep='\\t', encoding=encoding)
        if len(df.columns) == 1:
            df = pd.read_csv(path, sep=',', encoding=encoding)

        title_col = next((col for col in df.columns if 'title' in str(col).lower()), None)
        genres_col = next((col for col in df.columns if 'genres' in str(col).lower()), None)
        if not (title_col and genres_col):
            st.error("File must have 'title' and 'genres' columns.")
            return None

        df = df.rename(columns={title_col: 'title', genres_col: 'genress'})
        df = df.dropna(subset=['title', 'genress']).reset_index(drop=True)
        df['processed_genress'] = df['genress'].apply(preprocess_text)
        return df
    except Exception as e:
        st.error(f"Failed to load file: {e}")
    return None


def recommend_movies(title, df, tfidf_matrix, top_n=5):
    """Return the top_n movies most similar to the first title match.

    title is matched as a literal, case-insensitive substring. Rows of
    tfidf_matrix must be in the same order as the rows of df. Returns a
    DataFrame with 'title' and 'genress' columns, or None when the query is
    empty or matches nothing.
    """
    query = "" if title is None else str(title).strip()
    if not query:
        return None
    # regex=False: user input such as "(" must not be parsed as a pattern.
    mask = df['title'].astype(str).str.contains(query, case=False, regex=False)
    positions = mask.to_numpy().nonzero()[0]
    if len(positions) == 0:
        return None
    # Work with the row position, not the index label: the matrix is positional.
    pos = int(positions[0])
    sim_scores = cosine_similarity(tfidf_matrix[pos:pos + 1], tfidf_matrix)[0]
    # Stable sort on the negated scores: descending similarity, ties by position.
    order = (-sim_scores).argsort(kind='stable')
    # Remove the query movie explicitly; with ties it need not be ranked first.
    order = order[order != pos][:top_n]
    return df.iloc[order][['title', 'genress']]


# Streamlit UI
st.set_page_config(page_title="Movie Recommender", layout="wide")
st.title("🎬 Movie Recommender System")

df = load_data()
if df is not None:
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['processed_genress'])

    user_title = st.text_input("Enter a movie title:", "The Matrix")

    if st.button("Recommend"):
        results = recommend_movies(user_title, df, tfidf_matrix)
        if results is not None and not results.empty:
            st.write("### 🎥 Recommendations:")
            for _, row in results.iterrows():
                st.markdown(f"**{row['title']}**")
                st.write(f"*Genres:* {row['genress'].replace('|', ', ')}")
        else:
            st.warning("❌ No matches found.")

    if st.checkbox("🔍 Show Dataset"):
        st.dataframe(df[['title', 'genress']])
''')


In [None]:
from pyngrok import conf, ngrok
import subprocess
import threading
import time

# ✅ Set your ngrok auth token
# The token is a credential and must not be stored in the notebook. It is read
# from the NGROK_AUTHTOKEN environment variable, falling back to an interactive
# prompt whose input is not echoed or saved in the cell output.
# NOTE: the token that used to be hardcoded here should be treated as leaked
# and revoked in the ngrok dashboard.
import os
from getpass import getpass

conf.get_default().auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")

# ✅ Start Streamlit server in background
import socket

STREAMLIT_PORT = 8501


def run():
    """Run the Streamlit server for app.py; blocks until the server exits."""
    subprocess.run(["streamlit", "run", "app.py", "--server.headless=true", f"--server.port={STREAMLIT_PORT}"])


def wait_for_port(port, timeout=60.0, poll_interval=0.5):
    """Return True once localhost:port accepts TCP connections, False on timeout."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection(("localhost", port), timeout=poll_interval):
                return True
        except OSError:
            time.sleep(poll_interval)
    return False


# subprocess.run blocks, so the server is driven from a background thread.
thread = threading.Thread(target=run)
thread.start()

# ✅ Wait for server to boot, then connect ngrok tunnel
# The port is polled instead of sleeping a fixed time: start-up time varies,
# and a tunnel opened before the server listens serves errors at first.
if not wait_for_port(STREAMLIT_PORT):
    print(f"⚠️ Streamlit is not listening on port {STREAMLIT_PORT} yet; the URL below may not respond.")
public_url = ngrok.connect(STREAMLIT_PORT)
print("👇 Your Movie Recommender App is LIVE at 👇")
print(public_url)


👇 Your Movie Recommender App is LIVE at 👇
NgrokTunnel: "https://7951e847c899.ngrok-free.app" -> "http://localhost:8501"


In [None]:
import nltk
# Fetch the 'punkt_tab' tokenizer data; the app.py written earlier in this
# notebook looks it up under 'tokenizers/punkt_tab'.
nltk.download('punkt_tab')



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True