In [11]:
pip install requests textblob scikit-learn pandas numpy joblib

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [12]:
import requests
import pandas as pd
import numpy as np
import sqlite3
import joblib
from datetime import datetime
from textblob import TextBlob
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest

In [13]:
conn = sqlite3.connect("database.db")
cursor = conn.cursor()

cursor.execute("""
CREATE TABLE IF NOT EXISTS movies (
    title TEXT,
    rating REAL,
    votes INTEGER,
    year INTEGER,
    runtime INTEGER,
    sentiment REAL,
    movie_age INTEGER,
    rating_vote_ratio REAL,
    feedback INTEGER DEFAULT NULL,
    timestamp TEXT
)
""")

conn.commit()

In [14]:
OMDB_API_KEY = "beb74b3d"

def fetch_movie(movie_name):
    url = f"http://www.omdbapi.com/?t={movie_name}&apikey={OMDB_API_KEY}&plot=full"
    return requests.get(url).json()

In [15]:
def extract_features(movie):
    rating = float(movie["imdbRating"])
    votes = int(movie["imdbVotes"].replace(",", ""))
    year = int(movie["Year"].split("–")[0])
    runtime = int(movie["Runtime"].split(" ")[0])
    sentiment = TextBlob(movie["Plot"]).sentiment.polarity

    movie_age = datetime.now().year - year
    rating_vote_ratio = rating / np.log1p(votes)

    return {
        "rating": rating,
        "votes": votes,
        "year": year,
        "runtime": runtime,
        "sentiment": sentiment,
        "movie_age": movie_age,
        "rating_vote_ratio": rating_vote_ratio
    }

In [16]:
def store_movie(title, features):
    cursor.execute("""
    INSERT INTO movies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        title,
        features["rating"],
        features["votes"],
        features["year"],
        features["runtime"],
        features["sentiment"],
        features["movie_age"],
        features["rating_vote_ratio"],
        None,
        datetime.now().isoformat()
    ))
    conn.commit()

In [35]:
def train_model():
    df = pd.read_sql("SELECT * FROM movies", conn)

    if len(df) < 30:
        print("Need at least 30 movies to train.")
        return False

    X = df[[
        "rating", "votes", "runtime",
        "sentiment", "movie_age",
        "rating_vote_ratio"
    ]]

    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    model = IsolationForest(contamination=0.05, random_state=42)
    model.fit(X_scaled)

    # Save score range for proper scaling
    train_scores = model.decision_function(X_scaled)
    min_score = train_scores.min()
    max_score = train_scores.max()

    joblib.dump(model, "model.pkl")
    joblib.dump(scaler, "scaler.pkl")
    joblib.dump((min_score, max_score), "score_range.pkl")

    print("Model trained successfully.")
    return True

In [37]:
train_model()

Model trained successfully.


True

In [30]:
def predict_fraud(features):
    model = joblib.load("model.pkl")
    scaler = joblib.load("scaler.pkl")

    X = pd.DataFrame([[
        features["rating"],
        features["votes"],
        features["runtime"],
        features["sentiment"],
        features["movie_age"],
        features["rating_vote_ratio"]
    ]], columns=[
        "rating","votes","runtime",
        "sentiment","movie_age",
        "rating_vote_ratio"
    ])

    X_scaled = scaler.transform(X)
    score = model.decision_function(X_scaled)
    min_score = -0.5
    max_score = 0.5
    normalized = (score[0] - min_score) / (max_score - min_score)
    fraud_score = round((1 - normalized) * 100, 2)

    return round(fraud_score,2)

In [27]:
movie_name = "RRR"

movie = fetch_movie(movie_name)

if movie["Response"] == "True":
    features = extract_features(movie)
    store_movie(movie_name, features)

    try:
        fraud_score = predict_fraud(features)
        print("Fraud Score:", fraud_score)
    except:
        print("Model not trained yet.")

train_model()

{'Title': 'RRR', 'Year': '2022', 'Rated': 'TV-MA', 'Released': '25 Mar 2022', 'Runtime': '187 min', 'Genre': 'Action, Adventure, Drama', 'Director': 'S.S. Rajamouli', 'Writer': 'Vijayendra Prasad, S.S. Rajamouli, Sai Madhav Burra', 'Actors': 'N.T. Rama Rao Jr., Ram Charan, Ajay Devgn', 'Plot': "During the British era Malli a small tribal girl is taken away by British governor Scott Buxton and his wife Catherine against the wishes of her parents.A Rama Raju an Indian cop who works for the British army for him duty comes first and is very ruthless to revolutionary Indians but he is never credited for his due by British government.The British government find that a tribal Komaram Bheem who considers Malli his sister has started his search for her and could be an obstacle for the British army.The governor and his wife announces a special post for any officer who can bring Bheem to them,Rama Raju decides to take the matters in his hand and promises the government to bring him dead or alive.

True

In [21]:
def fetch_movie(movie_name):
    url = f"http://www.omdbapi.com/?t={movie_name}&apikey={OMDB_API_KEY}&plot=full"
    response = requests.get(url).json()
    print(response)  # DEBUG
    return response

In [23]:
def extract_features(movie):

    try:
        # Skip non-movie types
        if movie.get("Type") != "movie":
            return None

        # Handle missing values safely
        rating = float(movie["imdbRating"]) if movie["imdbRating"] != "N/A" else None
        votes = int(movie["imdbVotes"].replace(",", "")) if movie["imdbVotes"] != "N/A" else None
        year = int(movie["Year"].split("–")[0]) if movie["Year"] != "N/A" else None

        runtime_str = movie.get("Runtime", "N/A")
        runtime = int(runtime_str.split(" ")[0]) if runtime_str != "N/A" else None

        if None in [rating, votes, year, runtime]:
            return None

        sentiment = TextBlob(movie["Plot"]).sentiment.polarity
        movie_age = datetime.now().year - year
        rating_vote_ratio = rating / np.log1p(votes)

        return {
            "rating": rating,
            "votes": votes,
            "year": year,
            "runtime": runtime,
            "sentiment": sentiment,
            "movie_age": movie_age,
            "rating_vote_ratio": rating_vote_ratio
        }

    except:
        return None

In [24]:
inserted = 0

for name in popular_movies:
    movie = fetch_movie(name)

    if movie["Response"] == "True":
        features = extract_features(movie)

        if features is not None:
            store_movie(name, features)
            inserted += 1
            print(f"Inserted: {name}")
        else:
            print(f"Skipped (invalid data): {name}")

print("Total Inserted:", inserted)

{'Title': 'RRR', 'Year': '2022', 'Rated': 'TV-MA', 'Released': '25 Mar 2022', 'Runtime': '187 min', 'Genre': 'Action, Adventure, Drama', 'Director': 'S.S. Rajamouli', 'Writer': 'Vijayendra Prasad, S.S. Rajamouli, Sai Madhav Burra', 'Actors': 'N.T. Rama Rao Jr., Ram Charan, Ajay Devgn', 'Plot': "During the British era Malli a small tribal girl is taken away by British governor Scott Buxton and his wife Catherine against the wishes of her parents.A Rama Raju an Indian cop who works for the British army for him duty comes first and is very ruthless to revolutionary Indians but he is never credited for his due by British government.The British government find that a tribal Komaram Bheem who considers Malli his sister has started his search for her and could be an obstacle for the British army.The governor and his wife announces a special post for any officer who can bring Bheem to them,Rama Raju decides to take the matters in his hand and promises the government to bring him dead or alive.

In [25]:
df = pd.read_sql("SELECT COUNT(*) FROM movies", conn)
df

Unnamed: 0,COUNT(*)
0,84


In [31]:
train_model()

Model trained successfully.


True