In [1]:
from google.colab import drive
# Mount Google Drive into the Colab runtime at /content/drive
drive.mount('/content/drive')
# Make the Drive folder that holds the notebook's files the working directory,
# so the relative paths used below (CSV and text files) resolve inside it
import os
os.chdir('/content/drive/My Drive/Colab Notebooks/')

Mounted at /content/drive


In [3]:
import pandas as pd

# Read the movie dataset (path is relative to the current working directory)
# into the notebook-wide DataFrame `df` used by the cells below
df = pd.read_csv('imdb_movies_with_id.csv')

In [4]:
# Convenience aliases: one module-level Series per dataset column.
# Note that a few aliases are shorter than the column they read
# (date <- 'date_x', budget <- 'budget_x').
(
    ID, date, names, score, genre, overview, crew,
    orig_title, status, orig_lang, budget, revenue, country,
) = (
    df[column_name]
    for column_name in (
        'ID', 'date_x', 'names', 'score', 'genre', 'overview', 'crew',
        'orig_title', 'status', 'orig_lang', 'budget_x', 'revenue', 'country',
    )
)

In [5]:
from sklearn.model_selection import train_test_split
# Feature matrix (X): every modelling column except the title; target (y): the title
X = df[[
    'date_x', 'score', 'genre', 'status', 'orig_lang',
    'budget_x', 'revenue', 'country', 'ID',
]]
y = df['names']

# 80/20 split; the fixed random_state keeps the partition reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Re-attach the target column to each partition
trainset = pd.concat((X_train, y_train), axis='columns')
testdataset = pd.concat((X_test, y_test), axis='columns')

# Persist both partitions as CSV files (row index omitted)
trainset.to_csv('trainset.csv', index=False)
testdataset.to_csv('testdataset.csv', index=False)



In [6]:
# Function to find similar movies based on predicted score and multiple genres
def find_similar_movies(predicted_score, genres, df, top_n=5):
    """Return the movies closest in score that share at least one genre.

    Args:
        predicted_score: Reference score; candidates are ranked by the
            absolute difference between their 'score' and this value.
        genres: Genre names separated by ", " (the same format as the
            'genre' column). A non-string value (e.g. NaN for a missing
            genre) matches nothing.
        df: DataFrame with at least the columns 'genre', 'names' and 'score'.
            It is only read; no column is added to it.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with the columns 'genre', 'names' and 'score', ordered by
        increasing score difference, keeping the row labels of ``df``.
    """
    result_columns = ['genre', 'names', 'score']

    # Without a genre string there is nothing to compare against
    if not isinstance(genres, str):
        return df.iloc[0:0][result_columns]

    genre_set = set(genres.split(", "))

    # Boolean mask computed on the side instead of being written into `df`,
    # so the caller's frame is left untouched
    shares_genre = df['genre'].apply(
        lambda g: isinstance(g, str) and not genre_set.isdisjoint(g.split(", "))
    ).astype(bool)

    candidates = df.loc[shares_genre, result_columns].copy()
    candidates['score_diff'] = (candidates['score'] - predicted_score).abs()

    # Smallest score difference first, then keep the top N
    closest = candidates.sort_values(by='score_diff').head(top_n)
    return closest[result_columns]

# Function to predict and suggest similar movies based on multiple movie IDs
def predict_and_suggest_movies_batch(movie_ids, df, top_n=5):
    """Print movie recommendations derived from a set of movie IDs.

    For every row of ``df`` whose 'ID' is in ``movie_ids``, the movie's own
    score and genre string are passed to ``find_similar_movies``; the
    per-movie results are concatenated, de-duplicated by title and cut to
    ``top_n`` rows before being printed.

    Args:
        movie_ids: Iterable of values to look up in the 'ID' column.
        df: DataFrame with the columns 'ID', 'score', 'genre' and 'names'.
        top_n: Rows requested per movie and rows kept in the final table.

    Returns:
        None. The result is printed; when no ID matches, a message is
        printed instead.
    """
    movies = df[df['ID'].isin(movie_ids)]

    if movies.empty:
        print(f"Movies with IDs {movie_ids} not found!")
        return None

    # Use each selected movie's existing score as the reference score.
    # Results are collected in a list and concatenated once, rather than
    # growing a DataFrame inside the loop.
    per_movie_results = [
        find_similar_movies(movie['score'], movie['genre'], df, top_n)
        for _, movie in movies.iterrows()
    ]

    # NOTE(review): the selected movies themselves are not excluded from the
    # candidates, and head(top_n) favours the first movie's results — confirm
    # whether that is intended.
    all_recommendations = (
        pd.concat(per_movie_results)
        .drop_duplicates(subset=['names'])
        .head(top_n)
    )

    print("=" * 50)
    print(f"Recommended movies based on provided IDs {movie_ids}:")
    print(all_recommendations)
    print("=" * 50)

# Example: print recommendations derived from several movie IDs at once,
# using the notebook-wide DataFrame `df`
movie_ids = [4, 8, 22]  # Example values looked up in the 'ID' column
predict_and_suggest_movies_batch(movie_ids, df)


Recommended movies based on provided IDs [4, 8, 22]:
                                              genre  \
3     Animation, Comedy, Family, Adventure, Fantasy   
2354  Animation, Comedy, Family, Adventure, Fantasy   
7     Animation, Family, Fantasy, Adventure, Comedy   
21                      Action, Adventure, Thriller   
825                     Action, Adventure, Thriller   

                             names  score  
3                          Mummies   70.0  
2354           Horton Hears a Who!   65.0  
7     Puss in Boots: The Last Wish   83.0  
21                           Plane   69.0  
825                        Ashfall   68.0  


In [8]:
import re
import random

def prediction(fail, username):
    """Recommend movies for a stored user based on their saved selections.

    The file is searched for a "Username: <username>" line that is directly
    followed by a "Selected Movies: <id>, <id>, ..." line. Up to five of the
    listed IDs are picked at random and passed, together with the
    notebook-wide ``df``, to ``predict_and_suggest_movies_batch``.

    Args:
        fail: Path of the text file holding the user entries.
        username: Name to look up; it is matched literally.

    Returns:
        None. Recommendations, or a "not found" message, are printed.
    """
    with open(fail, "r") as file:
        content = file.read()

    # Locate the entry that belongs to the given username
    pattern = rf"Username:\s*{re.escape(username)}\nSelected Movies:\s*(\d+(?:,\s*\d+)*)"
    match = re.search(pattern, content)

    if not match:
        print(f"User '{username}' not found or no selected movies available.")
        return

    # The pattern allows any amount of whitespace after each comma, so pull
    # out the digit runs instead of splitting on the exact string ", "
    movie_ids = [int(token) for token in re.findall(r"\d+", match.group(1))]

    # Randomly select up to 5 movie IDs
    selected_random_ids = random.sample(movie_ids, min(len(movie_ids), 5))

    # Predict and recommend
    predict_and_suggest_movies_batch(selected_random_ids, df)

In [9]:
import pandas as pd
import csv
from random import randint

# Class to represent a movie
class Film:
    """Plain record describing one movie.

    All constructor arguments are stored unchanged under the attribute of
    the same name. ``year`` is derived from ``date``.
    """

    def __init__(self, movie_id, title, date, rating, genre, description, cast, language):
        """Store the movie fields and derive ``year`` from ``date``.

        Args:
            movie_id: Identifier of the movie.
            title: Movie title.
            date: Release date string; characters 6-9 are taken as the year.
                # assumes an "MM/DD/YYYY"-style layout — TODO confirm against the CSV
            rating: Movie score.
            genre: Genre text.
            description: Overview text.
            cast: Crew/cast text.
            language: Original language.
        """
        self.movie_id = movie_id
        self.title = title
        self.date = date
        self.rating = rating
        self.genre = genre
        self.description = description
        self.cast = cast
        self.language = language
        # Extract the year from the date. A missing date (e.g. NaN read by
        # pandas) is not sliceable, so fall back to an empty year instead of
        # raising TypeError.
        self.year = date[6:10] if isinstance(date, str) else ""

# Function to load movie data from a CSV file
def load_movie_data(file_path='imdb_movies_with_id.csv'):
    """Read the movie CSV at ``file_path`` into a DataFrame.

    Any failure is reported with a printed message and answered with an
    empty DataFrame, so callers can test ``.empty`` instead of catching
    exceptions.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        problem = f"Error: File '{file_path}' not found."
    except pd.errors.EmptyDataError:
        problem = "Error: The CSV file is empty."
    except Exception as exc:
        problem = f"An error occurred while loading the file: {exc}"

    # Only reached when one of the handlers above ran
    print(problem)
    return pd.DataFrame()

# Function to convert a dataframe into a list of Film objects
def convert_dataframe_to_film_objects(df):
    """Build one ``Film`` per row of ``df``.

    Args:
        df: DataFrame with the columns 'names', 'date_x', 'score', 'genre',
            'overview', 'crew' and 'orig_lang'. The movie ID is read from the
            'ID' column when present, otherwise from the last column.

    Returns:
        List of ``Film`` objects in row order. If a required column is
        missing, each missing column is reported once and an empty list is
        returned.
    """
    required_columns = ['names', 'date_x', 'score', 'genre', 'overview', 'crew', 'orig_lang']

    # Validate the schema once up front instead of failing (and printing)
    # again for every single row
    missing_columns = [column for column in required_columns if column not in df.columns]
    if missing_columns:
        for column in missing_columns:
            print(f"Missing column in dataset: {column!r}")
        return []

    # Prefer the named column; fall back to the last position for frames
    # that carry the ID there under a different name
    has_id_column = 'ID' in df.columns

    movies = []
    for _, row in df.iterrows():
        movies.append(
            Film(
                movie_id=row['ID'] if has_id_column else row.iloc[-1],
                title=row['names'],
                date=row['date_x'],
                rating=row['score'],
                genre=row['genre'],
                description=row['overview'],
                cast=row['crew'],
                language=row['orig_lang'],
            )
        )
    return movies

# Login system for the application
def login_system():
    """Run the interactive login flow.

    Asks for a username until a non-empty one is entered. A known user gets
    recommendations from their stored selections; a new user may browse
    random movies, and the picks they make are saved.
    """
    print("Welcome to the Movie Data Login System")

    # Keep asking until something other than whitespace is entered
    username = input("Please enter your username: ").strip()
    while not username:
        print("Username cannot be empty. Please try again.")
        username = input("Please enter your username: ").strip()

    # Returning user: recommend straight from the stored selections
    if is_user_in_file(username):
        print(f"Welcome back, {username}!")
        prediction("user_info.txt", username)
        return

    # New user: offer the random-movie picker
    print(f"Hello, {username}! You are a new user.")
    user_decision = input("Would you like to see movie recommendations? (y/n): ").strip().lower()
    if user_decision != 'y':
        print("Thank you! Goodbye!")
        return

    selected_movies = display_random_movies(username)
    save_user_movie_associations(username, selected_movies)

# Function to check if a user exists in the file
def is_user_in_file(username, file_path="user_info.txt"):
    """Tell whether ``file_path`` has a line equal to "Username: <username>".

    Lines are compared after stripping surrounding whitespace. A file that
    does not exist counts as "user unknown".
    """
    wanted_line = f"Username: {username}"
    try:
        with open(file_path, "r") as handle:
            # any() stops reading at the first matching line
            return any(entry.strip() == wanted_line for entry in handle)
    except FileNotFoundError:
        return False

# Function to display random movies and allow the user to choose
def display_random_movies(username):
    """Show random movies and let the user pick some of them interactively.

    Up to 20 distinct movies from 'imdb_movies_with_id.csv' are printed and
    the user is asked to choose up to 5 of them by their list number. When
    fewer than 5 movies are available, only that many picks are requested.

    Args:
        username: Accepted for the caller's convenience; not used here.

    Returns:
        List with the ``movie_id`` of every chosen movie, in the order they
        were picked; an empty list when no movie data is available.
    """
    # Local import: only `randint` is imported alongside this function
    from random import sample

    movie_data = load_movie_data('imdb_movies_with_id.csv')

    if movie_data.empty:
        print("Error: Movie data could not be loaded.")
        return []

    movies = convert_dataframe_to_film_objects(movie_data)
    if not movies:
        # Conversion produced nothing (e.g. missing columns); without this
        # guard the selection loop below could never finish
        print("Error: Movie data could not be loaded.")
        return []

    count = min(20, len(movies))
    # Never ask for more picks than there are movies on display
    picks_needed = min(5, count)

    print(f"\nHere are {count} random movie recommendations:\n")
    # Sampling without replacement yields `count` distinct movies
    random_movies = sample(movies, count)
    for number, movie in enumerate(random_movies, start=1):
        print(f"{number}. {movie.title} ({movie.year}) - Rating: {movie.rating}/100 - {movie.genre}")
        print(f"Description: {movie.description}")
        print(f"Cast: {movie.cast}")
        print(f"Language: {movie.language}\n")

    selected_films = []
    chosen_indices = set()
    while len(selected_films) < picks_needed:
        try:
            choice = int(input(
                f"Select a movie by number (1-{count}) ({len(selected_films) + 1}/{picks_needed}): "
            ))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue

        if choice in chosen_indices:
            print("You have already chosen this movie. Please select a different one.")
        elif 1 <= choice <= count:
            chosen_indices.add(choice)
            selected_movie = random_movies[choice - 1]
            selected_films.append(selected_movie)
            print(f"Selected: {selected_movie.title} ({selected_movie.year})")
        else:
            print(f"Invalid choice. Please select a number between 1 and {count}.")

    print("\nYou selected the following movies:")
    for selected_movie in selected_films:
        print(f"{selected_movie.title} ({selected_movie.year}) - {selected_movie.genre}")

    print("\nThank you for your selections! Enjoy your movie marathon!")
    return [selected_movie.movie_id for selected_movie in selected_films]

# Function to save user-movie associations to a file
def save_user_movie_associations(username, movie_ids):
    """Store the user's movie selections in 'user_info.txt'.

    Each user entry in the file consists of three lines::

        Username: <name>
        Selected Movies: <id>, <id>, ...
        ----------------------------------------

    If an entry for exactly ``username`` exists, ``movie_ids`` are merged
    into it (duplicates removed, numeric IDs in ascending order). Otherwise
    a new entry is appended. Entries of other users are written back
    unchanged.

    Args:
        username: Name whose entry is created or updated.
        movie_ids: Iterable of movie IDs to add; each is stored as ``str(id)``.

    Returns:
        None. The file is rewritten and a confirmation is printed.
    """
    header = f"Username: {username}"
    ids_prefix = "Selected Movies:"

    def _id_sort_key(value):
        # Numeric IDs sort by value and come before any non-numeric leftovers
        return (0, int(value), "") if value.isdecimal() else (1, 0, value)

    try:
        with open("user_info.txt", "r") as file:
            lines = file.readlines()
    except FileNotFoundError:
        lines = []

    new_ids = [str(movie_id) for movie_id in movie_ids]
    combined_ids = new_ids
    user_found = False
    updated_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]

        # Exact comparison: a prefix test would also hit longer names
        # (saving "bob" must not rewrite the entry of "bobby")
        if line.strip() != header:
            updated_lines.append(line)
            i += 1
            continue

        user_found = True
        i += 1

        # Consume the ID line only if it is really there (the file may be
        # truncated or hand-edited); ignore empty fragments
        existing_ids = []
        if i < len(lines) and lines[i].startswith(ids_prefix):
            raw_ids = lines[i].strip()[len(ids_prefix):]
            existing_ids = [part.strip() for part in raw_ids.split(",") if part.strip()]
            i += 1

        # Consume the separator only if the next line actually is one, so an
        # unrelated following line is never dropped
        if i < len(lines) and set(lines[i].strip()) == {"-"}:
            i += 1

        combined_ids = sorted(set(existing_ids + new_ids), key=_id_sort_key)
        updated_lines.append(f"Username: {username}\n")
        updated_lines.append("Selected Movies: " + ", ".join(combined_ids) + "\n")
        updated_lines.append("-" * 40 + "\n")

    if not user_found:
        # Keep the new entry on its own line even if the file lacks a final newline
        if updated_lines and not updated_lines[-1].endswith("\n"):
            updated_lines.append("\n")
        updated_lines.append(f"Username: {username}\n")
        updated_lines.append("Selected Movies: " + ", ".join(new_ids) + "\n")
        updated_lines.append("-" * 40 + "\n")

    with open("user_info.txt", "w") as file:
        file.writelines(updated_lines)

    print(f"\nSaved selections for {username}: {combined_ids if user_found else movie_ids}")
    print("User information saved to 'user_info.txt'.")



# Main entry point: start the interactive login flow when this code is
# executed as the main module (not when it is imported from elsewhere)
if __name__ == "__main__":
    login_system()


Welcome to the Movie Data Login System
Please enter your username: jerich
Welcome back, jerich!
Recommended movies based on provided IDs [2481, 5241, 3420, 8425, 2002]:
       genre                    names  score
4469  Horror    The Amityville Horror   63.0
2001  Horror       Cannibal Holocaust   63.0
9340  Horror           Sleepaway Camp   63.0
8233  Horror  City of the Living Dead   63.0
7818  Horror                  A Gruta   63.0
