In [1]:
import os

In [2]:
%pwd

'/home/adityasanyal1996/Movie_Recommendation/research'

In [3]:
# Step up one directory: the outputs recorded around this cell show the working
# directory going from .../Movie_Recommendation/research to .../Movie_Recommendation.
# NOTE: the target is relative to the current directory, so running this cell a
# second time in the same kernel climbs one more level.
os.chdir("../")

In [4]:
%pwd

'/home/adityasanyal1996/Movie_Recommendation'

### Entity

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainingConfig:
    """Filesystem locations used by the recommender stage.

    Instances are built by ``ConfigurationManager.get_model_training_config``
    from the ``model_inference`` section of the loaded config.
    """
    # Stage directory; it is created before this object is constructed.
    root_dir: Path
    # CSV read by ``MovieRecommender.similarities``; that method accesses the
    # ``type``, ``title`` and ``description`` columns.
    transformed_data_file: Path
    # Destination CSV for the similarity table written by
    # ``MovieRecommender.similarities``.
    tokenized_data: Path

### Configuration Manager

In [6]:
from recommender.constants import *
from recommender.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects."""

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH):
        """Read the config and schema files and create the artifacts root.

        Args:
            config_filepath: Location of the main config YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Build the recommender stage config from the ``model_inference`` section.

        The stage's ``root_dir`` is created as a side effect.

        Returns:
            A ``ModelTrainingConfig`` whose fields are ``Path`` objects, matching
            the types declared on the dataclass (the loaded config holds the raw
            values from the YAML file).
        """
        stage = self.config.model_inference
        create_directories([stage.root_dir])
        return ModelTrainingConfig(
            root_dir=Path(stage.root_dir),
            transformed_data_file=Path(stage.transformed_data_file),
            tokenized_data=Path(stage.tokenized_data),
        )

### Component

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from recommender import logger

from pathlib import Path

In [9]:
class MovieRecommender:
    """Content-based recommender built on TF-IDF cosine similarity of descriptions."""

    def __init__(self, config: "ModelTrainingConfig"):
        """Store the stage configuration.

        Args:
            config: Paths for the input CSV and the similarity-table output.
        """
        self.config = config
        # Filled in by similarities(); recommend() refuses to run while it is None.
        self.similarity_df = None

    def similarities(self):
        """Compute the pairwise description similarities and persist them.

        Reads ``config.transformed_data_file``, vectorises the ``description``
        column with TF-IDF (English stop words removed), computes the cosine
        similarity between every pair of rows, and stores the result in
        ``self.similarity_df`` as ``type``, ``title`` followed by one numeric
        column per row of the input. The table is also written to
        ``config.tokenized_data`` as CSV.
        """
        df = pd.read_csv(self.config.transformed_data_file)

        # TfidfVectorizer rejects NaN documents, so treat a missing description
        # as empty text (it then has zero similarity with everything).
        descriptions = df['description'].fillna('')

        vectorizer = TfidfVectorizer(stop_words='english')
        # Keep the matrix sparse: cosine_similarity accepts it directly, and a
        # dense copy would only waste memory.
        description_matrix = vectorizer.fit_transform(descriptions)
        cosine_similarities = pd.DataFrame(cosine_similarity(description_matrix))

        # Both frames carry the default 0..n-1 index, so the column-wise concat
        # lines row i up with similarity row i.
        self.similarity_df = pd.concat([df[['type', 'title']], cosine_similarities], axis=1)
        self.similarity_df.to_csv(self.config.tokenized_data)
        logger.info("created and saved the similarity matrix succesfully!")

    def recommend(self, name: str, top_n: int = 5):
        """Return the entries most similar to the title ``name``.

        Args:
            name: Exact title to look up in ``similarity_df['title']``. When
                several rows share the title, the first one is used.
            top_n: Maximum number of recommendations to return.

        Returns:
            A pandas ``Index`` holding the similarity-column labels of the best
            matches, highest score first. With the table built by
            ``similarities()`` these labels equal the row positions of the
            recommended entries. The queried row itself is never included.

        Raises:
            AttributeError: If ``similarities()`` has not populated
                ``similarity_df`` yet.
            IndexError: If no row has the requested title (same exception type
                the lookup raised before, now with an explanatory message).
        """
        similarity_df = getattr(self, "similarity_df", None)
        if similarity_df is None:
            raise AttributeError(
                "similarity_df is not available; call similarities() before recommend()"
            )

        # Columns 0 and 1 are 'type' and 'title'; the rest are the scores.
        rows = similarity_df.loc[similarity_df['title'] == name].iloc[:, 2:]
        if rows.empty:
            raise IndexError(f"no title matching {name!r} in the similarity table")

        movie_index = rows.index[0]
        scores = rows.iloc[0]  # first matching row as a Series of scores

        # Remove the movie's own column before ranking so that ties at the top
        # can neither push it out of the selection nor let it slip into the result.
        candidates = scores[scores.index != movie_index]
        return candidates.nlargest(top_n).index
    
    

### Pipeline


In [10]:
# Build the stage config, compute the similarity table, then print the
# recommendations for a title typed by the user.
# (The former `try: ... except Exception as e: raise e` wrapper was dropped: it
# re-raised every exception unchanged and only added a frame to the traceback.)
config = ConfigurationManager()
model_training_config = config.get_model_training_config()
model = MovieRecommender(config = model_training_config)
model.similarities()
similarities = model.similarity_df

# NOTE: input() blocks a non-interactive "Run All"; assign `name` directly when
# executing the notebook headless.
name = input("enter the name of the movie: ")
movies = model.recommend(name)
for i in movies:
    # recommend() yields similarity-column labels, which double as row
    # positions; column 1 of the table is 'title'.
    print(similarities.iloc[int(i), 1])



[2024-12-12 23:03:10,553: INFO: common: yaml file:config/config.yaml loaded successfully]
[2024-12-12 23:03:10,556: INFO: common: yaml file:schema.yaml loaded successfully]
[2024-12-12 23:03:10,557: INFO: common: create directory at artifacts]
[2024-12-12 23:03:10,557: INFO: common: create directory at artifacts/model_inference]
[2024-12-12 23:03:49,825: INFO: 4003437131: created the similarity matrix succesfully!]
College Romance
Engineering Girls
Candy Jar
Mr. Young
Pahuna
