# In [1]:
import numpy as np
import pandas as pd
import json
import csv
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from flask import Flask, render_template, request
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Flask application object; the route handlers further down register on it.
app = Flask(__name__)

# Read the tab-delimited summaries file into a list of rows, one list of
# field strings per record.
with open(r"C:\Users\pauli\Desktop\booksummaries.txt", "r",  encoding='utf-8') as f:
    reader = csv.reader(f, dialect='excel-tab')
    data = list(reader)

# Wrap the rows in a DataFrame, naming the seven fields of each record.
books = pd.DataFrame.from_records(
    data,
    columns=[
        'book_id',
        'freebase_id',
        'book_title',
        'author',
        'publication_date',
        'genre',
        'summary',
    ],
)
books.head()
def parse_genre_entry(genre_info):
    """Return the genre names held in one raw ``genre`` field.

    Args:
        genre_info: The raw field text. Either an empty string or a JSON
            object whose values are the genre names.

    Returns:
        A list of the JSON object's values, in the object's own order, or
        an empty list when ``genre_info`` is the empty string.
    """
    # Empty field: nothing to parse.
    if genre_info == '':
        return []
    return list(json.loads(genre_info).values())

# Replace each raw genre field with the list of genre names it encodes.
books['genre'] = books['genre'].apply(parse_genre_entry)

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Missing summaries become empty strings so the vectorizer only receives text.
books['summary'] = books['summary'].fillna('')

# TF-IDF representation of every summary, with English stop words removed.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(books['summary'])

# sklearn TF-IDF defaults to using L2 norm, for which the linear kernel
# equals cosine similarity.
# NOTE: this assignment reuses the name of sklearn's cosine_similarity
# function; a later `from sklearn.metrics.pairwise import cosine_similarity`
# in this file rebinds the name to the function again.
cosine_similarity = linear_kernel(tfidf_matrix, tfidf_matrix)

def clean_flatten(data):
    """Normalise a sequence of strings into keyword form.

    Each entry has its spaces removed and is converted to lowercase, so a
    multi-word entry becomes one space-free string.

    Args:
        data: Iterable of strings.

    Returns:
        A new list with one normalised string per input entry.
    """
    return [entry.replace(' ', '').lower() for entry in data]

# Keyword form of each book's genre list: lowercase with spaces removed.
books['genre_kws'] = books['genre'].apply(clean_flatten)
# Same normalisation for the author, so a multi-word name becomes one
# space-free string.
books['author_kws'] = books['author'].apply(
    lambda name: name.replace(' ', '').lower()
)

def merge_kws(df):
    """Build the keyword string for one book record.

    Args:
        df: A record exposing ``genre_kws`` (a sequence of strings) and
            ``author_kws`` (a string) as attributes.

    Returns:
        The genre keywords joined by single spaces, followed by a space and
        the author keyword.
    """
    genre_part = ' '.join(df.genre_kws)
    return genre_part + ' ' + df.author_kws

# One keyword string per book: its genre keywords followed by its author keyword.
books['kws'] = books.apply(merge_kws, axis=1)

# Title -> row label lookup used by recommend().
# Series.drop_duplicates() compares the *values* (the row labels, which are
# already unique), so it never removed repeated titles and a lookup of such a
# title returned several rows. Filter on the index instead, keeping the first
# row of each title, so that every lookup yields a single label.
indices = pd.Series(books.index, index=books['book_title'])
indices = indices[~indices.index.duplicated(keep='first')]

from sklearn.feature_extraction.text import CountVectorizer
# This import also rebinds `cosine_similarity` to sklearn's function; earlier
# in the file the name was assigned the TF-IDF similarity matrix.
from sklearn.metrics.pairwise import cosine_similarity

# Keyword-based similarity: token counts of the keyword strings, compared
# pairwise with cosine similarity.
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(books['kws'])
kw_similarity = cosine_similarity(count_matrix, count_matrix)

def recommend(titles, similarity_matrix, topk=10, *, exclude_input=False):
    """Return the titles most similar, on average, to the given books.

    The similarity rows of all requested books are averaged and the books
    with the highest averaged score are returned.

    Args:
        titles: Iterable of book titles to look up in the module-level
            ``indices`` series.
        similarity_matrix: Square array in which row ``i`` holds the
            similarity of book ``i`` to every book.
        topk: Maximum number of recommendations to return.
        exclude_input: When true, the requested books themselves are left
            out of the result. Defaults to False, which keeps them (a book
            is normally its own best match).

    Returns:
        A slice of ``books['book_title']`` ordered from most to least
        similar; empty when ``titles`` is empty.

    Raises:
        KeyError: If a title is not present in ``indices``.
    """
    # A title that occurs more than once in ``indices`` yields several row
    # labels; keep all of them so every matching row takes part in the
    # average instead of being dropped.
    book_indices = []
    for title in titles:
        book_indices.extend(np.atleast_1d(indices[title]).tolist())

    # Nothing to average: return an empty selection rather than dividing by zero.
    if not book_indices:
        return books['book_title'].iloc[[]]

    # Average the similarity rows of the requested books.
    combined_vector = np.zeros(similarity_matrix.shape[1])
    for index in book_indices:
        combined_vector += similarity_matrix[index]
    combined_vector /= len(book_indices)

    # Descending order of score; the stable sort keeps the lower row first
    # among equal scores.
    ranked = np.argsort(-combined_vector, kind='stable')
    if exclude_input:
        ranked = ranked[~np.isin(ranked, book_indices)]

    recommendation_indices = ranked[:topk]
    return books['book_title'].iloc[recommendation_indices]

# Example run: keyword-based recommendations for a single title.
titles_to_recommend = ['The Stranger']
recs = recommend(titles_to_recommend, kw_similarity)
# print(recs)
# Select the full rows of the recommended books. This is a bare expression:
# its result is only shown when evaluated as the last line of a notebook cell.
books.iloc[recs.index]


# Your book recommendation code here

# Define the route for the home page
@app.route('/')
def home():
    """Render the ``index.html`` template for the site root."""
    return render_template('index.html')

# By this point the name `cosine_similarity` refers to sklearn's function (it
# is re-imported after the TF-IDF matrix was assigned to it), so build the
# summary-based similarity matrix under a name of its own.
summary_similarity = linear_kernel(tfidf_matrix, tfidf_matrix)


# Define the route for the recommendation results page.
# The view has its own function name so that it does not replace the
# module-level recommend() helper it calls; endpoint='recommend' keeps the
# endpoint name that Flask derived from the previous function name.
@app.route('/recommend', methods=['POST'], endpoint='recommend')
def recommend_view():
    """Render recommendations for the title submitted in the form.

    Reads the ``book_title`` form field, asks recommend() for similar books
    based on the summary similarity matrix and renders
    ``recommendation.html`` with the matching rows of ``books``.

    Returns:
        The rendered template, or a plain message with status 400 when no
        title was submitted, or status 404 when the title is unknown.
    """
    title = request.form.get('book_title')
    if not title:
        return 'Please provide a book title.', 400
    # The submitted title is deliberately not echoed back in the message.
    if title not in indices.index:
        return 'No book with that title was found.', 404

    recs = recommend([title], summary_similarity)
    # recs is indexed by the row labels of `books`, so select by label.
    recommended_books = books.loc[recs.index]

    return render_template('recommendation.html', books=recommended_books)

# Start the Flask development server only when this file is executed directly.
# NOTE(review): debug=True enables Flask's debug mode; presumably meant for
# local development only - confirm before deploying.
if __name__ == '__main__':
    app.run(debug=True)


# Recorded notebook output: NameError: name '__name' is not defined