In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from sklearn.impute import SimpleImputer
import numpy as np
import pickle
import joblib
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, TextVectorization, Lambda
from google.colab import files

In [None]:
# Mount Google Drive under /content/gdrive; the dataset is read from that
# location by path in a later cell.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Location of the anime dataset inside the mounted Drive folder
ANIME_CSV_PATH = '/content/gdrive/My Drive/animes.csv'
df = pd.read_csv(ANIME_CSV_PATH)

In [None]:
# Swap every missing cell for an empty string so text columns can be
# concatenated later without NaN handling.
df = df.fillna(value='')


In [None]:
def extract_start_year(aired):
    """Return the first four-digit year mentioned in an ``aired`` value.

    The value is scanned token by token (commas are treated as separators), so
    the year is found wherever it sits in the text instead of being assumed to
    be the third whitespace-separated word.

    Parameters
    ----------
    aired : scalar
        Text describing when a title aired, or a missing value.

    Returns
    -------
    int or float
        The year as an ``int``, or ``np.nan`` when the value is missing or
        contains no four-digit number.
    """
    if pd.isna(aired):
        return np.nan
    # Commas may be glued to a neighbouring token ("3,2009"), so split on them too.
    for token in str(aired).replace(',', ' ').split():
        if len(token) == 4 and token.isdecimal():
            return int(token)
    return np.nan

# Derive the 'start_year' column by running the parser over each 'aired' value
df['start_year'] = df['aired'].map(extract_start_year)

def preprocess_aired(row):
    """Return the first four-digit year found in ``row['aired']``.

    The text is scanned token by token (commas are treated as separators), so
    the year is found wherever it sits in the value instead of being assumed to
    be the third whitespace-separated word.

    Parameters
    ----------
    row : mapping
        Anything indexable by the key ``'aired'`` (for example a DataFrame row).

    Returns
    -------
    int or float
        The year as an ``int``, or ``np.nan`` when the value is missing or
        contains no four-digit number.
    """
    aired = row['aired']
    if pd.isna(aired):
        return np.nan
    # Commas may be glued to a neighbouring token ("3,2009"), so split on them too.
    for token in str(aired).replace(',', ' ').split():
        if len(token) == 4 and token.isdecimal():
            return int(token)
    return np.nan

# Recompute 'start_year' row by row with preprocess_aired; this overwrites the
# column assigned from extract_start_year earlier in this cell.
df['start_year'] = df.apply(preprocess_aired, axis=1)


# Handle missing values in numerical columns

numeric_columns = ['episodes', 'members', 'popularity', 'ranked', 'score']

# Coerce each listed column to numbers; anything unparsable turns into NaN
df = df.assign(**{
    name: pd.to_numeric(df[name], errors='coerce') for name in numeric_columns
})

# Fill the resulting gaps with the mean of each column
imputer = SimpleImputer(strategy='mean')
df[numeric_columns] = imputer.fit_transform(df[numeric_columns])

In [None]:
# Combine relevant text features for TF-IDF.
# 'genre' is read from the CSV as text, so ' '.join(...) on it would put a space
# between every single character and leave no usable genre words. Strip the
# list punctuation instead and collapse the remaining whitespace.
# NOTE(review): presumably each value looks like "['Action', 'Drama']" -- confirm
# against the dataset.
genre_text = (
    df['genre']
    .astype(str)
    .str.replace(r"[\[\]',]", ' ', regex=True)
    .str.split()
    .str.join(' ')
)
df['features'] = df['synopsis'] + ' ' + genre_text

In [None]:
# Learn a TF-IDF vocabulary (English stop words removed, no size cap) from the
# combined text and vectorise every row in the same pass.
tfidf_vectorizer = TfidfVectorizer(max_features=None, stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['features'])

In [None]:
# Persist the fitted vectorizer and the TF-IDF matrix to the working directory
# so they can be reloaded with joblib.load.
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.pkl')
joblib.dump(tfidf_matrix, 'tfidf_matrix.pkl')

['tfidf_matrix.pkl']

In [None]:
# Dense float32 copy of the TF-IDF matrix (cast while still sparse, then expand)
tfidf_matrix_np = tfidf_matrix.astype(np.float32).toarray()

In [None]:
# Brute-force cosine nearest-neighbour index over the dense TF-IDF rows
knn_model = NearestNeighbors(n_neighbors=10, metric='cosine', algorithm='brute', n_jobs=-1)
knn_model.fit(tfidf_matrix_np)

In [None]:
def get_hybrid_recommendations(user_id, anime_title, top_n=10):
    """Return titles of the nearest neighbours of ``anime_title``.

    The first row of ``df`` whose title equals ``anime_title`` is used as the
    query. Neighbour positions come from
    ``get_collaborative_filtering_recommendations`` and are mapped back to
    titles by row position, so ``df`` must still be in the row order used to
    build the neighbour index.

    Parameters
    ----------
    user_id : any
        Accepted for interface compatibility; not used by the lookup.
    anime_title : str
        Exact title to search for.
    top_n : int, default 10
        Maximum number of titles to return. Fewer are returned when the
        neighbour search yields fewer distinct titles.

    Returns
    -------
    list of str
        Recommended titles, nearest first. Empty when the title is unknown.
    """
    titles = df['title']
    match_positions = np.flatnonzero((titles == anime_title).to_numpy())
    if match_positions.size == 0:
        return []  # Anime not found

    # Query with a single row even when the title occurs several times;
    # otherwise the neighbour lists of all matches get flattened together.
    query_position = int(match_positions[0])
    neighbor_positions = get_collaborative_filtering_recommendations([query_position])

    # Skip the queried title by value rather than assuming it is the first hit:
    # duplicate rows tie at distance zero, so the order among them is arbitrary.
    seen_titles = {anime_title}
    recommendations = []
    for position in neighbor_positions:
        title = titles.iat[int(position)]
        if title in seen_titles:
            continue
        seen_titles.add(title)
        recommendations.append(title)

    return recommendations[:top_n]



In [None]:
def get_collaborative_filtering_recommendations(anime_index, n_neighbors=10):
    """Look up the nearest rows of the TF-IDF matrix for the given row(s).

    Parameters
    ----------
    anime_index : index-like
        Row selector applied to ``tfidf_matrix_np``; it must select a 2-D
        slice (for example a list or index of row numbers).
    n_neighbors : int, default 10
        How many neighbours to request per query row.

    Returns
    -------
    numpy.ndarray
        Flattened array of neighbour row positions as returned by
        ``knn_model.kneighbors``.
    """
    # Only the positions are needed, so skip returning the distance matrix.
    indices = knn_model.kneighbors(
        tfidf_matrix_np[anime_index],
        n_neighbors=n_neighbors,
        return_distance=False,
    )
    return indices.flatten()

In [None]:
# Example lookup: neighbours of one title for one (currently unused) user id
user_id = 5530  # Replace with the actual user ID
anime_title = 'Pandora Hearts'  # Replace with the actual anime title
collaborative_filtering_recs = get_hybrid_recommendations(
    user_id=user_id,
    anime_title=anime_title,
)
print("Collaborative Filtering Recommendations:", collaborative_filtering_recs)


Collaborative Filtering Recommendations: ['Oz no Mahoutsukai (1986)', 'OZ', 'Oz no Mahoutsukai', 'Made in Abyss', 'Oz no Mahoutsukai no Koutsuu Anzen no Tabi', 'Space Oz no Bouken', 'Code Geass: Soubou no Oz Picture Drama', 'Summer Wars', 'Zhandou Wang Zhi Jufeng Zhan Hun']


In [None]:
# Iterating over a DataFrame yields its column labels, not its rows, so the
# row count has to come from len().
row_count = len(df)

print(f'Number of rows in : {row_count}')

Number of rows in : 14


In [None]:
# Persist the fitted neighbour index to disk as a pickle
with open('knn_model.pkl', mode='wb') as model_file:
    pickle.dump(knn_model, model_file)

In [None]:
# knn_model is a scikit-learn NearestNeighbors estimator, not a Keras model, so
# tf.keras.models.save_model / load_model cannot handle it. joblib serialises
# scikit-learn objects directly; the file is not HDF5, hence the extension.
knn_model_path = 'collaborative_filtering_model.joblib'
joblib.dump(knn_model, knn_model_path)

# Example usage to load the models
loaded_tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')
loaded_knn_model = joblib.load(knn_model_path)

  tf.keras.models.save_model(knn_model, knn_model_path, save_format='h5')


AttributeError: 'NearestNeighbors' object has no attribute 'outputs'

In [None]:
# Load TF-IDF vectorizer from joblib.
# The vectorizer was dumped as 'tfidf_vectorizer.pkl', so read that same file.
loaded_tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')

# Function to convert the TF-IDF transform to NumPy array
def tfidf_transform(x):
    """Vectorise ``x`` with the loaded TF-IDF model and return a dense float32 array."""
    sparse_scores = loaded_tfidf_vectorizer.transform(x)
    dense_scores = sparse_scores.toarray()
    return np.asarray(dense_scores, dtype=np.float32)

In [None]:
# Custom layer for TF-IDF transformation
class TfidfLayer(tf.keras.layers.Layer):
    """Keras layer that vectorises string tensors with a fitted TF-IDF model.

    The transformation runs as Python code through ``tf.py_function`` and uses
    the model handed to the constructor.

    Parameters
    ----------
    tfidf_model : object
        Fitted vectorizer exposing ``transform(list_of_str)`` that returns a
        matrix with a ``toarray()`` method.
    **kwargs
        Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, tfidf_model, **kwargs):
        # Initialise the base layer first so attribute tracking is set up
        # before any attribute is assigned.
        super().__init__(**kwargs)
        self.tfidf_model = tfidf_model

    def _vectorize(self, texts):
        """Turn an eager string tensor into a dense float32 TF-IDF array."""
        # String tensors surface as bytes objects; flatten so each element is
        # one document and decode it to text before vectorising.
        documents = [
            item.decode('utf-8') if isinstance(item, bytes) else str(item)
            for item in texts.numpy().ravel()
        ]
        return self.tfidf_model.transform(documents).toarray().astype(np.float32)

    def call(self, x):
        """Apply the TF-IDF model to ``x`` and return a float32 tensor."""
        return tf.py_function(self._vectorize, [x], Tout=tf.float32)

def transform_with_numpy(x):
    """Run the loaded TF-IDF vectorizer on ``x`` and hand back a dense float32 array."""
    tfidf_scores = loaded_tfidf_vectorizer.transform(x)
    return tfidf_scores.toarray().astype(np.float32)

In [None]:
def eager_pyfunc_conversion(x):
    """Invoke ``transform_with_numpy`` on ``x`` through ``tf.py_function`` (float32 output)."""
    return tf.py_function(func=transform_with_numpy, inp=[x], Tout=tf.float32)

In [None]:
# Keras-native TF-IDF: learn a vocabulary (capped at 1000 tokens) from the
# combined feature text.
feature_corpus = df['features'].values
text_vectorizer = TextVectorization(output_mode="tf-idf", max_tokens=1000)
text_vectorizer.adapt(feature_corpus)

In [None]:
# Wrap the adapted vectoriser in a Sequential model that takes one string per sample
model_tfidf = tf.keras.Sequential()
model_tfidf.add(Input(shape=(1,), dtype=tf.string))
model_tfidf.add(text_vectorizer)

In [None]:
# Compile the model if needed (left disabled: the model is saved uncompiled)
#model_tfidf.compile(optimizer='adam', loss='mse')

# Write the TF-IDF model to the "saved_model_tfidf" path so it can be reloaded
# with tf.keras.models.load_model.
model_tfidf.save("saved_model_tfidf")

In [None]:
def my_custom_op(interpreter):
    """Register a placeholder op named "MyCustomOp" on ``interpreter``.

    The implementation passed along is a stub that does nothing.

    NOTE(review): ``tf.lite.OpDef`` and ``interpreter.add_custom_op`` do not look
    like part of the public TensorFlow Lite Python API -- confirm they exist
    before relying on this function.
    """
    def my_custom_op_impl(inputs, outputs):
        # Implement your custom op logic here
        pass
    interpreter.add_custom_op(tf.lite.OpDef(name="MyCustomOp", opcode=1, custom_code=my_custom_op_impl))

In [None]:
# Load the model back
loaded_model = tf.keras.models.load_model("saved_model_tfidf")

# Replace custom op with Lambda layer
# NOTE(review): `layers` is presumably a read-only view of the model's layers, so
# this item assignment likely leaves the model itself unchanged -- confirm.
# The lambda calls `x.numpy()`, which is only available on eager tensors.
loaded_model.layers[-1] = Lambda(lambda x: tfidf_vectorizer.transform(x.numpy()).astype(np.float32))


# Create a concrete function from the SavedModel
# NOTE(review): confirm the object returned by load_model exposes `signatures`.
concrete_func = loaded_model.signatures[
    tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]

In [None]:
# Convert the Keras model to a TensorFlow Lite model.
converter_tflite = tf.lite.TFLiteConverter.from_keras_model(loaded_model)

# The text-vectorization graph uses ops such as StringLower, StringSplitV2 and
# RaggedBincount that have no native TFLite kernel. Allow the converter to fall
# back to TensorFlow kernels ("Select TF ops") for them; the runtime that loads
# the resulting model must include the matching Flex delegate.
converter_tflite.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]

# Run the conversion; the result is the serialized model as bytes.
tflite_model_tfidf = converter_tflite.convert()


ConverterError: Could not translate MLIR to FlatBuffer. UNKNOWN: /usr/lib/python3.10/runpy.py:196:1: error: 'tf.StringLower' op is neither a custom op nor a flex op
    return _run_code(code, main_globals, None,
^
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_3"]): called from
/usr/lib/python3.10/runpy.py:196:1: note: Error code: ERROR_NEEDS_FLEX_OPS
    return _run_code(code, main_globals, None,
^
/usr/lib/python3.10/runpy.py:196:1: error: 'tf.StaticRegexReplace' op is neither a custom op nor a flex op
    return _run_code(code, main_globals, None,
^
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_3"]): called from
/usr/lib/python3.10/runpy.py:196:1: note: Error code: ERROR_NEEDS_FLEX_OPS
    return _run_code(code, main_globals, None,
^
/usr/lib/python3.10/runpy.py:196:1: error: 'tf.StringSplitV2' op is neither a custom op nor a flex op
    return _run_code(code, main_globals, None,
^
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_3"]): called from
/usr/lib/python3.10/runpy.py:196:1: note: Error code: ERROR_NEEDS_FLEX_OPS
    return _run_code(code, main_globals, None,
^
/usr/lib/python3.10/runpy.py:196:1: error: 'tf.DenseBincount' op is neither a custom op nor a flex op
    return _run_code(code, main_globals, None,
^
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_3"]): called from
/usr/lib/python3.10/runpy.py:196:1: note: Error code: ERROR_NEEDS_FLEX_OPS
    return _run_code(code, main_globals, None,
^
/usr/lib/python3.10/runpy.py:196:1: error: 'tf.RaggedBincount' op is neither a custom op nor a flex op
    return _run_code(code, main_globals, None,
^
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_3"]): called from
/usr/lib/python3.10/runpy.py:196:1: note: Error code: ERROR_NEEDS_FLEX_OPS
    return _run_code(code, main_globals, None,
^
<unknown>:0: error: failed while converting: 'main': 
Some ops are not supported by the native TFLite runtime, you can enable TF kernels fallback using TF Select. See instructions: https://www.tensorflow.org/lite/guide/ops_select 
TF Select ops: DenseBincount, RaggedBincount, StaticRegexReplace, StringLower, StringSplitV2
Details:
	tf.DenseBincount(tensor<?xi32>, tensor<i32>, tensor<0xi64>) -> (tensor<?xi64>) : {T = i64, Tidx = i32, binary_output = false, device = ""}
	tf.RaggedBincount(tensor<?xi64>, tensor<*xi64>, tensor<*xi64>, tensor<0xf32>) -> (tensor<*xf32>) : {T = f32, Tidx = i64, binary_output = false, device = ""}
	tf.StaticRegexReplace(tensor<?x1x!tf_type.string>) -> (tensor<?x1x!tf_type.string>) : {device = "", pattern = "[!\22#$%&()\\*\\+,-\\./:;<=>?@\\[\\\\\\]^_`{|}~\\']", replace_global = true, rewrite = ""}
	tf.StringLower(tensor<?x1x!tf_type.string>) -> (tensor<?x1x!tf_type.string>) : {device = "", encoding = ""}
	tf.StringSplitV2(tensor<?x!tf_type.string>, tensor<!tf_type.string>) -> (tensor<?x2xi64>, tensor<?x!tf_type.string>, tensor<2xi64>) : {device = "", maxsplit = -1 : i64}



In [None]:
# Convert collaborative filtering model to TFLite model
with open('knn_model.pkl', 'rb') as f:
    # pickle.load can execute arbitrary code: only load files this notebook wrote.
    knn_model = pickle.load(f)


class _CosineNeighbors(tf.Module):
    """Brute-force cosine nearest-neighbour search written with TensorFlow ops.

    A scikit-learn NearestNeighbors estimator is not a Keras model, so
    ``TFLiteConverter.from_keras_model`` cannot consume it. This module performs
    the same kind of search (cosine metric, exhaustive comparison) as a
    traceable function that the converter can handle.
    """

    def __init__(self, item_vectors, n_neighbors):
        super().__init__()
        self.n_neighbors = int(n_neighbors)
        # Normalising the rows once turns cosine similarity into a dot product.
        self.items = tf.math.l2_normalize(
            tf.convert_to_tensor(item_vectors, dtype=tf.float32), axis=1)

    @tf.function
    def neighbors(self, query):
        """Return (row positions, cosine distances) of the nearest items per query row."""
        similarity = tf.matmul(
            tf.math.l2_normalize(query, axis=1), self.items, transpose_b=True)
        scores, indices = tf.math.top_k(similarity, k=self.n_neighbors)
        return indices, 1.0 - scores


# The item matrix is captured by the traced function, so it ends up inside the
# converted model. NOTE(review): confirm the dense matrix is small enough for a
# single TFLite file.
collab_module = _CosineNeighbors(tfidf_matrix_np, knn_model.n_neighbors)
collab_fn = collab_module.neighbors.get_concrete_function(
    tf.TensorSpec([None, tfidf_matrix_np.shape[1]], tf.float32))
converter_collab = tf.lite.TFLiteConverter.from_concrete_functions(
    [collab_fn], collab_module)
tflite_model_collab = converter_collab.convert()

# Save the TFLite models
with open('tfidf_model.tflite', 'wb') as f:
    f.write(tflite_model_tfidf)

with open('collab_model.tflite', 'wb') as f:
    f.write(tflite_model_collab)


ConverterError: Could not translate MLIR to FlatBuffer. UNKNOWN: <unknown>:0: error: loc(callsite(callsite(fused["EagerPyFunc:", "sequential_3/lambda_3/EagerPyFunc@__inference__wrapped_model_710"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper_758"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall"])): 'tf.EagerPyFunc' op is neither a custom op nor a flex op
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall"]): called from
<unknown>:0: note: loc(callsite(callsite(fused["EagerPyFunc:", "sequential_3/lambda_3/EagerPyFunc@__inference__wrapped_model_710"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper_758"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall"])): Error code: ERROR_NEEDS_CUSTOM_OPS
<unknown>:0: error: failed while converting: 'main': 
Some ops in the model are custom ops, See instructions to implement custom ops: https://www.tensorflow.org/lite/guide/ops_custom 
Custom ops: EagerPyFunc
Details:
	tf.EagerPyFunc(tensor<?x1x!tf_type.string>) -> (tensor<*xf32>) : {Tin = [!tf_type.string], Tout = [f32], device = "/job:localhost/replica:0/task:0/device:CPU:0", is_async = false, token = "pyfunc_13"}



In [None]:
# Keep only the first row for each (uid, title) pair.
# NOTE: the TF-IDF matrix was built from df before this point, so after rows are
# dropped here df's row positions no longer line up with tfidf_matrix_np.
df = df.drop_duplicates(subset=['uid', 'title'])
