<a href="https://colab.research.google.com/github/Sarvesh1814/US-Book-Recommendation-System-/blob/main/Baseline_estimate(SURPRISE).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mathematics Behind Baseline Model



---
* **Baseline Equation**
---
\begin{equation}
b_{ui} = \mu + b_u + b_i
\end{equation}
---
---
* **Optimization Problem**
---
\begin{equation}
\min_{b_u, b_i} \sum_{(u,i) \in R_{train}} (r_{ui} - b_{ui})^2 + \lambda (||b_u||^2 + ||b_i||^2)
\end{equation}
---
---
* **Gradient Descent**
---
\begin{equation}
\frac{\partial}{\partial b_u} = -2 \sum_{i \in I_u} (r_{ui} - \mu - b_u - b_i) + 2 \lambda b_u
\end{equation}

\begin{equation}
\frac{\partial}{\partial b_i} = -2 \sum_{u \in U_i} (r_{ui} - \mu - b_u - b_i) + 2 \lambda b_i
\end{equation}

\begin{equation}
b_{u}^{(k+1)} = b_{u}^{(k)} - \gamma \cdot \frac{\partial}{\partial b_u} J(b_u^{(k)}, b_i^{(k)})
\end{equation}

\begin{equation}
b_{i}^{(k+1)} = b_{i}^{(k)} - \gamma \cdot \frac{\partial}{\partial b_i} J(b_u^{(k)}, b_i^{(k)})
\end{equation}

\begin{aligned}
b_{u}^{(k+1)} &= b_{u}^{(k)} + \gamma \cdot \left( \sum_{i \in I_u} (r_{ui} - \mu - b_u^{(k)} - b_i^{(k)}) - \lambda b_u^{(k)} \right) \\
b_{i}^{(k+1)} &= b_{i}^{(k)} + \gamma \cdot \left( \sum_{u \in U_i} (r_{ui} - \mu - b_u^{(k)} - b_i^{(k)}) - \lambda b_i^{(k)} \right)
\end{aligned}

---
* **Update Rule**
---
\begin{equation}
b_{u}^{(k+1)} = b_{u}^{(k)} + \gamma \cdot \left( e_{ui} - \lambda \cdot b_{u}^{(k)} \right)
\end{equation}


# Importing Data and Libraries 

In [1]:
!pip install surprise


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3095441 sha256=0386ac658aaa02b26731c45d303e2bc0be7102e01994a7e6d89663ee5d415904
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.3 surprise-0.1


In [2]:
from surprise import Dataset, Reader
from surprise import BaselineOnly
from surprise.model_selection import cross_validate, train_test_split
import numpy as np
import pandas as pd
from surprise import accuracy

# Dataset Preparation 

In [3]:
Base = "/content/drive/MyDrive/RS Data/Assignment 3/Processed_data/"
train=pd.read_csv(Base+"train_df.csv")
test = pd.read_csv(Base+"test_df.csv")

In [4]:
train = train[["User-ID"	,"ISBN",	"Book-Rating"	,"book_name"]]
test = test[["User-ID"	,"ISBN",	"Book-Rating"	,"book_name"]]

In [5]:
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(train[['User-ID', 'book_name', 'Book-Rating']], reader)
trainset = data.build_full_trainset()



# Grid Search Cross-Validation

In [None]:
from surprise.model_selection import GridSearchCV
param_grid = {'bsl_options': {'method': ['als', 'sgd'],
                              'reg_i': [5, 10, 15,20],
                              'reg_u': [10, 15, 20]},}

# Perform a grid search to find the best set of parameters
grid_search = GridSearchCV(BaselineOnly, param_grid, measures=['rmse', 'mae'], cv=3)
grid_search.fit(data)



In [7]:
# Print the best RMSE score and the corresponding parameters
print(grid_search.best_score['rmse'])
print(grid_search.best_params['rmse'])

1.6145892054613513
{'bsl_options': {'method': 'sgd', 'reg_i': 5, 'reg_u': 10}}


# Model Training

In [8]:
trainset, valset = train_test_split(data, test_size=0.2)
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(train[['User-ID', 'ISBN', 'Book-Rating']], reader)
algo = BaselineOnly({'method': 'sgd', 'reg_i': 5, 'reg_u': 10},True)
cv_results = cross_validate(algo, data, measures=['RMSE'], cv=5, verbose=True)
trainset = data.build_full_trainset()
algo.fit(trainset)
predictions = algo.test(valset)
print("RMSE:", accuracy.rmse(predictions))
print("MAE:", accuracy.mae(predictions))

Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Evaluating RMSE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6010  1.6113  1.5995  1.6180  1.6075  1.6075  0.0068  
Fit time          0.75    0.72    0.79    0.75    1.04    0.81    0.12    
Test time         0.09    0.09    0.11    0.09    0.16    0.11    0.03    
Estimating biases using sgd...
RMSE: 1.5404
RMSE: 1.540403474525308
MAE:  1.1900
MAE: 1.189950254669012


# Recommendation System

In [9]:
def BaseLine():
  k = int(input("Value of K for top-K Recommendations: "))
  user_id = int(input("Enter User-ID: "))  
  uid = trainset.to_inner_uid(user_id)
  all_items = trainset.all_items()
  x = trainset.ur[int(uid)]
  a = [x[i][0] for i in range(len(x))]
  not_rated_items = [item for item in all_items if item not in a]
  predictions = []
  for iid in not_rated_items:
      pred = algo.predict(uid, trainset.to_raw_iid(iid))
      predictions.append((iid, pred.est))
  predictions_sorted = sorted(predictions, key=lambda x: x[1], reverse=True)[:k]
  print("\n")
  print(f"Top {k} Book recommendations for User {user_id} are: ")
  lst =()
  for item, rating in predictions_sorted:
      lst+= ((trainset.to_raw_iid(item),rating),)
  dx = pd.DataFrame(lst,columns=["ISBN","Predicted Rating"])
  
  display(dx)
 

In [10]:
BaseLine()


Value of K for top-K Recommendations: 5
Enter User-ID: 151824


Top 5 Book recommendations for User 151824 are: 


Unnamed: 0,ISBN,Predicted Rating
0,0877017883,9.223929
1,0140143505,9.203793
2,0743454529,9.173125
3,0060256656,9.120538
4,067168390X,9.11864


#Saving the Model

In [11]:
from surprise import dump

# Define the file name for the saved model
file_name = '/content/drive/MyDrive/RS Data/Assignment 3/Processed_data/baseline_model.pkl'

# Save the model to a file
dump.dump(file_name, algo=algo)