In [1]:
from typing import Optional
import re
from pydantic import BaseModel, ValidationError, validator
# Create your models here.

class User(BaseModel):
    _id:Optional[str]
    username:str
    password:str
    email:str
    first_name:Optional[str]
    last_name:Optional[str]
    @validator('username')
    def username_alphanumeric(cls, v):
        assert v.isalnum(), 'must be alphanumeric'
        assert len(v)>3, 'too short username'
        return v
    @validator('password')
    def password_strength(cls, v):
        assert len(v)>8,'too short password'
    def email_validator(cls, v):
        assert re.match(r"[^@]+@[^@]+\.[^@]+",v),'invalid email address'

In [5]:
external_data = {'_id': 'shaisd78w7e8qw7e', 'username': 'saad','kkskd':'asdasd' ,'password': 'hhhhhhhhhhhhhhhhhhhhhhhhhh','email':'saad@gmail.com'}
user = User(**external_data)

In [1]:
from mongoengine import connect,disconnect

In [256]:
from datetime import datetime
from mongoengine import *
from models import *


In [86]:
connect('rpb_d')

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary(), uuidrepresentation=3)

In [1]:
from bs4 import BeautifulSoup
from libgen_api import LibgenSearch
import requests
s = LibgenSearch()
results = s.search_title("Prince of Persia")[:5]
print(results)

[{'ID': '737115', 'Author': 'Catherine Hapka,Doug Miro,Boaz Yakin,Jordan Mechner,Carlo Bernard', 'Title': 'Prince of Persia: The Chronicle of Young Dastan', 'Publisher': 'Disney Press', 'Year': '2010', 'Pages': '', 'Language': 'English', 'Size': '567 Kb', 'Extension': 'epub', 'Mirror_1': 'http://library.lol/main/3A6467A38D3622DE2C90E32A71C71D57', 'Mirror_2': 'https://cdn1.booksdl.org/ads.php?md5=3A6467A38D3622DE2C90E32A71C71D57', 'Mirror_3': 'https://3lib.net/md5/3A6467A38D3622DE2C90E32A71C71D57', 'Mirror_4': 'https://library.bz/main/edit/3A6467A38D3622DE2C90E32A71C71D57'}, {'ID': '783371', 'Author': 'James Ponti', 'Title': 'Prince of Persia: The Sands of Time', 'Publisher': 'Disney Press', 'Year': '2010', 'Pages': '', 'Language': 'English', 'Size': '645 Kb', 'Extension': 'epub', 'Mirror_1': 'http://library.lol/main/F4875CE62F07B1E779118F2C10AAF6CA', 'Mirror_2': 'https://cdn1.booksdl.org/ads.php?md5=F4875CE62F07B1E779118F2C10AAF6CA', 'Mirror_3': 'https://3lib.net/md5/F4875CE62F07B1E779

In [220]:
Book.objects.all().sort(key=lambda x: x.stats)

AttributeError: 'QuerySet' object has no attribute 'sort'

In [231]:
users=User.objects.all()

In [230]:
print([u.to_mongo().to_dict() for u in User.objects.all()])

[{'_id': ObjectId('625a694dc38f7c7c75b3f0bc'), 'username': 'saad', 'password': 'passwordone', 'email': 'emailfifty@gmail.com', 'birthday': datetime.datetime(2022, 4, 27, 17, 13, 18, 860000), 'session': {'session_id': 'bf28548a0c60f54b75017614eb4e1dc8ccad46a34318ff313e9e183ac3e906d0', 'expires': datetime.datetime(2022, 4, 28, 17, 12, 43, 216000)}, 'ratings': [{'ID': ObjectId('62699f7f0dc5f49e8fc1b548'), 'rating': 2.0}, {'ID': ObjectId('62699fd30dc5f49e8fc1b549'), 'rating': 3.0}, {'ID': ObjectId('6269a0250dc5f49e8fc1b54a'), 'rating': 3.0}, {'ID': ObjectId('6269a0550dc5f49e8fc1b54b'), 'rating': 4.0}], 'genres': []}, {'_id': ObjectId('625a6d6fd1144018b9ac3e3b'), 'username': 'works', 'password': 'passwordworks', 'email': 'ilfifty@gmail.com', 'birthday': datetime.datetime(2000, 11, 23, 0, 0), 'session': {'session_id': '2b2a826bb1aee9d3b32604cb114d274fd4ea144c66fdb0748026fd6bc573b0dd', 'expires': datetime.datetime(2022, 4, 28, 18, 34, 6, 508000)}, 'ratings': [{'ID': ObjectId('6269a9660dc5f49e

In [245]:
interactions_pipeline=[{"$unwind":"$ratings"},{"$group":{"_id" : "$ratings.ID","count":{"$sum":1}}}, { "$project": { "_id": 1, "count":1} }]
list(User.objects.aggregate(interactions_pipeline))

[{'_id': ObjectId('6269a9cf0dc5f49e8fc1b54e'), 'count': 2},
 {'_id': ObjectId('6269a0250dc5f49e8fc1b54a'), 'count': 1},
 {'_id': ObjectId('6269a9660dc5f49e8fc1b54c'), 'count': 1},
 {'_id': ObjectId('6269a9a40dc5f49e8fc1b54d'), 'count': 1},
 {'_id': ObjectId('62699f7f0dc5f49e8fc1b548'), 'count': 1},
 {'_id': ObjectId('6269a0550dc5f49e8fc1b54b'), 'count': 1},
 {'_id': ObjectId('62699fd30dc5f49e8fc1b549'), 'count': 1}]

In [257]:
books_id=[book["_id"] for book in list(User.objects.aggregate(interactions_pipeline))]
Book.objects.filter(__raw__={"_id":{"$in":books_id}})

FieldDoesNotExist: The fields "{'image'}" do not exist on the document "Book"

In [210]:
from lightfm import LightFM
from lightfm.data import Dataset

data=Dataset()
model=LightFM()

books_c=Book.objects.aggregate([{"$project":{"_id":1}}])
users_c=User.objects.aggregate([{"$project":{"_id":1}}])
books=[str(b['_id']) for b in list(books_c)]
users=[str(u['_id']) for u in list(users_c)]
print("users:",users)
print("books:",books)
data.fit(users,books)
interactions_pipeline=[{"$unwind":"$ratings"},{"$project":{"ratings.ID" : 1,"ratings.rating" : 1}}]
interactions_c=User.objects.aggregate(interactions_pipeline)
interactions=[]
for elm in list(interactions_c):
    temp=[]
    temp.append(str(list(elm.values())[0]))
    temp.append(str(list(list(elm.values())[1].values())[0]))
    temp.append(float(list(list(elm.values())[1].values())[1]))    
    interactions.append(temp)
mappings=Mapping.objects.first()
(interactions,weights)=data.build_interactions(interactions)
mappings.user_mapping,r,mappings.book_mapping,q,=data.mapping()
mappings.save()
model.fit(interactions,sample_weight=weights)


users: ['625a694dc38f7c7c75b3f0bc', '625a6d6fd1144018b9ac3e3b', '62629d6179e50a4eff48b1db', '626824ce9f4cdb6afb787b7b']
books: ['62699f7f0dc5f49e8fc1b548', '62699fd30dc5f49e8fc1b549', '6269a0250dc5f49e8fc1b54a', '6269a0550dc5f49e8fc1b54b', '6269a9660dc5f49e8fc1b54c', '6269a9a40dc5f49e8fc1b54d', '6269a9cf0dc5f49e8fc1b54e']


<lightfm.lightfm.LightFM at 0x1fdd6711f70>

In [214]:
mappings=Mapping.objects.first()
user_mappings=mappings.user_mapping
book_mappings=mappings.book_mapping
model.predict(3,list(book_mappings.values()))

array([0.0522299 , 0.07234416, 0.0778123 , 0.09572192, 0.10269453,
       0.04464534, 0.07140563], dtype=float32)

In [212]:
mappings.to_json()

'{"_id": {"$oid": "6269aa453d3b7e43f89b64ae"}, "user_mapping": {"625a694dc38f7c7c75b3f0bc": 0, "625a6d6fd1144018b9ac3e3b": 1, "62629d6179e50a4eff48b1db": 2, "626824ce9f4cdb6afb787b7b": 3}, "book_mapping": {"62699f7f0dc5f49e8fc1b548": 0, "62699fd30dc5f49e8fc1b549": 1, "6269a0250dc5f49e8fc1b54a": 2, "6269a0550dc5f49e8fc1b54b": 3, "6269a9660dc5f49e8fc1b54c": 4, "6269a9a40dc5f49e8fc1b54d": 5, "6269a9cf0dc5f49e8fc1b54e": 6}}'

In [13]:
import pandas as pd

In [14]:
file=pd.read_csv(r"C:\Users\saade\Downloads\archive\main_dataset.csv")

In [15]:
filej=pd.read_json('books.json',lines=True)

In [6]:
from lightfm import LightFM
from lightfm.data import Dataset

3 things I want to build for thi

In [7]:
filej.info()

NameError: name 'Book' is not defined

In [16]:
filej['isbn']=filej['isbn'].astype(str)
file['isbn']=file['isbn'].astype(str)

In [17]:
merge=pd.merge(file,filej,how='inner',on='isbn')

In [22]:
merge

Unnamed: 0,image,name,author,format,book_depository_stars,price,currency,old_price,isbn,category,...,_id,title,pageCount,publishedDate,thumbnailUrl,shortDescription,longDescription,status,authors,categories


In [28]:
file.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32581 entries, 0 to 32580
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   image                  32581 non-null  object 
 1   name                   32581 non-null  object 
 2   author                 32383 non-null  object 
 3   format                 32548 non-null  object 
 4   book_depository_stars  32581 non-null  float64
 5   price                  32581 non-null  object 
 6   currency               32581 non-null  object 
 7   old_price              27467 non-null  float64
 8   isbn                   32581 non-null  object 
 9   category               32581 non-null  object 
 10  img_paths              32581 non-null  object 
dtypes: float64(2), object(9)
memory usage: 2.7+ MB


In [5]:
from models import *

In [6]:
connect(db='testing')

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary(), uuidrepresentation=3)

In [18]:
import urllib.request
from PIL import Image

def InsertBook(r,):
    try:
        book=Book(title=r[1],author=r[2],isbn=r[8],genre=r[9])
        try:
            with open(r[10],'rb') as fd:
                book.cover.put(fd,content_type="image/jpg")
        except IOError:
            urllib.request.urlretrieve(r[10], 'temp.jpg')
            with open('temp.jpg','wb') as fd:
                book.cover.put(fd,content_type="image/jpg")
        book.save()
        InsertBook.counter+=1
        print ('rows inserted:',InsertBook.counter, end="\r")
    except Exception as e:
        print('not inserted:',e,end="\r")
InsertBook.counter=0

In [19]:
file.apply(InsertBook,axis=1)

rows inserted: 32383tionError (Book:None) (StringField only accepts string values: ['author'])

0        None
1        None
2        None
3        None
4        None
         ... 
32576    None
32577    None
32578    None
32579    None
32580    None
Length: 32581, dtype: object

In [49]:
book=Book(title='fsdfs',author='sdasd',genre='sdasdsa',isbn='sdas')

FieldDoesNotExist: The fields "{'isbn'}" do not exist on the document "Book"