In [16]:
import requests
import pandas as pd
import logging
from dotenv import load_dotenv
import os

# Configure logging first so configuration problems found below can be reported
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()

# Base URL of the data API. Endpoint paths are appended to it as '/...', so a
# trailing slash is dropped here to avoid '//' in the final request URLs.
fetch_data_url = os.getenv('FETCH_DATA_URL')
if fetch_data_url:
    fetch_data_url = fetch_data_url.rstrip('/')
else:
    # Only warn: raising here would make the whole cell fail, while the
    # fetchers already report an unusable URL when they are called.
    logger.warning("FETCH_DATA_URL is not set or empty; data requests will fail")

class DataFetcher:
    """Read-only client for the JSON endpoints served under ``fetch_data_url``.

    Every public method performs one GET request and returns the payload as a
    pandas DataFrame. All methods are static, so they can be called on the
    class as well as on an instance.

    Any failure is logged and re-raised as a plain ``Exception`` chained to the
    original error: transport/HTTP problems are reported as "Failed to fetch
    ...", payloads of unexpected shape as a format/validation error.
    """

    # Seconds ``requests`` may wait on the server before giving up. Without an
    # explicit timeout a stalled connection would block the caller forever.
    REQUEST_TIMEOUT = 10

    # Errors that indicate a payload of unexpected shape: a missing key, a body
    # that is not subscriptable as expected, or records pandas cannot frame.
    _DATA_ERRORS = (KeyError, TypeError, ValueError)

    @staticmethod
    def _get_json(path):
        """Return the decoded JSON body of ``GET {fetch_data_url}{path}``.

        Raises:
            requests.RequestException: if the base URL is not configured, the
                request fails or times out, or the server answers with an
                HTTP error status.
        """
        if not fetch_data_url:
            # Fail with a readable message instead of requesting 'None/...'.
            raise requests.RequestException("FETCH_DATA_URL is not configured")
        response = requests.get(f'{fetch_data_url}{path}', timeout=DataFetcher.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _require_columns(data, required, what):
        """Raise ``ValueError`` unless ``data`` has every column in ``required``.

        ``what`` names the dataset in the log and error messages (e.g. "users").
        """
        if all(col in data.columns for col in required):
            return
        quoted = [repr(col) for col in required]
        logger.error("Invalid %s data: missing %s. Available columns: %s",
                     what, " or ".join(quoted), list(data.columns))
        raise ValueError(f"{what.capitalize()} data must contain {' and '.join(quoted)}")

    @staticmethod
    def _log_sample(title, frame):
        """Log the first rows of ``frame`` at DEBUG level.

        The table is only rendered when DEBUG is enabled; ``to_string()`` would
        otherwise be wasted work on every call.
        """
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("%s sample:\n%s", title, frame.head().to_string())

    @staticmethod
    def _failure(prefix, error):
        """Log ``error`` under ``prefix`` and return the ``Exception`` to raise."""
        logger.error("%s: %s", prefix, str(error))
        return Exception(f"{prefix}: {str(error)}")

    @staticmethod
    def get_all_books():
        """Fetch ``/book/all-books`` and return its 'books' list as a DataFrame.

        Raises:
            Exception: if the request fails or the payload has no usable
                'books' entry.
        """
        try:
            raw_data = DataFetcher._get_json('/book/all-books')
            logger.debug("Raw books data: %s", raw_data)
            data = pd.DataFrame(raw_data['books'])  # Extract 'books' key
            logger.info("Fetched books data: %s rows, columns: %s", data.shape[0], list(data.columns))
            DataFetcher._log_sample("Books data", data)
            return data
        except requests.RequestException as e:
            raise DataFetcher._failure("Failed to fetch books", e) from e
        except DataFetcher._DATA_ERRORS as e:
            raise DataFetcher._failure("Invalid books data format", e) from e

    @staticmethod
    def get_all_users():
        """Fetch ``/user/all-users`` and return the 'id' and 'gender' columns.

        Raises:
            Exception: if the request fails, or the payload has no usable
                'users' entry or lacks the 'id' / 'gender' columns.
        """
        try:
            raw_data = DataFetcher._get_json('/user/all-users')
            logger.debug("Raw users data: %s", raw_data)
            data = pd.DataFrame(raw_data['users'])  # Extract 'users' key
            DataFetcher._require_columns(data, ['id', 'gender'], "users")
            data = data[['id', 'gender']]  # Drop every other user attribute
            logger.info("Fetched users data: %s rows, columns: %s", data.shape[0], list(data.columns))
            DataFetcher._log_sample("Users data", data)
            return data
        except requests.RequestException as e:
            raise DataFetcher._failure("Failed to fetch users", e) from e
        except DataFetcher._DATA_ERRORS as e:
            raise DataFetcher._failure("Data validation error", e) from e

    @staticmethod
    def get_book_genres():
        """Fetch ``/book/all-book-genres`` and return its 'book_genres' rows.

        The returned DataFrame is guaranteed to contain 'book_id' and
        'genre_id' columns.

        Raises:
            Exception: if the request fails, or the payload has no usable
                'book_genres' entry or lacks the required columns.
        """
        try:
            raw_data = DataFetcher._get_json('/book/all-book-genres')
            logger.debug("Raw book genres data: %s", raw_data)
            data = pd.DataFrame(raw_data['book_genres'])  # Extract 'book_genres' key
            DataFetcher._require_columns(data, ['book_id', 'genre_id'], "book genres")
            logger.info("Fetched book genres data: %s rows, unique books: %s, unique genres: %s",
                        data.shape[0], data['book_id'].nunique(), data['genre_id'].nunique())
            DataFetcher._log_sample("Book genres data", data)
            return data
        except requests.RequestException as e:
            raise DataFetcher._failure("Failed to fetch book genres", e) from e
        except DataFetcher._DATA_ERRORS as e:
            raise DataFetcher._failure("Data validation error", e) from e

    @staticmethod
    def get_user_preferred_genres():
        """Fetch ``/user/user-preferences`` and return its 'user_preferred_genres' rows.

        The returned DataFrame is guaranteed to contain 'user_id' and
        'genre_id' columns.

        Raises:
            Exception: if the request fails, or the payload has no usable
                'user_preferred_genres' entry or lacks the required columns.
        """
        try:
            raw_data = DataFetcher._get_json('/user/user-preferences')
            logger.debug("Raw user preferences data: %s", raw_data)
            data = pd.DataFrame(raw_data['user_preferred_genres'])  # Extract 'user_preferred_genres' key
            DataFetcher._require_columns(data, ['user_id', 'genre_id'], "user preferences")
            logger.info("Fetched user preferred genres data: %s rows, columns: %s", data.shape[0], list(data.columns))
            DataFetcher._log_sample("User preferred genres data", data)
            return data
        except requests.RequestException as e:
            raise DataFetcher._failure("Failed to fetch user preferred genres", e) from e
        except DataFetcher._DATA_ERRORS as e:
            raise DataFetcher._failure("Data validation error", e) from e

    @staticmethod
    def get_user_item_matrix():
        """Fetch ``/user/user-review`` and pivot its 'reviews' into a rating matrix.

        Returns:
            DataFrame indexed by 'user_id' with one column per 'book_id',
            holding 'rating' values; user/book pairs without a review are 0.

        Raises:
            Exception: if the request fails, or the payload has no usable
                'reviews' entry or lacks 'user_id' / 'book_id' / 'rating'.
        """
        try:
            raw_data = DataFetcher._get_json('/user/user-review')
            reviews = raw_data['reviews']  # Extract 'reviews' key
            logger.debug("Raw user reviews data: %s", reviews[:5])
            reviews_df = pd.DataFrame(reviews)

            # pivot() refuses duplicate (user_id, book_id) pairs, so repeated
            # reviews are collapsed first. The last one in payload order wins.
            # NOTE(review): assumes later entries are the more recent ones - confirm with the API.
            repeated = reviews_df.duplicated(subset=['user_id', 'book_id'], keep='last')
            if repeated.any():
                logger.warning("Dropping %s duplicate user/book reviews, keeping the last one of each",
                               int(repeated.sum()))
                reviews_df = reviews_df[~repeated]

            user_item_matrix = reviews_df.pivot(index='user_id', columns='book_id', values='rating').fillna(0)
            logger.info("Fetched user-item matrix: %s users, %s books", user_item_matrix.shape[0], user_item_matrix.shape[1])
            DataFetcher._log_sample("User-item matrix", user_item_matrix)
            return user_item_matrix
        except requests.RequestException as e:
            raise DataFetcher._failure("Failed to fetch user-item matrix", e) from e
        except DataFetcher._DATA_ERRORS as e:
            raise DataFetcher._failure("Invalid user-item matrix data format", e) from e



In [17]:
# Every DataFetcher method is static, so this instance is only a convenient handle for the calls below.
fetcher = DataFetcher()

In [18]:
# Fetch all books from the API; the returned DataFrame is displayed as the cell output.
fetcher.get_all_books()

2025-03-14 21:03:25,995 - INFO - Fetched books data: 20 rows, columns: ['id', 'title', 'author', 'description', 'language', 'isbn', 'publisher', 'publish_date', 'cover_image_url', 'num_ratings', 'average_rating', 'created_at', 'updated_at']


Unnamed: 0,id,title,author,description,language,isbn,publisher,publish_date,cover_image_url,num_ratings,average_rating,created_at,updated_at
0,1,The Hunger Games,Suzanne Collins,WINNING MEANS FAME AND FORTUNE.LOSING MEANS CE...,English,9780439023481,Scholastic Press,2008-09-14T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,0,0.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z
1,2,Harry Potter and the Order of the Phoenix,"J.K. Rowling, Mary GrandPré (Illustrator)",There is a door at the end of a silent corrido...,English,9780439358071,Scholastic Inc.,2004-09-28T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,1,4.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z
2,3,To Kill a Mockingbird,Harper Lee,The unforgettable novel of a childhood in a sl...,English,9999999999999,Harper Perennial Modern Classics,2006-05-23T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,0,0.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z
3,5,Twilight,Stephenie Meyer,About three things I was absolutely positive.\...,English,9780316015844,"Little, Brown and Company",2006-09-06T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,0,0.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z
4,6,The Book Thief,Markus Zusak (Goodreads Author),Librarian's note: An alternate cover edition c...,English,9780375831003,Alfred A. Knopf,2006-03-14T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,0,0.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z
5,7,Animal Farm,"George Orwell, Russell Baker (Preface), C.M. W...",Librarian's note: There is an Alternate Cover ...,English,9780451526342,Signet Classics,1996-04-28T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,0,0.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z
6,9,J.R.R. Tolkien 4-Book Boxed Set: The Hobbit an...,J.R.R. Tolkien,"This four-volume, boxed set contains J.R.R. To...",English,9780345538376,Ballantine Books,2012-09-25T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,0,0.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z
7,10,Gone with the Wind,Margaret Mitchell,"Scarlett O'Hara, the beautiful, spoiled daught...",English,9780446675536,Warner Books,1999-04-01T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,0,0.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z
8,13,The Giving Tree,Shel Silverstein,"""Once there was a tree...and she loved a littl...",English,9780060256654,HarperCollins Publishers,2064-10-07T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,0,0.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z
9,14,Wuthering Heights,"Emily Brontë, Richard J. Dunn (Editor), David ...",You can find the redesigned cover of this edit...,English,9780393978896,Norton,2002-10-28T00:00:00Z,https://i.gr-assets.com/images/S/compressed.ph...,0,0.0,0001-01-01T00:00:00Z,0001-01-01T00:00:00Z


In [19]:
# Fetch all users; get_all_users() keeps only the 'id' and 'gender' columns.
fetcher.get_all_users()

2025-03-14 21:03:26,073 - INFO - Fetched users data: 2 rows, columns: ['id', 'gender']


Unnamed: 0,id,gender
0,1,female
1,2,other


In [20]:
# Fetch the book/genre links as rows of 'book_id' and 'genre_id'.
fetcher.get_book_genres()

2025-03-14 21:03:26,171 - INFO - Fetched book genres data: 200 rows, unique books: 20, unique genres: 60


Unnamed: 0,book_id,genre_id
0,1,1
1,1,2
2,1,3
3,1,4
4,1,5
...,...,...
195,29,2
196,29,24
197,29,27
198,29,60


In [21]:
# Fetch the users' preferred genres as rows of 'user_id' and 'genre_id'.
fetcher.get_user_preferred_genres()

2025-03-14 21:03:26,252 - INFO - Fetched user preferred genres data: 4 rows, columns: ['user_id', 'genre_id']


Unnamed: 0,user_id,genre_id
0,1,4
1,1,23
2,1,30
3,1,41


In [22]:
# Build the user x book rating matrix from the fetched reviews; pairs without a review are filled with 0.
fetcher.get_user_item_matrix()

2025-03-14 21:03:26,310 - INFO - Fetched user-item matrix: 1 users, 2 books


book_id,2,24
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4,5
