In [8]:
# Cell 1 - Imports and setup
import sys
import os

# Add the parent of the current working directory to the import path so that
# the `src` package can be imported. The membership check keeps the cell
# idempotent: re-running it does not append the same entry again.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from src.scraper.letterboxd_scraper import LetterboxdScraper

In [9]:
# Cell 2 - Test scraping
# Scrape one example account and print a short summary of what came back.

# Account used for the smoke test
username = 'paesielawa'  # example user

# Scraper instance (reused by later cells)
scraper = LetterboxdScraper()

# Fetch the ratings for the test account
print(f"Fetching ratings for user: {username}")
df = scraper.get_user_ratings(username)

# Preview of the scraped rows
print("\nFirst 5 rows of scraped data:")
first_rows = df.head()
print(first_rows)

# Number of films per rating value, ordered by rating
print("\nRating distribution:")
rating_counts = df['rating'].value_counts()
print(rating_counts.sort_index())

# Overall row count
total_movies = len(df)
print("\nTotal movies scraped:", total_movies)

Fetching ratings for user: paesielawa
Page 1: Found 72 films
Page 2: Found 72 films
Page 3: Found 72 films
Page 4: Found 72 films
Page 5: Found 72 films
Page 6: Found 72 films
Page 7: Found 72 films
Page 8: Found 72 films
Page 9: Found 20 films

First 5 rows of scraped data:
                                     movie_id  \
0                                sinners-2025   
1                            the-amateur-2025   
2                                   mickey-17   
3           secret-level-playtime-fulfillment   
4  secret-level-honor-of-kings-the-way-of-all   

                                         movie_title  rating  date  
0                                            Sinners     4.5  None  
1                                        The Amateur     2.5  None  
2                                          Mickey 17     3.5  None  
3               Secret Level - Playtime: Fulfillment     2.5  None  
4  Secret Level - Honor of Kings: The Way of All ...     3.5  None  

Rating distrib

In [11]:
# Cell 3 - Multiple users scraping
import pandas as pd
from datetime import datetime

# Users whose ratings should be scraped
usernames = [
    'paesielawa',
    'kabarecik',  # well-known film critic
]

# Create the output folder if it does not exist yet
output_dir = os.path.join(parent_dir, 'data', 'user_ratings')
os.makedirs(output_dir, exist_ok=True)

# Download-date stamp used in the file names. Computed once, outside the loop,
# so every file written by this run carries the same date.
timestamp = datetime.now().strftime('%Y%m%d')

# Users that raised an error or returned no ratings; reported in the summary.
failed_users = []

# Scrape each user in turn. Best-effort: a failure for one user is printed and
# recorded, and the loop moves on to the next user.
# NOTE(review): this loop rebinds `username` and `df`, which the single-user
# test cell also assigns; the names are kept in case later cells read them.
for username in usernames:
    try:
        print(f"\nProcessing user: {username}")

        # File name includes the download date
        output_file = os.path.join(output_dir, f'{username}_ratings_{timestamp}.csv')

        # Fetch the ratings
        df = scraper.get_user_ratings(username)

        # Skip empty results instead of writing an empty CSV and then
        # computing statistics on a frame that has no rows.
        if df.empty:
            print(f"No ratings found for {username}; nothing saved")
            failed_users.append(username)
            continue

        # Save to CSV
        df.to_csv(output_file, index=False)

        print(f"Saved {len(df)} ratings for {username}")
        print(f"File saved as: {output_file}")

        # Basic statistics
        print(f"Rating distribution for {username}:")
        print(df['rating'].value_counts().sort_index())

    except Exception as e:
        # Deliberately broad: one failing user must not stop the whole batch.
        print(f"Error processing user {username}: {e}")
        failed_users.append(username)

# Summary
print("\nScraping completed!")
print(f"Data saved in: {output_dir}")
if failed_users:
    print(f"Users with errors or no ratings ({len(failed_users)}): {', '.join(failed_users)}")


Processing user: paesielawa
Page 1: Found 72 films
Page 2: Found 72 films
Page 3: Found 72 films
Page 4: Found 72 films
Page 5: Found 72 films
Page 6: Found 72 films
Page 7: Found 72 films
Page 8: Found 72 films
Page 9: Found 20 films
Saved 596 ratings for paesielawa
File saved as: d:\Workspace\projekty\Movie recomendation model\movie-recommendation-system\data\user_ratings\paesielawa_ratings_20250512.csv
Rating distribution for paesielawa:
rating
0.5      7
1.0      8
1.5      9
2.0     36
2.5     98
3.0    148
3.5    105
4.0    115
4.5     42
5.0     20
Name: count, dtype: int64

Processing user: kabarecik
Page 1: Found 72 films
Page 2: Found 72 films
Page 3: Found 72 films
Page 4: Found 72 films
Page 5: Found 72 films
Page 6: Found 72 films
Page 7: Found 72 films
Page 8: Found 72 films
Page 9: Found 72 films
Page 10: Found 72 films
Page 11: Found 72 films
Page 12: Found 72 films
Page 13: Found 72 films
Page 14: Found 72 films
Page 15: Found 72 films
Page 16: Found 72 films
Page 17: