In [55]:
import pandas as pd

def load_movielens_data(base_path: str) -> Tuple[Dict, Dict, Dict]:
    df_users = pd.read_csv(f'{base_path}/u.user', sep='|', encoding='latin-1',
                          names=['user_id', 'age', 'gender', 'occupation', 'zipcode'])
    df_users['stt'] = df_users['user_id']
    
    df_movies = pd.read_csv(f'{base_path}/u.item', sep='|', encoding='latin-1',
                           names=['movie_id', 'title', 'release_date', 'video_release_date', 
                                 'IMDb_URL'] + [f'genre_{i}' for i in range(19)])
    df_movies['stt'] = df_movies['movie_id']
    
    df_ratings = pd.read_csv(f'{base_path}/u.data', sep='\t', encoding='latin-1',
                            names=['user_id', 'movie_id', 'rating', 'timestamp'])
    
    users = df_users.set_index('stt').to_dict(orient='index')
    movies = df_movies.set_index('stt').to_dict(orient='index')
    ratings = {(row['user_id'], row['movie_id']): row['rating'] 
              for _, row in df_ratings.iterrows()}
    
    return users, movies, ratings

users, movies, ratings = load_movielens_data('ml-100k')
print(f"Số lượng users: {len(users)}")
print(f"Số lượng movies: {len(movies)}")
print(f"Số lượng ratings: {len(ratings)}")

Số lượng users: 943
Số lượng movies: 1682
Số lượng ratings: 100000


In [56]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
from typing import Dict, List, Tuple
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

class MovieFeatureExtractor:
    """Extract and process movie features"""
    def __init__(self, movies_df: pd.DataFrame):
        self.movies_df = movies_df
        self.genre_columns = self._extract_genres()
        self.scaler = StandardScaler()
        self.features = self._build_features()
        
    def _extract_genres(self) -> List[str]:
        """Extract unique genres from movies dataset"""
        all_genres = set()
        for genres in self.movies_df['genres'].str.split('|'):
            if isinstance(genres, list):
                all_genres.update(genres)
        return sorted(list(all_genres))
    
    def _build_features(self) -> np.ndarray:
        """Build movie feature matrix"""
        # Create genre binary features
        genre_features = np.zeros((len(self.movies_df), len(self.genre_columns)))
        
        for i, genres in enumerate(self.movies_df['genres'].str.split('|')):
            if isinstance(genres, list):
                for genre in genres:
                    genre_idx = self.genre_columns.index(genre)
                    genre_features[i, genre_idx] = 1
                    
        # Combine with other numerical features if available
        numerical_features = []
        if 'vote_average' in self.movies_df.columns:
            numerical_features.append(self.movies_df['vote_average'].fillna(0))
        if 'runtime' in self.movies_df.columns:
            numerical_features.append(self.movies_df['runtime'].fillna(0))
            
        if numerical_features:
            numerical_features = np.column_stack(numerical_features)
            numerical_features = self.scaler.fit_transform(numerical_features)
            features = np.hstack([genre_features, numerical_features])
        else:
            features = genre_features
            
        return features
    
    def get_movie_features(self, movie_id: int) -> np.ndarray:
        """Get features for a specific movie"""
        movie_idx = self.movies_df[self.movies_df['movie_id'] == movie_id].index[0]
        return self.features[movie_idx]

class UserFeatureExtractor:
    """Extract and process user features"""
    def __init__(self, users_df: pd.DataFrame, ratings_df: pd.DataFrame):
        self.users_df = users_df
        self.ratings_df = ratings_df
        self.scaler = StandardScaler()
        self.features = self._build_features()
        
    def _build_features(self) -> np.ndarray:
        """Build user feature matrix"""
        # Calculate rating statistics for each user
        user_stats = self.ratings_df.groupby('user_id').agg({
            'rating': ['count', 'mean', 'std']
        }).fillna(0)
        user_stats.columns = ['rating_count', 'rating_mean', 'rating_std']
        
        # Normalize numerical features
        numerical_features = self.scaler.fit_transform(user_stats)
        
        # Create demographic features if available
        demographic_features = []
        if 'age' in self.users_df.columns:
            age_scaled = self.scaler.fit_transform(self.users_df[['age']])
            demographic_features.append(age_scaled)
            
        if 'occupation' in self.users_df.columns:
            occupation_dummies = pd.get_dummies(self.users_df['occupation'])
            demographic_features.append(occupation_dummies.values)
            
        if demographic_features:
            features = np.hstack([numerical_features] + demographic_features)
        else:
            features = numerical_features
            
        return features
    
    def get_user_features(self, user_id: int) -> np.ndarray:
        """Get features for a specific user"""
        user_idx = self.users_df[self.users_df['user_id'] == user_id].index[0]
        return self.features[user_idx]



In [57]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
from typing import Dict, List, Tuple, Optional
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

class MovieFeatureExtractor:
    """Extract and process movie features"""
    def __init__(self, movies: Dict):
        self.movies = movies
        self.genre_columns = [f'genre_{i}' for i in range(19)]
        self.scaler = StandardScaler()
        self.features = self._build_features()
        
    def _build_features(self) -> np.ndarray:
        """Build movie feature matrix"""
        n_movies = len(self.movies)
        n_features = len(self.genre_columns)
        features = np.zeros((n_movies, n_features))
        
        for movie_id, movie_data in self.movies.items():
            for i, genre_col in enumerate(self.genre_columns):
                features[movie_id-1, i] = movie_data[genre_col]
                    
        return features
    
    def get_movie_features(self, movie_id: int) -> np.ndarray:
        """Get features for a specific movie"""
        return self.features[movie_id-1]

class UserFeatureExtractor:
    """Extract and process user features"""
    def __init__(self, users: Dict, ratings: Dict):
        self.users = users
        self.ratings = ratings
        self.scaler = StandardScaler()
        self.features = self._build_features()
        
    def _build_features(self) -> np.ndarray:
        """Build user feature matrix"""
        # Calculate rating statistics for each user
        user_stats = self._calculate_user_stats()
        
        # Create demographic features
        demographic_features = self._create_demographic_features()
        
        # Combine features
        features = np.hstack([user_stats, demographic_features])
        return self.scaler.fit_transform(features)
    
    def _calculate_user_stats(self) -> np.ndarray:
        """Calculate rating statistics for each user"""
        user_stats = {}
        for user_id in self.users.keys():
            user_ratings = [rating for (u_id, _), rating in self.ratings.items() if u_id == user_id]
            user_stats[user_id] = {
                'rating_count': len(user_ratings),
                'rating_mean': np.mean(user_ratings) if user_ratings else 0,
                'rating_std': np.std(user_ratings) if user_ratings else 0
            }
        
        stats_array = np.zeros((len(self.users), 3))
        for user_id, stats in user_stats.items():
            stats_array[user_id-1] = [stats['rating_count'], stats['rating_mean'], stats['rating_std']]
        return stats_array
    
    def _create_demographic_features(self) -> np.ndarray:
        """Create demographic features from user data"""
        n_users = len(self.users)
        
        # Age feature
        age_feature = np.zeros((n_users, 1))
        for user_id, user_data in self.users.items():
            age_feature[user_id-1] = user_data['age']
            
        # Gender feature (binary)
        gender_feature = np.zeros((n_users, 1))
        for user_id, user_data in self.users.items():
            gender_feature[user_id-1] = 1 if user_data['gender'] == 'M' else 0
            
        # Occupation one-hot encoding
        occupations = sorted(set(user['occupation'] for user in self.users.values()))
        occupation_features = np.zeros((n_users, len(occupations)))
        for user_id, user_data in self.users.items():
            occupation_idx = occupations.index(user_data['occupation'])
            occupation_features[user_id-1, occupation_idx] = 1
            
        return np.hstack([age_feature, gender_feature, occupation_features])
    
    def get_user_features(self, user_id: int) -> np.ndarray:
        """Get features for a specific user"""
        return self.features[user_id-1]

class MovieRecommenderEnv:
    """Environment for movie recommendation"""
    def __init__(self, users: Dict, movies: Dict, ratings: Dict, max_steps: int = 100):
        self.users = users
        self.movies = movies
        self.ratings = ratings
        self.max_steps = max_steps
        
        self.user_history = self._build_user_history()
        self.current_user = None
        self.current_movie = None
        self.steps = 0
        
    def _build_user_history(self) -> Dict[int, List[int]]:
        """Build dictionary of user rating history"""
        history = {}
        for (user_id, movie_id) in self.ratings.keys():
            if user_id not in history:
                history[user_id] = []
            history[user_id].append(movie_id)
        return history
    
    def reset(self) -> Tuple[int, int]:
        """Reset environment and return initial state"""
        self.steps = 0
        self.current_user = random.choice(list(self.users.keys()))
        available_movies = list(set(self.movies.keys()) - set(self.user_history.get(self.current_user, [])))
        self.current_movie = random.choice(available_movies) if available_movies else random.choice(list(self.movies.keys()))
        return self.current_user, self.current_movie
    
    def step(self, action: int) -> Tuple[Tuple[int, int], float]:
        """Take action and return next state and reward"""
        self.steps += 1
        
        # Calculate reward based on prediction accuracy
        true_rating = self.ratings.get((self.current_user, self.current_movie), None)
        if true_rating is not None:
            reward = -abs(true_rating - action)  # Negative absolute error as reward
        else:
            reward = -2.5  # Penalty for recommending already rated movie
        
        # Get next state
        if self.steps >= self.max_steps:
            return (self.current_user, self.current_movie), reward
            
        available_movies = list(set(self.movies.keys()) - set(self.user_history.get(self.current_user, [])))
        self.current_movie = random.choice(available_movies) if available_movies else random.choice(list(self.movies.keys()))
        
        return (self.current_user, self.current_movie), reward
    
    def evaluate(self) -> float:
        """Calculate RMSE on test set"""
        predictions = []
        true_ratings = []
        
        for (user_id, movie_id), rating in random.sample(list(self.ratings.items()), min(1000, len(self.ratings))):
            state = (user_id, movie_id)
            predicted_rating = self.predict(state)
            predictions.append(predicted_rating)
            true_ratings.append(rating)
            
        return np.sqrt(mean_squared_error(true_ratings, predictions))
    
    def predict(self, state: Tuple[int, int]) -> float:
        """Predict rating for a given state"""
        # This method will be implemented by the recommender system
        return 3.0  # Default prediction


class DQN(nn.Module):
    """Deep Q-Network architecture"""
    def __init__(self, input_dim: int, hidden_dims: List[int], output_dim: int):
        super(DQN, self).__init__()
        
        layers = []
        prev_dim = input_dim
        
        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2)
            ])
            prev_dim = hidden_dim
            
        layers.append(nn.Linear(prev_dim, output_dim))
        
        self.network = nn.Sequential(*layers)
        
    def forward(self, x):
        return self.network(x)

class ReplayBuffer:
    """Experience replay buffer"""
    def __init__(self, capacity: int):
        self.buffer = deque(maxlen=capacity)
        
    def push(self, state, action, reward, next_state):
        self.buffer.append((state, action, reward, next_state))
        
    def sample(self, batch_size: int) -> List:
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))
    
    def __len__(self) -> int:
        return len(self.buffer)

class DeepQRecommender:
    """Deep Q-Learning based movie recommender"""
    def __init__(self, env,
                 movie_features: MovieFeatureExtractor,
                 user_features: UserFeatureExtractor,
                 hidden_dims: List[int] = [128, 64],
                 learning_rate: float = 0.001,
                 gamma: float = 0.95,
                 epsilon_start: float = 1.0,
                 epsilon_end: float = 0.01,
                 epsilon_decay: float = 0.995,
                 buffer_size: int = 10000,
                 batch_size: int = 32,
                 target_update: int = 10):
        self.env = env
        self.movie_features = movie_features
        self.user_features = user_features
        self.possible_ratings = [1, 2, 3, 4, 5]
        
        # Calculate input dimension
        state_dim = (len(self.movie_features.get_movie_features(0)) + 
                    len(self.user_features.get_user_features(0)))
        
        # Initialize networks
        self.policy_net = DQN(state_dim, hidden_dims, len(self.possible_ratings))
        self.target_net = DQN(state_dim, hidden_dims, len(self.possible_ratings))
        self.target_net.load_state_dict(self.policy_net.state_dict())
        
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
        self.memory = ReplayBuffer(buffer_size)
        
        # Training parameters
        self.gamma = gamma
        self.epsilon = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size
        self.target_update = target_update
        self.steps_done = 0
        
    def get_state(self, user_id: int, movie_id: int) -> torch.Tensor:
        """Combine user and movie features into state"""
        movie_feat = self.movie_features.get_movie_features(movie_id)
        user_feat = self.user_features.get_user_features(user_id)
        state = np.concatenate([movie_feat, user_feat])
        return torch.FloatTensor(state)
    
    def select_action(self, state: torch.Tensor) -> int:
        """Select action using epsilon-greedy policy"""
        if random.random() > self.epsilon:
            with torch.no_grad():
                q_values = self.policy_net(state)
                return self.possible_ratings[q_values.argmax().item()]
        else:
            return random.choice(self.possible_ratings)
        
    def optimize_model(self):
        """Train the model using experience replay"""
        if len(self.memory) < self.batch_size:
            return
        
        transitions = self.memory.sample(self.batch_size)
        batch = list(zip(*transitions))
        
        state_batch = torch.stack(batch[0])
        action_batch = torch.tensor(batch[1])
        reward_batch = torch.tensor(batch[2])
        next_state_batch = torch.stack(batch[3])
        
        # Compute current Q values
        current_q_values = self.policy_net(state_batch).gather(1, 
            torch.tensor([[self.possible_ratings.index(a)] for a in action_batch]))
        
        # Compute next Q values
        next_q_values = self.target_net(next_state_batch).max(1)[0].detach()
        expected_q_values = reward_batch + self.gamma * next_q_values
        
        # Compute loss and optimize
        loss = nn.MSELoss()(current_q_values, expected_q_values.unsqueeze(1))
        
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        
        # Update epsilon
        self.epsilon = max(self.epsilon_end, self.epsilon * self.epsilon_decay)
        
    def update_target_network(self):
        """Update target network weights"""
        if self.steps_done % self.target_update == 0:
            self.target_net.load_state_dict(self.policy_net.state_dict())
            
    def train(self, num_episodes: int):
        """Train the model"""
        rewards_history = []
        rmse_history = []
        
        for episode in range(num_episodes):
            state = self.env.reset()
            episode_rewards = []
            
            while True:
                # Convert state to tensor
                state_tensor = self.get_state(state[0], state[1])
                
                # Select and perform action
                action = self.select_action(state_tensor)
                next_state, reward = self.env.step(action)
                
                # Store transition
                next_state_tensor = self.get_state(next_state[0], next_state[1])
                self.memory.push(state_tensor, action, reward, next_state_tensor)
                
                # Update state
                state = next_state
                episode_rewards.append(reward)
                
                # Optimize model
                self.optimize_model()
                self.steps_done += 1
                
                # Update target network
                self.update_target_network()
                
                if len(episode_rewards) >= self.env.max_steps:
                    break
            
            # Calculate metrics
            avg_reward = np.mean(episode_rewards)
            rmse = self.env.evaluate()
            
            rewards_history.append(avg_reward)
            rmse_history.append(rmse)
            
            if episode % 100 == 0:
                print(f"Episode {episode}")
                print(f"Average Reward: {avg_reward:.4f}")
                print(f"RMSE: {rmse:.4f}")
                print(f"Epsilon: {self.epsilon:.4f}")
                print("------------------------")
                
        return rewards_history, rmse_history
    
    def recommend_movies(self, user_id: int, top_k: int = 5):
        """Recommend top-k movies for a user"""
        user_rated_movies = set(self.env.user_history.get(user_id, []))
        all_movies = set(self.env.movies.keys())
        unwatched_movies = list(all_movies - user_rated_movies)
        
        predictions = []
        for movie_id in unwatched_movies:
            state = self.get_state(user_id, movie_id)
            with torch.no_grad():
                q_values = self.policy_net(state)
                predicted_rating = self.possible_ratings[q_values.argmax().item()]
                confidence = torch.softmax(q_values, dim=0).max().item()
                predictions.append((movie_id, predicted_rating, confidence))
        
        # Sort by predicted rating and confidence
        predictions.sort(key=lambda x: (x[1], x[2]), reverse=True)
        
        top_recommendations = []
        for movie_id, pred_rating, conf in predictions[:top_k]:
            movie_info = self.env.movies[movie_id]
            top_recommendations.append({
                'movie_id': movie_id,
                'title': movie_info['title'],
                'predicted_rating': pred_rating,
                'confidence': conf
            })
            
        return top_recommendations
    
    

In [58]:
# Load data
users, movies, ratings = load_movielens_data('ml-100k')

# Initialize feature extractors
movie_features = MovieFeatureExtractor(movies)
user_features = UserFeatureExtractor(users, ratings)

# Initialize environment
env = MovieRecommenderEnv(users, movies, ratings)

# Initialize recommender
recommender = DeepQRecommender(
    env,
    movie_features=movie_features,
    user_features=user_features,
    hidden_dims=[128, 64],
    learning_rate=0.001,
    gamma=0.95,
    epsilon_start=1.0,
    epsilon_end=0.01,
    epsilon_decay=0.995,
    buffer_size=10000,
    batch_size=32,
    target_update=10
)

# Train model
rewards_history, rmse_history = recommender.train(num_episodes=100)

Episode 0
Average Reward: -2.5000
RMSE: 1.2377
Epsilon: 0.7076
------------------------


In [61]:
# Get recommendations for a user
user_id = 1
recommendations = recommender.recommend_movies(user_id, 2000)

print("\nTop 5 movie recommendations for user", user_id)
for i, rec in enumerate(recommendations, 1):
    print(f"{i}. Movie: {rec['title']}")
    print(f"Predicted Rating: {rec['predicted_rating']:.1f}")
    print(f"Confidence: {rec['confidence']:.2f}")
    print("------------------------")


Top 5 movie recommendations for user 1
1. Movie: Fear, The (1995)
Predicted Rating: 5.0
Confidence: 0.34
------------------------
2. Movie: Once Upon a Time... When We Were Colored (1995)
Predicted Rating: 5.0
Confidence: 0.33
------------------------
3. Movie: Hideaway (1995)
Predicted Rating: 5.0
Confidence: 0.33
------------------------
4. Movie: Jaws 3-D (1983)
Predicted Rating: 5.0
Confidence: 0.32
------------------------
5. Movie: Bride of Frankenstein (1935)
Predicted Rating: 5.0
Confidence: 0.32
------------------------
6. Movie: Entertaining Angels: The Dorothy Day Story (1996)
Predicted Rating: 5.0
Confidence: 0.32
------------------------
7. Movie: Deceiver (1997)
Predicted Rating: 5.0
Confidence: 0.32
------------------------
8. Movie: Object of My Affection, The (1998)
Predicted Rating: 5.0
Confidence: 0.31
------------------------
9. Movie: Gate of Heavenly Peace, The (1995)
Predicted Rating: 5.0
Confidence: 0.31
------------------------
10. Movie: Thirty-Two Short Film