In [10]:
# Install required dependencies
!pip install gradio torch sentence-transformers pandas numpy

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
import pickle
import json
from collections import deque
import random
import sqlite3
import gradio as gr
import time

# Define the RL model architecture
class RLRecommender(nn.Module):
    """Feed-forward network that maps a state vector to one score per action.

    The stack is: linear -> ReLU -> linear -> ReLU -> linear, with both hidden
    layers ``hidden_dim`` units wide.
    """

    def __init__(self, state_dim: int, action_dim: int, hidden_dim: int = 256):
        super().__init__()

        # Layers are listed in execution order; ``nn.Sequential`` numbers them
        # 0..4, which fixes the parameter names (``network.0.weight`` etc.).
        layers = [
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, action_dim),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, state):
        """Run ``state`` through the network and return the resulting scores."""
        scores = self.network(state)
        return scores

# Generate sample data
class ResearchPaperDataset:
    """Synthetic research-paper corpus with a sentence embedding per paper."""

    def __init__(self):
        # Sentence encoder used by ``create_embeddings``.
        self.paper_model = SentenceTransformer('all-MiniLM-L6-v2')
        # paper_id -> paper metadata dict (filled by ``create_embeddings``).
        self.papers = {}
        # paper_id -> embedding of "<title> <abstract>".
        self.paper_embeddings = {}

    def generate_sample_data(self, num_papers=200, seed=None):
        """Generate realistic sample research paper data.

        Args:
            num_papers: Number of paper dicts to generate.
            seed: Optional seed. When given, a private ``random.Random(seed)``
                is used so the output is reproducible and the global ``random``
                state is left untouched. When ``None`` (the default) the
                module-level ``random`` generator is used.

        Returns:
            A list of ``num_papers`` dicts with the keys ``paper_id``,
            ``title``, ``abstract``, ``authors``, ``categories``, ``venue``,
            ``year`` and ``citations``.
        """
        categories = {
            'ML': ['Deep Learning', 'Reinforcement Learning', 'Supervised Learning', 'Unsupervised Learning'],
            'NLP': ['Transformers', 'Language Models', 'Text Generation', 'Sentiment Analysis'],
            'CV': ['Object Detection', 'Image Segmentation', 'GANs', 'Neural Rendering'],
            'RL': ['Q-Learning', 'Policy Gradients', 'Multi-Agent', 'Imitation Learning'],
            'AI': ['Ethics', 'Theory', 'Applications', 'Systems']
        }

        venues = ['NeurIPS', 'ICML', 'ICLR', 'ACL', 'CVPR', 'AAAI', 'IJCAI']
        research_topics = [
            "Efficient Transformers for Long Sequences",
            "Multi-Modal Learning with Vision-Language Models",
            "Robust Deep Learning against Adversarial Attacks",
            "Self-Supervised Learning Paradigms",
            "Federated Learning for Privacy Preservation",
            "Explainable AI for Medical Diagnostics",
            "Reinforcement Learning from Human Feedback",
            "Graph Neural Networks for Social Networks",
            "Quantum Machine Learning Applications",
            "Meta-Learning for Few-Shot Classification",
            "Large Language Model Fine-tuning",
            "Computer Vision for Autonomous Driving",
            "Neural Architecture Search",
            "AI for Scientific Discovery",
            "Multimodal Representation Learning"
        ]

        # ``random`` (the module) and ``random.Random`` expose the same
        # ``choice``/``randint`` API, so either can serve as the generator.
        rng = random if seed is None else random.Random(seed)
        category_names = list(categories)

        sample_papers = []
        for i in range(num_papers):
            category = rng.choice(category_names)
            subcategory = rng.choice(categories[category])
            research_topic = rng.choice(research_topics)

            paper = {
                'paper_id': f'paper_{i:04d}',
                'title': f'Advancements in {research_topic}',
                'abstract': f'This paper presents novel research in {category} focusing on {subcategory}. We introduce a new methodology that significantly improves upon existing approaches in {research_topic}. Our experiments demonstrate state-of-the-art performance on benchmark datasets with comprehensive ablation studies and real-world applications.',
                # Author count cycles 1, 2, 3, 4 with the paper index.
                'authors': [f'Researcher_{j}' for j in range(1, 2 + i % 4)],
                'categories': [category, subcategory],
                'venue': rng.choice(venues),
                # Publication year cycles through 2020-2023.
                'year': 2020 + (i % 4),
                'citations': rng.randint(0, 200)
            }
            sample_papers.append(paper)

        return sample_papers

    def create_embeddings(self, papers: list):
        """Create embeddings for all papers.

        Each paper is registered in ``self.papers`` and its embedding of
        ``title + " " + abstract`` is stored in ``self.paper_embeddings``,
        both keyed by ``paper['paper_id']``.

        Args:
            papers: Paper dicts with at least ``paper_id``, ``title`` and
                ``abstract`` keys.
        """
        print("Creating paper embeddings...")

        if papers:
            # Encode everything in one call so the encoder can batch the
            # texts instead of running one forward pass per paper.
            texts = [paper['title'] + " " + paper['abstract'] for paper in papers]
            embeddings = self.paper_model.encode(texts)

            for paper, embedding in zip(papers, embeddings):
                paper_id = paper['paper_id']
                self.papers[paper_id] = paper
                self.paper_embeddings[paper_id] = embedding

        print(f"Created embeddings for {len(self.papers)} papers")

# Initialize dataset
# Builds the module-level ``dataset`` used by the model factory and the
# recommendation system below: 200 generated papers, each embedded from its
# title and abstract.
print("📚 Initializing research paper dataset...")
dataset = ResearchPaperDataset()
sample_papers = dataset.generate_sample_data(200)
dataset.create_embeddings(sample_papers)

print(f"✅ Dataset ready: {len(dataset.papers)} papers loaded")

# Create a simple trained model
def create_pretrained_model():
    """Build the demo recommender network with freshly initialised weights.

    Despite the name, no training happens here: every linear layer receives
    Xavier-normal weights and zero biases, so the returned network is an
    untrained stand-in used for demonstration.

    Returns:
        An ``RLRecommender`` taking a 386-dimensional state and producing one
        output per paper currently in ``dataset.papers``.
    """
    state_dim = 386  # 384 (embedding) + 2 (metrics)
    action_dim = len(dataset.papers)

    model = RLRecommender(state_dim, action_dim)

    for layer in model.network:
        # Only the linear layers carry parameters; activations are skipped.
        if isinstance(layer, nn.Linear):
            nn.init.xavier_normal_(layer.weight)
            # ``bias`` exists as an attribute even when it is disabled, in
            # which case it is None and must not be initialised.
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0)

    return model

# Module-level model instance; ResearchRecommendationSystem.__init__ moves it
# to the selected device and uses it as its agent.
trained_model = create_pretrained_model()

# Research Recommendation System
class ResearchRecommendationSystem:
    """Serve paper recommendations and adapt user profiles from feedback.

    Combines the module-level ``trained_model`` (scores every paper for a
    user state), the paper data from the module-level ``dataset`` and a
    SQLite database holding per-user interest embeddings and the
    interaction log.
    """

    # Reward assigned to each feedback action; unknown actions score 0.0.
    _REWARD_MAP = {
        'read': 0.7,
        'save': 1.0,
        'skip': -0.1,
        'dislike': -0.5
    }

    def __init__(self):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.agent = trained_model.to(self.device)
        self.agent.eval()
        self.paper_embeddings = dataset.paper_embeddings
        # Position i in this list is the paper scored by agent output i.
        self.available_papers = list(self.paper_embeddings.keys())
        self.papers = dataset.papers
        self.user_states = {}
        self.setup_database()

    def setup_database(self, db_path: str = 'gradio_recommendation.db'):
        """Setup SQLite database for user data.

        Args:
            db_path: Database location passed to ``sqlite3.connect``; the
                default keeps the original on-disk file name.
        """
        # check_same_thread=False lets the connection be used from threads
        # other than the one that created it.
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        cursor = self.conn.cursor()

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS users (
                user_id TEXT PRIMARY KEY,
                interest_embedding BLOB,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS interactions (
                interaction_id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id TEXT,
                paper_id TEXT,
                action_type TEXT,
                reward REAL,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        self.conn.commit()

    def get_or_create_user(self, user_id: str) -> np.ndarray:
        """Get user data or create new user.

        Returns:
            The stored interest embedding for ``user_id``. Unknown users are
            inserted with a random 384-dimensional embedding drawn from
            N(0, 0.1) and that embedding is returned.
        """
        cursor = self.conn.cursor()

        cursor.execute('SELECT interest_embedding FROM users WHERE user_id = ?', (user_id,))
        result = cursor.fetchone()

        if result:
            # NOTE(security): pickle.loads can execute arbitrary code when
            # given a crafted blob. The blobs read here are written by this
            # class; do not point the system at an untrusted database file.
            interest_embedding = pickle.loads(result[0])
        else:
            interest_embedding = np.random.normal(0, 0.1, 384)
            cursor.execute(
                'INSERT INTO users (user_id, interest_embedding) VALUES (?, ?)',
                (user_id, pickle.dumps(interest_embedding))
            )
            self.conn.commit()

        return interest_embedding

    def update_user_interests(self, user_id: str, paper_embedding: np.ndarray, reward: float) -> np.ndarray:
        """Update user interests based on interaction.

        The stored embedding is blended with ``paper_embedding`` using a
        step of ``0.1 * reward`` (negative rewards push the profile away
        from the paper), re-normalised to unit length and written back.

        Returns:
            The embedding that was stored.
        """
        current_embedding = self.get_or_create_user(user_id)

        learning_rate = 0.1 * reward
        new_embedding = (1 - learning_rate) * current_embedding + learning_rate * paper_embedding

        norm = np.linalg.norm(new_embedding)
        if norm > 0 and np.isfinite(norm):
            new_embedding = new_embedding / norm
        else:
            # A degenerate blend cannot be normalised; keep the previous
            # profile rather than persisting NaNs.
            new_embedding = current_embedding

        cursor = self.conn.cursor()
        cursor.execute(
            'UPDATE users SET interest_embedding = ? WHERE user_id = ?',
            (pickle.dumps(new_embedding), user_id)
        )
        self.conn.commit()

        return new_embedding

    def log_interaction(self, user_id: str, paper_id: str, action_type: str, reward: float):
        """Log user interaction"""
        cursor = self.conn.cursor()
        cursor.execute(
            'INSERT INTO interactions (user_id, paper_id, action_type, reward) VALUES (?, ?, ?, ?)',
            (user_id, paper_id, action_type, reward)
        )
        self.conn.commit()

    def get_user_state(self, user_id: str) -> np.ndarray:
        """Get current user state for RL agent.

        Returns:
            The user's interest embedding followed by two metrics: the mean
            reward of the (up to) ten most recent interactions and how many
            interactions that mean covers.
        """
        interest_embedding = self.get_or_create_user(user_id)

        cursor = self.conn.cursor()
        # CURRENT_TIMESTAMP has one-second resolution, so interactions logged
        # within the same second tie; the autoincrement id breaks the tie in
        # insertion order.
        cursor.execute('''
            SELECT reward FROM interactions
            WHERE user_id = ?
            ORDER BY timestamp DESC, interaction_id DESC LIMIT 10
        ''', (user_id,))

        recent_rewards = [row[0] for row in cursor.fetchall()]
        avg_reward = np.mean(recent_rewards) if recent_rewards else 0

        state = np.concatenate([
            interest_embedding,
            [avg_reward, len(recent_rewards)]
        ])

        return state

    def get_recommendations(self, user_id: str, num_recommendations: int = 5) -> list:
        """Get personalized paper recommendations.

        The agent picks the ``num_recommendations`` highest-scoring papers
        for the user's state; the picks are then ordered by cosine
        similarity between the user's interest embedding and each paper.

        Args:
            user_id: User to recommend for (created if unknown).
            num_recommendations: How many papers to return. Integral floats
                (e.g. ``5.0`` from a UI widget) are accepted; negative values
                yield an empty list.

        Returns:
            Copies of the paper dicts with ``paper_id`` and
            ``interest_score`` added, best match first.
        """
        # Slicing a tensor with a float raises TypeError, so coerce first.
        count = max(0, int(num_recommendations))
        user_state = self.get_user_state(user_id)

        with torch.no_grad():
            state_tensor = torch.FloatTensor(user_state).unsqueeze(0).to(self.device)
            q_values = self.agent(state_tensor)
            top_actions = q_values.argsort(descending=True)[0][:count]

        # The state is the interest embedding plus two trailing metrics, so
        # the embedding can be recovered without another database query.
        user_embedding = user_state[:-2]
        user_norm = np.linalg.norm(user_embedding)

        recommendations = []
        for action in top_actions:
            paper_id = self.available_papers[action.item()]
            paper_embedding = self.paper_embeddings[paper_id]

            denominator = user_norm * np.linalg.norm(paper_embedding)
            if denominator > 0:
                interest_score = np.dot(user_embedding, paper_embedding) / denominator
            else:
                # Cosine similarity is undefined for a zero vector.
                interest_score = 0.0

            paper_info = self.papers[paper_id].copy()
            paper_info['paper_id'] = paper_id
            paper_info['interest_score'] = float(interest_score)

            recommendations.append(paper_info)

        return sorted(recommendations, key=lambda x: x['interest_score'], reverse=True)

    def process_feedback(self, user_id: str, paper_id: str, action: str) -> bool:
        """Process user feedback and update models.

        Returns:
            ``False`` when ``paper_id`` has no embedding (nothing is stored);
            ``True`` after the user profile was updated and the interaction
            logged. Actions missing from the reward map are recorded with a
            reward of 0.0.
        """
        paper_embedding = self.paper_embeddings.get(paper_id)
        if paper_embedding is None:
            return False

        reward = self._REWARD_MAP.get(action, 0.0)

        self.update_user_interests(user_id, paper_embedding, reward)
        self.log_interaction(user_id, paper_id, action, reward)

        return True

    def get_user_stats(self, user_id: str) -> dict:
        """Get user statistics.

        Returns:
            A dict with ``total_interactions``, ``average_reward`` (0 when
            the user has no interactions) and ``action_counts`` mapping each
            action type to its number of occurrences.
        """
        cursor = self.conn.cursor()
        cursor.execute('''
            SELECT COUNT(*), AVG(reward) FROM interactions WHERE user_id = ?
        ''', (user_id,))

        count_result = cursor.fetchone()
        total_interactions = count_result[0] or 0
        # AVG() is NULL when there are no rows.
        avg_reward = count_result[1] or 0

        cursor.execute('''
            SELECT action_type, COUNT(*) FROM interactions
            WHERE user_id = ? GROUP BY action_type
        ''', (user_id,))

        action_counts = dict(cursor.fetchall())

        return {
            'total_interactions': total_interactions,
            'average_reward': avg_reward,
            'action_counts': action_counts
        }

# Initialize the recommendation system
# Module-level instance shared by all Gradio callback functions below.
print("🚀 Initializing Recommendation System...")
recommender = ResearchRecommendationSystem()

# Gradio Interface Functions
def get_recommendations_interface(user_id, num_recommendations):
    """Gradio interface for getting recommendations.

    Args:
        user_id: Text entered in the User ID box.
        num_recommendations: Requested number of papers (coerced to int).

    Returns:
        A ``(text, html, stats)`` tuple of strings: a plain-text listing, an
        HTML card per paper and a one-line user summary. On a missing user
        ID, an empty result or an error, the first element carries the
        message and the other two are empty strings.
    """
    if not user_id.strip():
        return "⚠️ Please enter a User ID", "", ""

    try:
        recommendations = recommender.get_recommendations(user_id, int(num_recommendations))

        if not recommendations:
            return "❌ No recommendations found. Try interacting with some papers first!", "", ""

        # Format recommendations for display
        text_parts = []
        html_parts = []

        for i, paper in enumerate(recommendations, 1):
            paper_id = paper['paper_id']
            match_percent = paper['interest_score'] * 100
            categories = ', '.join(paper['categories'])

            # The paper ID is shown because the feedback form asks the user
            # to type it in.
            text_parts.append(
                f"{i}. {paper['title']}\n"
                f"   Paper ID: {paper_id}\n"
                f"   Match: {match_percent:.1f}% | Venue: {paper['venue']} | Year: {paper['year']}\n"
                f"   Categories: {categories}\n"
                f"   Citations: {paper['citations']}\n"
                f"   Abstract: {paper['abstract'][:150]}...\n\n"
            )

            # NOTE(review): the buttons call a JavaScript function named
            # provideFeedback that is not defined anywhere in this file;
            # confirm it is supplied elsewhere or wire the buttons up.
            html_parts.append(f"""
            <div style="border: 1px solid #e0e0e0; padding: 15px; margin: 10px 0; border-radius: 8px; background: white;">
                <h3 style="margin: 0 0 10px 0; color: #2c3e50;">{i}. {paper['title']}</h3>
                <div style="display: flex; gap: 10px; margin-bottom: 10px; flex-wrap: wrap;">
                    <span style="background: #e74c3c; color: white; padding: 4px 8px; border-radius: 12px; font-size: 0.9em;">
                        {match_percent:.1f}% Match
                    </span>
                    <span style="background: #3498db; color: white; padding: 4px 8px; border-radius: 12px; font-size: 0.9em;">
                        {paper['venue']} {paper['year']}
                    </span>
                    <span style="background: #27ae60; color: white; padding: 4px 8px; border-radius: 12px; font-size: 0.9em;">
                        {paper['citations']} Citations
                    </span>
                </div>
                <p style="margin: 5px 0; color: #7f8c8d;"><strong>Paper ID:</strong> <code>{paper_id}</code></p>
                <p style="margin: 5px 0; color: #7f8c8d;"><strong>Categories:</strong> {categories}</p>
                <p style="margin: 10px 0; color: #5d6d7e; line-height: 1.4;">{paper['abstract']}</p>
                <div style="display: flex; gap: 10px; margin-top: 10px;">
                    <button onclick='provideFeedback("{paper_id}", "read")' style="background: #3498db; color: white; border: none; padding: 8px 15px; border-radius: 5px; cursor: pointer;">📖 Read</button>
                    <button onclick='provideFeedback("{paper_id}", "save")' style="background: #27ae60; color: white; border: none; padding: 8px 15px; border-radius: 5px; cursor: pointer;">💾 Save</button>
                    <button onclick='provideFeedback("{paper_id}", "skip")' style="background: #95a5a6; color: white; border: none; padding: 8px 15px; border-radius: 5px; cursor: pointer;">⏭️ Skip</button>
                </div>
            </div>
            """)

        recommendations_text = "".join(text_parts)
        recommendations_html = "".join(html_parts)

        stats = recommender.get_user_stats(user_id)
        stats_text = f"User: {user_id} | Total Interactions: {stats['total_interactions']} | Avg Reward: {stats['average_reward']:.2f}"

        return recommendations_text, recommendations_html, stats_text

    except Exception as e:
        # UI boundary: surface the failure in the output box instead of
        # letting the callback crash.
        return f"❌ Error: {str(e)}", "", ""

def provide_feedback_interface(user_id, paper_id, action):
    """Gradio interface for providing feedback.

    Args:
        user_id: Text entered in the User ID box.
        paper_id: Text entered in the Paper ID box; surrounding whitespace
            is ignored.
        action: Selected feedback action (e.g. ``"read"``).

    Returns:
        A status message describing whether the feedback was recorded.
    """
    if not user_id.strip() or not paper_id.strip():
        return "⚠️ Please enter both User ID and Paper ID"

    # Generated paper IDs contain no whitespace, so stray spaces from
    # copy/paste must not turn a valid ID into an unknown one.
    paper_id = paper_id.strip()

    try:
        success = recommender.process_feedback(user_id, paper_id, action)

        if not success:
            return "❌ Failed to record feedback"

        # Mirrors the rewards applied by the recommender, for display only.
        reward_map = {'read': 0.7, 'save': 1.0, 'skip': -0.1, 'dislike': -0.5}
        reward = reward_map.get(action, 0.0)
        return f"✅ Feedback recorded! {action.capitalize()} action (reward: {reward:+.1f}) - The system is learning from your preferences!"

    except Exception as e:
        # UI boundary: report the failure instead of crashing the callback.
        return f"❌ Error: {str(e)}"

def get_user_profile_interface(user_id):
    """Gradio interface for getting user profile.

    Args:
        user_id: Text entered in the User ID box.

    Returns:
        A multi-line summary with the user's interaction totals, a count per
        action and the current top three recommendations, or a warning/error
        message.
    """
    if not user_id.strip():
        return "⚠️ Please enter a User ID"

    try:
        stats = recommender.get_user_stats(user_id)

        lines = [
            f"📊 User Profile: {user_id}\n",
            f"📈 Total Interactions: {stats['total_interactions']}\n",
            f"⭐ Average Reward: {stats['average_reward']:.2f}\n",
            "\n📋 Action Breakdown:\n",
        ]
        lines.extend(
            f"   • {action.capitalize()}: {count} times\n"
            for action, count in stats['action_counts'].items()
        )

        # Get recent recommendations to show learning progress
        recommendations = recommender.get_recommendations(user_id, 3)
        if recommendations:
            lines.append("\n🎯 Recent Top Matches:\n")
            lines.extend(
                f"   {i}. {paper['title']} ({paper['interest_score']:.2f})\n"
                for i, paper in enumerate(recommendations[:3], 1)
            )

        return "".join(lines)

    except Exception as e:
        # UI boundary: report the failure instead of crashing the callback.
        return f"❌ Error: {str(e)}"

# Create Gradio Interface
# Layout: the narrower left column (scale=1) holds the inputs, the feedback
# form and the profile view; the wider right column (scale=2) shows results.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Research Paper Recommender") as demo:
    gr.Markdown("""
    # 🤖 AI Research Paper Recommendation System
    ## Powered by Reinforcement Learning

    Discover personalized research papers that match your interests! The system learns from your feedback to improve recommendations over time.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            user_id = gr.Textbox(
                label="User ID",
                value="gradio_user_001",
                placeholder="Enter your user ID...",
                info="Try: gradio_user_001, gradio_user_002, or create a new ID"
            )

            num_recommendations = gr.Slider(
                minimum=1,
                maximum=10,
                value=5,
                step=1,
                label="Number of Recommendations"
            )

            with gr.Row():
                get_recs_btn = gr.Button("🎯 Get Recommendations", variant="primary")
                profile_btn = gr.Button("📊 View Profile")

            gr.Markdown("### 🔄 Provide Feedback")
            with gr.Row():
                paper_id_feedback = gr.Textbox(
                    label="Paper ID",
                    placeholder="Enter paper ID from recommendations..."
                )
                action_type = gr.Dropdown(
                    choices=["read", "save", "skip", "dislike"],
                    label="Action",
                    value="read"
                )

            feedback_btn = gr.Button("💾 Submit Feedback")
            feedback_output = gr.Textbox(label="Feedback Result", interactive=False)

            profile_output = gr.Textbox(
                label="User Profile",
                lines=10,
                max_lines=15,
                interactive=False
            )

        with gr.Column(scale=2):
            recommendations_output = gr.Textbox(
                label="Recommendations (Text)",
                lines=15,
                max_lines=20,
                interactive=False
            )

            html_output = gr.HTML(
                label="Recommendations (Visual)"
            )

            stats_output = gr.Textbox(
                label="User Statistics",
                interactive=False
            )

    # Event handlers
    # Each button passes the listed input components to its callback and
    # writes the returned value(s) to the listed output components.
    get_recs_btn.click(
        fn=get_recommendations_interface,
        inputs=[user_id, num_recommendations],
        outputs=[recommendations_output, html_output, stats_output]
    )

    profile_btn.click(
        fn=get_user_profile_interface,
        inputs=[user_id],
        outputs=[profile_output]
    )

    feedback_btn.click(
        fn=provide_feedback_interface,
        inputs=[user_id, paper_id_feedback, action_type],
        outputs=[feedback_output]
    )

    gr.Markdown("""
    ### 🎯 How to Use:
    1. **Enter your User ID** (or use a demo user)
    2. **Click "Get Recommendations"** to see personalized papers
    3. **Provide feedback** by entering Paper ID and selecting an action
    4. **View your profile** to see your interaction history
    5. **Watch the system learn** from your preferences!

    ### 📊 Reward System:
    - 📖 **Read**: +0.7 points
    - 💾 **Save**: +1.0 points
    - ⏭️ **Skip**: -0.1 points
    - 👎 **Dislike**: -0.5 points

    The reinforcement learning model uses these rewards to continuously improve your recommendations!
    """)

# Launch the Gradio interface
print("🚀 Launching Gradio Interface...")
print("✅ System ready! The interface will open below.")
print("📚 Available research papers:", len(dataset.papers))
print("🤖 RL model initialized and ready!")

# Test the system
# Smoke test before launching: request two recommendations for a throwaway
# user. Looking up an unknown user creates it, so this leaves a "test_user"
# row in the users table.
print("\n🧪 Running quick test...")
test_recommendations = recommender.get_recommendations("test_user", 2)
if test_recommendations:
    print("✅ System test passed! Recommendations generated successfully.")
    for i, paper in enumerate(test_recommendations, 1):
        print(f"   {i}. {paper['title']} ({paper['interest_score']:.2f})")
else:
    print("❌ System test failed.")

print("\n🎯 Demo Users to try:")
print("   - gradio_user_001")
print("   - gradio_user_002")
print("   - gradio_user_003")
print("   - Or create any new user ID!")

# Launch the interface
# share=True requests a public share URL; debug=True keeps the cell running
# so errors and logs stay visible.
demo.launch(share=True, debug=True)

📚 Initializing research paper dataset...
Creating paper embeddings...
Created embeddings for 200 papers
✅ Dataset ready: 200 papers loaded
🚀 Initializing Recommendation System...
🚀 Launching Gradio Interface...
✅ System ready! The interface will open below.
📚 Available research papers: 200
🤖 RL model initialized and ready!

🧪 Running quick test...
✅ System test passed! Recommendations generated successfully.
   1. Advancements in Multimodal Representation Learning (0.00)
   2. Advancements in Quantum Machine Learning Applications (-0.00)

🎯 Demo Users to try:
   - gradio_user_001
   - gradio_user_002
   - gradio_user_003
   - Or create any new user ID!
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://709e45bee19c4808c5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://709e45bee19c4808c5.gradio.live


