In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# NOTE(review): presumably login() collects Kaggle credentials so that the
# private sources requested in the download cell can be fetched -- confirm
# against the kagglehub documentation.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Fetch the three data sources this notebook was created with: one
# competition, one dataset and one model. This cell relies on the
# `kagglehub` name imported by the login cell.
# NOTE(review): the return values are presumably local download paths.
# None of the *_path variables is read in the visible part of the
# notebook -- main() opens the book from a hard-coded /kaggle/input path.
gemini_long_context_path = kagglehub.competition_download('gemini-long-context')
kane0068_crime_and_punishment_english_path = kagglehub.dataset_download('kane0068/crime-and-punishment-english')
google_gemini_1_5_pro_api_api_gemini_1_5_pro_1_path = kagglehub.model_download('google/gemini-1.5-pro-api/Api/gemini-1.5-pro/1')

print('Data source import complete.')


**Interview with Rodion Romanovich Raskolnikov, the hero of Fyodor Dostoyevsky's novel Crime and Punishment**

In [None]:
# Importing Libraries
import os
import google.generativeai as genai
import logging
from collections import deque
import time
from dataclasses import dataclass
from typing import List, Dict
import json
from datetime import datetime
# API
from kaggle_secrets import UserSecretsClient

In [None]:
class TokenCounter:
    """Running tally of approximate token usage for prompt/response pairs.

    Counts are word-based estimates, not tokenizer output: every
    whitespace-separated word is counted as ``tokens_per_word`` tokens.
    Totals are accumulated as floats and truncated to ``int`` only when
    they are reported, so rounding error does not build up per call.
    """

    def __init__(self, tokens_per_word: float = 1.3):
        """Create an empty counter.

        Args:
            tokens_per_word: Estimated number of tokens per word. The
                default of 1.3 is a rough average for English text.
        """
        self.tokens_per_word = tokens_per_word
        self.total_prompt_tokens = 0
        self.total_response_tokens = 0
        self.context_window_sizes = []  # Estimated prompt+response size per interaction

    def estimate_tokens(self, text: str) -> float:
        """Return the estimated (fractional) token count of ``text``.

        ``None`` and empty strings both count as zero tokens.
        """
        # The result is a float; callers truncate when reporting.
        return len((text or "").split()) * self.tokens_per_word

    def update_counts(self, prompt: str, response: str):
        """Record one interaction consisting of ``prompt`` and ``response``."""
        prompt_tokens = self.estimate_tokens(prompt)
        response_tokens = self.estimate_tokens(response)

        self.total_prompt_tokens += prompt_tokens
        self.total_response_tokens += response_tokens

        # One entry per interaction: the combined size of this exchange.
        self.context_window_sizes.append(prompt_tokens + response_tokens)

    def get_total_tokens(self) -> int:
        """Return prompt plus response tokens so far, truncated to an int."""
        return int(self.total_prompt_tokens + self.total_response_tokens)

    def get_stats(self) -> dict:
        """Return a dict of integer usage statistics.

        Keys: ``prompt_tokens``, ``response_tokens``, ``total_tokens``,
        ``max_window_size`` and ``avg_window_size``. The two window
        figures are 0 when no interaction has been recorded yet.
        """
        windows = self.context_window_sizes
        return {
            "prompt_tokens": int(self.total_prompt_tokens),
            "response_tokens": int(self.total_response_tokens),
            "total_tokens": self.get_total_tokens(),
            "max_window_size": int(max(windows)) if windows else 0,
            "avg_window_size": int(sum(windows) / len(windows)) if windows else 0,
        }

In [None]:
class TextLoader:
    """Reads a UTF-8 text file and splits it into paragraph-aligned chunks.

    Paragraphs are delimited by blank lines (``'\\n\\n'``). Consecutive
    paragraphs are packed into one chunk until adding the next paragraph
    would reach ``chunk_size`` characters. A single paragraph longer than
    ``chunk_size`` is never split; it becomes a chunk of its own.
    """

    def __init__(self, file_path: str, chunk_size: int = 5000):
        """Remember the file location; nothing is read until loading.

        Args:
            file_path: Path of the text file to load.
            chunk_size: Soft upper bound on chunk length, in characters.
                Can be increased due to Gemini's large context.
        """
        self.file_path = file_path
        self.content = ""
        self.chunks = []
        self.chunk_size = chunk_size
        self.token_counter = TokenCounter()
        self.chunk_summaries = []  # Store summaries for each chunk

    def load_and_chunk_text(self):
        """Load the file into ``self.content`` and rebuild ``self.chunks``.

        Returns:
            The number of chunks produced (0 for an empty or blank file).

        Raises:
            Exception: Any error raised while reading the file is logged
                and re-raised unchanged.
        """
        try:
            with open(self.file_path, 'r', encoding='utf-8') as file:
                self.content = file.read()

            # Count the whole book once as "prompt" text.
            self.token_counter.update_counts(self.content, "")

            chunks = []
            current_chunk = ""
            for paragraph in self.content.split('\n\n'):
                if len(current_chunk) + len(paragraph) < self.chunk_size:
                    current_chunk += paragraph + '\n\n'
                    continue
                # Flush the chunk built so far. Skip it when it is empty or
                # only whitespace, which happens when the very first
                # paragraph already exceeds chunk_size; otherwise an empty
                # chunk would be handed to the model.
                if current_chunk.strip():
                    chunks.append(current_chunk)
                current_chunk = paragraph + '\n\n'

            # Flush the trailing chunk, again ignoring blank content.
            if current_chunk.strip():
                chunks.append(current_chunk)

            self.chunks = chunks
            return len(self.chunks)
        except Exception as e:
            logging.error(f"Error loading text: {str(e)}")
            raise

In [None]:
class RaskolnikovAI:
    """Role-play chatbot that answers as Raskolnikov through a Gemini model.

    Typical use: construct, call :meth:`train_with_book` once, then call
    :meth:`generate_response` for each question.

    NOTE(review): the per-chunk analyses gathered by ``train_with_book``
    are stored in ``analysis_results`` and written to JSON, but they are
    not included in the prompts built by ``generate_response``, and
    ``self.context`` is never populated. Confirm whether that is intended.
    """

    def __init__(self, api_key: str, book_path: str,
                 model_name: str = 'gemini-1.5-pro-latest',
                 request_delay: float = 2.0):
        """Configure the Gemini client and prepare empty state.

        Args:
            api_key: Key passed to ``genai.configure``.
            book_path: Path of the novel's text file.
            model_name: Name given to ``genai.GenerativeModel``.
            request_delay: Seconds to sleep after each successfully
                processed chunk during training.
        """
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)
        self.loader = TextLoader(book_path)
        self.context = ""
        self.training_done = False
        self.logger = logging.getLogger(__name__)
        self.token_counter = TokenCounter()
        self.conversation_memory = deque(maxlen=5)  # Store recent conversations
        self.analysis_results = []  # Store analysis results from training
        self.request_delay = request_delay
        self._training_started_at = None  # Set when train_with_book begins

    def train_with_book(self):
        """Send every book chunk to the model and collect its analyses.

        A failure on one chunk is logged and that chunk is skipped; the
        loop carries on. Progress is written to ``training_progress.json``
        after every fifth successfully processed chunk position, and
        ``final_analysis.json`` is written at the end.

        Raises:
            Exception: Errors outside the per-chunk handling (for example
                a failure to load the book) are logged and re-raised.
        """
        try:
            self._training_started_at = datetime.now()
            num_chunks = self.loader.load_and_chunk_text()
            print(f"Book loaded successfully. Processing {num_chunks} sections...")

            context_prompt = """
            Analyze the following text from Crime and Punishment to deeply understand
            Raskolnikov's character, his psychological state, and key events.
            Focus on:
            1. His motivations and philosophical ideas
            2. His psychological state and emotional journey
            3. Key interactions with other characters
            4. Important events and their impact on him

            Text chunks will follow. Build a comprehensive understanding of the character.
            """

            succeeded = 0
            for i, chunk in enumerate(self.loader.chunks):
                try:
                    prompt = f"{context_prompt}\n\nText chunk {i+1}:\n{chunk}"
                    response = self.model.generate_content(prompt)

                    # Store analysis results
                    self.analysis_results.append(response.text)
                    succeeded += 1

                    # Update token counts
                    self.token_counter.update_counts(prompt, response.text)

                    print(f"Processed chunk {i+1}/{num_chunks}")
                    print(f"Accumulated tokens: {self.token_counter.get_total_tokens():,}")

                    # Save progress periodically
                    if (i + 1) % 5 == 0:
                        self._save_progress()

                    # Pause between requests.
                    time.sleep(self.request_delay)
                except Exception as e:
                    # Best effort: one bad chunk must not abort the whole run.
                    self.logger.error(f"Error processing chunk {i+1}: {str(e)}")
                    continue

            if num_chunks and not succeeded:
                # Without this, a run where every request failed (e.g. a bad
                # API key) looks the same as a successful one.
                self.logger.warning(
                    f"None of the {num_chunks} chunks could be analyzed."
                )

            self.training_done = True
            self._save_final_analysis()

            token_stats = self.token_counter.get_stats()
            print("\nProcessing completed!")
            print(f"Total prompt tokens: {token_stats['prompt_tokens']:,}")
            print(f"Total response tokens: {token_stats['response_tokens']:,}")
            print(f"Total tokens processed: {token_stats['total_tokens']:,}")
            print(f"Maximum context window size: {token_stats['max_window_size']:,}")
            print(f"Average context window size: {token_stats['avg_window_size']:,}")

        except Exception as e:
            self.logger.error(f"Training error: {str(e)}")
            raise


    def generate_response(self, user_input: str) -> str:
        """Answer ``user_input`` in character.

        Returns:
            The model's reply; a fixed notice when training has not been
            completed; or a fixed in-character apology when the request
            fails (the error itself is logged, not raised).
        """
        if not self.training_done:
            return "Please complete the book processing first."

        try:
            # Include recent conversation history
            recent_history = "\n".join([f"Q: {q}\nA: {a}" for q, a in self.conversation_memory])

            character_prompt = f"""
            You are Rodion Raskolnikov from Crime and Punishment. Respond as him,
            incorporating his complex psychological state, philosophical beliefs,
            and personal experiences from the novel. Consider:

            1. Your theory of the extraordinary man
            2. Your feelings about the murder and guilt
            3. Your relationship with Sonia and other characters
            4. Your current psychological and emotional state

            Recent conversation history:
            {recent_history}

            Maintain Raskolnikov's intellectual depth and internal conflicts in your response.
            """

            prompt = f"{character_prompt}\n\nQuestion: {user_input}\n\nRespond as Raskolnikov:"

            response = self.model.generate_content(prompt)
            response_text = response.text

            # Update conversation memory (the deque keeps the last 5 exchanges)
            self.conversation_memory.append((user_input, response_text))

            # Update token counts
            self.token_counter.update_counts(prompt, response_text)

            # Log current token usage
            print(f"\nCurrent total tokens: {self.token_counter.get_total_tokens():,}")

            return response_text

        except Exception as e:
            self.logger.error(f"Error generating response: {str(e)}")
            return "I must gather my thoughts... (An error occurred)"

    def _save_progress(self):
        """Write a progress snapshot to ``training_progress.json``.

        Errors are logged and swallowed so that a failed save does not
        interrupt training.
        """
        try:
            progress_data = {
                'timestamp': datetime.now().isoformat(),
                'token_stats': self.token_counter.get_stats(),
                'analysis_results': self.analysis_results
            }

            with open('training_progress.json', 'w', encoding='utf-8') as f:
                json.dump(progress_data, f, indent=2)
        except Exception as e:
            self.logger.error(f"Error saving progress: {str(e)}")

    def _save_final_analysis(self):
        """Write all analyses and statistics to ``final_analysis.json``.

        Errors are logged and swallowed.
        """
        try:
            final_data = {
                'completion_time': datetime.now().isoformat(),
                'token_stats': self.token_counter.get_stats(),
                'analysis_results': self.analysis_results,
                'training_summary': self._generate_training_summary()
            }

            with open('final_analysis.json', 'w', encoding='utf-8') as f:
                json.dump(final_data, f, indent=2)
        except Exception as e:
            self.logger.error(f"Error saving final analysis: {str(e)}")


    def _generate_training_summary(self) -> dict:
        """Return chunk count, token statistics and elapsed training time.

        ``training_duration`` is the time elapsed since ``train_with_book``
        started, formatted as ``H:MM:SS[.ffffff]``. It is ``"0:00:00"`` when
        training has not been started.
        """
        now = datetime.now()
        started = self._training_started_at or now
        return {
            'total_chunks': len(self.loader.chunks),
            'token_usage': self.token_counter.get_stats(),
            'training_duration': str(now - started)
        }

In [None]:
def main():
    """Run the interactive interview from the command line or a notebook.

    Reads the Gemini API key from Kaggle user secrets, processes the book,
    then answers questions read from standard input until the user types
    'quit', at which point token statistics and a usage bar are printed.
    Any exception raised after the key has been fetched is printed and
    logged rather than propagated.
    """
    logging.basicConfig(level=logging.INFO)

    secrets_client = UserSecretsClient()
    api_key = secrets_client.get_secret("GEMINI_API_KEY")
    book_path = "/kaggle/input/crime-and-punishment-english/crime_and_punishment_english.txt"

    try:
        # Build the character model and let it read the novel.
        interviewee = RaskolnikovAI(api_key, book_path)
        print("Processing Crime and Punishment...")
        interviewee.train_with_book()

        print("\nYou can now converse with Raskolnikov. Type 'quit' to exit.")

        # Question/answer loop; 'quit' (any casing) ends the session.
        question = input("\nYour question: ").strip()
        while question.lower() != 'quit':
            answer = interviewee.generate_response(question)
            print(f"\nRaskolnikov: {answer}")
            question = input("\nYour question: ").strip()

        # Closing report.
        token_stats = interviewee.token_counter.get_stats()
        window_limit = 2000000  # the "2M" context window the bar is scaled to
        peak_window = token_stats['max_window_size']

        print("\nFinal Token Statistics:")
        print(f"Total prompt tokens: {token_stats['prompt_tokens']:,}")
        print(f"Total response tokens: {token_stats['response_tokens']:,}")
        print(f"Total tokens processed: {token_stats['total_tokens']:,}")
        print(f"Maximum context window: {token_stats['max_window_size']:,}")
        print(f"Average context window: {token_stats['avg_window_size']:,}")
        print(f"Window utilization: {(peak_window / window_limit * 100):.2f}%")

        # Text bar: filled cells are proportional to the peak window size.
        bar_width = 40
        filled = int((peak_window / window_limit) * bar_width)
        empty = bar_width - filled
        print("\nWindow Usage: [", end="")
        print("=" * filled + " " * empty + "]")
        print(f"{'0':8} {peak_window:,}{'2M':>8}")

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        logging.error(f"System error: {str(e)}")

if __name__ == "__main__":
    main()

Example questions:

- Why did you believe that your crime could be justified as a moral act?
- Do you see yourself as a “superman” above ordinary morality, or was that simply a way to rationalize your actions?
- After committing the murder, did you feel any sense of relief, or did guilt consume you immediately?
- What were you hoping to accomplish by killing the pawnbroker, and do you think it was worth the price you paid emotionally?
- How did your interactions with Sonia influence your view on redemption and morality?
- Do you regret your actions, or do you still believe in your theory that some individuals have the right to bypass moral constraints?
- How did the suffering of those around you, like your mother and sister, affect your own perception of guilt and responsibility?
- What role did poverty and desperation play in pushing you towards committing the crime?
- How would you describe your relationship with Inspector Porfiry, and do you feel he understood your true nature?
- Did you find any meaning or personal growth through the hardships you endured, or do you see it all as needless suffering?