# Stegasus

## Commons

### Colab Specific

In [None]:
!apt-get install openjdk-17-jdk-headless -qq > /dev/null
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-17-openjdk-amd64"
!update-alternatives --set java /usr/lib/jvm/java-17-openjdk-amd64/bin/java
!java -version

In [None]:
!git clone https://github.com/NasoohOlabi/Stegasus.git

In [None]:
!mv Stegasus/* .
!rm -r Stegasus

In [None]:
%pip install -r requirements.txt

In [None]:
ROOT_DIR = '.'

### General Common

In [1]:
#@title random_bit_stream
import random

def random_bit_stream(length=None):
    """Build a random bit string.

    Args:
        length: Number of characters to generate. When ``None``, the length
            itself is drawn at random from 0 to 100 (both inclusive).

    Returns:
        A ``str`` of ``length`` characters, each either '0' or '1'.
    """
    bit_count = random.randint(0, 100) if length is None else length
    bits = []
    for _ in range(bit_count):
        bits.append(str(random.randint(0, 1)))
    return ''.join(bits)
def int_to_binary_string(n: int, length: int):
    """Render ``n`` in base 2, left-padded with '0' to at least ``length`` characters.

    The result is never truncated: when the binary form is already
    ``length`` characters or longer it is returned as is.
    """
    digits = bin(n)[2:]  # strip the two-character '0b' prefix added by bin()
    missing = length - len(digits)
    if missing > 0:
        return '0' * missing + digits
    return digits

In [2]:
# Make the directory one level above the current working directory importable.
import os, sys
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '../'))
if parent_dir not in sys.path:
    # Prepend (rather than append) so this directory is searched first.
    sys.path.insert(0, parent_dir)

## Frustratingly Simple BERT

In [3]:
# %pip install transformers

In [4]:
from FrustratinglySimpleBert import MaskedStego

In [5]:
import nltk
nltk.download('stopwords')

[nltk_data] Error loading stopwords: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>


False

In [6]:
masked_stego = MaskedStego()

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Typoceros

In [7]:
from TypocerosJar import JavaJarWrapper

In [8]:
# !java -jar ./Typoceros4j.jar

In [9]:
Typo = JavaJarWrapper()

In [10]:
Typo.encode("hi, how are you?","1010101001")

('hi, hw are you?', '0101001')

## Emojier

In [11]:
LONG_TEXT = """Text literals and metacharacters make up this string. The compile function is used to create the pattern."""

In [12]:
# https://github.com/huggingface/torchMoji

In [13]:
from Emojier import Emojier

#### Emojier tests

In [None]:
tests = 10
def runTests():
    """Round-trip random payloads through ``Emojier`` and assert nothing is lost.

    Runs ``tests`` iterations (module-level counter). Each iteration encodes a
    fresh 600-bit random string into ``LONG_TEXT``, decodes the result, appends
    the bits that did not fit, prints a report, and asserts that the
    reconstructed bit string equals the original payload.
    """
    print(f"Running {tests} tests")
    for _ in range(tests):
        data = random_bit_stream(600)
        text = LONG_TEXT
        encoded_text, rem = Emojier.encode(text, data)
        print('rem=', rem)
        # Decoded bits followed by the un-embedded remainder should rebuild the payload.
        deData = Emojier.decode(encoded_text)[1]
        deData += rem
        print(f'text="{text}"\n->\nencoded_text="{encoded_text}" \ndata="{data}"\ndeData="{deData}"\ndata==deData="{data==deData}"')
        print(f'ratio={len(data)-len(rem)} / {len(text)}={(len(data)-len(rem)) / len(text)}')
        assert data==deData
        # Visual separator between iterations.
        for chunk in ('\n', "#"*100, '\n'):
            print(chunk)
# runTests()
# 0000

In [None]:
def test_stream():
    """Yield distinct random 48-character bit strings.

    Each value is a random integer below 2**48 that has not been produced
    before, formatted by ``Emojier.int_to_binary_string``.

    NOTE: a later cell in this notebook defines another ``test_stream`` with a
    different signature; once that cell runs it replaces this definition.
    """
    width = 48
    upper_bound = 2**width - 1
    seen = set()
    for _ in range(2**width):
        candidate = random.randint(0, upper_bound)
        # Redraw until we hit a value that has not been produced yet.
        while candidate in seen:
            candidate = random.randint(0, upper_bound)
        seen.add(candidate)
        yield Emojier.int_to_binary_string(candidate, width)

## Putting it all Together

In [14]:
#@title Pipe
from typing import List, Any, Dict, Callable, Optional

def pipe(callbacks: List[Callable], config: Optional[Dict[str, Any]] = None, index=0):
    """Compose ``callbacks`` into a single function of one ``state`` argument.

    Every callback is invoked as ``callback(state, next_callback, config)``.
    ``next_callback`` is ``None`` for the last callback in the list; otherwise
    it is a callable that runs the following callback when invoked as
    ``next_callback(state, next_callback, config)``. Each callback decides
    whether and when to hand over to the next one, so it can do work both
    before and after the rest of the chain has run.

    Args:
        callbacks: Callbacks to chain, in execution order.
        config: Mapping handed to every callback. When omitted, a fresh empty
            dict is created for this ``pipe`` call (it is not shared between
            separate ``pipe`` calls).
        index: Position in ``callbacks`` at which the chain starts.

    Returns:
        A function ``_pipe(state)`` returning whatever the first executed
        callback returns, or ``state`` unchanged when there is no callback at
        ``index`` (for example when ``callbacks`` is empty).
    """
    if config is None:
        config = {}

    def process_callbacks(state, callbacks: List[Callable], config: Dict[str, Any], index=0):
        # Nothing to run at this position: pass the state through untouched.
        if index >= len(callbacks):
            return state

        callback = callbacks[index]

        # Only non-final callbacks receive a continuation. Its second
        # positional argument is accepted for call-site symmetry and ignored.
        next_callback = None
        if index < len(callbacks) - 1:
            next_callback = lambda s, c, cf=config: process_callbacks(s, callbacks, cf, index + 1)

        return callback(state, next_callback, config)

    def _pipe(state):
        return process_callbacks(state, callbacks, config, index)

    return _pipe

def bert_callback(state, next_callback, config):
    """Pipeline stage backed by the module-level ``masked_stego`` object.

    ``state`` is a pair ``(message_pipe, bytes_pipe)`` of lists used as stacks.

    * On the way in (``config['encode']``): calls ``masked_stego`` on the top
      text and top bit string, then pushes the resulting ``encoded_text`` and
      ``remaining_bytes``.
    * Hands over to ``next_callback`` when one is supplied.
    * On the way out (``config['decode']``): pops the top text and bits,
      decodes the text and appends the popped bits to the decoded ones. When
      encoding was also enabled the result is asserted equal to the new top of
      ``bytes_pipe``; otherwise the popped text and the combined bits are
      pushed back.

    The literals ``3`` and ``0.01`` are passed unchanged to both the encode and
    the decode call; their meaning is defined by ``masked_stego`` (not visible
    in this notebook).

    Raises:
        ValueError: if ``state`` is ``None``.
    """
    if state is None:
        raise ValueError('State is None')

    pipe_verbose = config['pipe_verbose']  # read but not used by this stage
    do_encode, do_decode = config['encode'], config['decode']
    message_pipe, bytes_pipe = state

    if do_encode:
        stega_bert = masked_stego(message_pipe[-1], bytes_pipe[-1], 3, 0.01)
        message_pipe.append(stega_bert.encoded_text)
        bytes_pipe.append(stega_bert.remaining_bytes)

    if next_callback is not None:
        state = next_callback(state, next_callback, config)

    if not do_decode:
        return state

    stego_text = message_pipe.pop()
    leftover_bits = bytes_pipe.pop()
    recovered_bits = masked_stego.decode(stego_text, 3, 0.01) + leftover_bits
    if do_encode:
        # Round trip: what we recover must equal what we were given.
        assert recovered_bits == bytes_pipe[-1]
    else:
        message_pipe.append(stego_text)
        bytes_pipe.append(recovered_bits)

    return state

def emojer_callback(state, next_callback, config):
    """Pipeline stage backed by ``Emojier``.

    ``state`` is a pair ``(message_pipe, bytes_pipe)`` of lists used as stacks.
    The top text and top bit string are read on entry.

    * On the way in (``config['encode']``): pushes the text and leftover bits
      returned by ``Emojier.encode``.
    * Hands over to ``next_callback`` when one is supplied; when this is the
      last stage the current ``state`` is printed instead.
    * On the way out (``config['decode']``): pops the top text and bits,
      decodes the text and appends the popped bits to the decoded ones. When
      encoding was also enabled both the bits and the recovered text are
      asserted equal to the new tops of their stacks; otherwise they are
      pushed.

    Raises:
        ValueError: if ``state`` is ``None``.
    """
    if state is None:
        raise ValueError('State is None')

    message_pipe, bytes_pipe = state
    text, data = message_pipe[-1], bytes_pipe[-1]
    verbose = config['verbose']
    pipe_verbose = config['pipe_verbose']  # read but not used by this stage
    do_encode, do_decode = config['encode'], config['decode']

    if do_encode:
        stego_text, leftover = Emojier.encode(text, data, verbose=verbose)
        message_pipe.append(stego_text)
        bytes_pipe.append(leftover)

    if next_callback is None:
        print(state)
    else:
        state = next_callback(state, next_callback, config)

    if not do_decode:
        return state

    popped_text = message_pipe.pop()
    popped_bits = bytes_pipe.pop()
    original_text, deData = Emojier.decode(popped_text, verbose=verbose)
    deData += popped_bits
    if do_encode:
        # Round trip: both payload and cover text must come back unchanged.
        assert deData == bytes_pipe[-1]
        assert original_text == message_pipe[-1]
    else:
        message_pipe.append(original_text)
        bytes_pipe.append(deData)

    return state

def typo_callback(state, next_callback, config):
    """Pipeline stage backed by the module-level ``Typo`` wrapper.

    ``state`` is a pair ``(message_pipe, bytes_pipe)`` of lists used as stacks.
    The top text and top bit string are read on entry.

    * On the way in (``config['encode']``): spell-normalises the text with
      ``Typo.spell``, encodes the payload into it with ``Typo.encode`` and
      pushes the resulting text and the leftover bits.
    * Hands over to ``next_callback`` when one is supplied.
    * On the way out (``config['decode']``): pops the top text and bits,
      decodes the text and appends the popped (un-embedded) bits to the
      decoded ones. When encoding was also enabled the recovered text and bits
      are asserted against what was encoded; otherwise they are pushed.

    When ``config['pipe_verbose']`` is truthy the incoming state is printed.

    Raises:
        ValueError: if ``state`` is ``None``.
    """
    if state is None:
        raise ValueError('State is None')

    message_pipe, bytes_pipe = state
    text = message_pipe[-1]
    data = bytes_pipe[-1]
    pipe_verbose = config['pipe_verbose']
    encode = config['encode']
    decode = config['decode']

    if pipe_verbose:
        print(state)

    spelled_text = None
    if encode:
        # Keep the normalised text: it, not the raw input, is what gets encoded.
        spelled_text = Typo.spell(text)
        encoded_text, rem = Typo.encode(spelled_text, data)
        message_pipe.append(encoded_text)
        bytes_pipe.append(rem)

    if next_callback is not None:
        state = next_callback(state, next_callback, config)

    if decode:
        encoded_pipe_text = message_pipe.pop()
        rem_pipe_bytes = bytes_pipe.pop()

        original_string, values = Typo.decode(encoded_pipe_text)
        # Full payload = bits recovered from the text + bits that never fit.
        # NOTE(review): assumes `values` is a bit string like the remainder
        # returned by Typo.encode - confirm against TypocerosJar.
        recovered = values + rem_pipe_bytes
        if encode:
            # Round trip. Compare against the spell-normalised text because
            # that is the text Typo.encode actually received.
            assert original_string == spelled_text
            assert recovered == data
        else:
            message_pipe.append(original_string)
            bytes_pipe.append(recovered)

    return state

callbacks = [bert_callback, emojer_callback,typo_callback]
# # callbacks = [typo_callback]

# # Apply the function with an initial state
# initial_state = [['Hi, How are you?'],[random_bit_stream(30)]]
# p = pipe(callbacks, {"verbose": False,"pipe_verbose": False,"encode":True,"decode":False,"test":False})
# mq, bq = p(initial_state)

# print(mq[-1],bq[-1])



In [15]:
# PersonaGPTBot_Singleton = PersonaGPTBot({'Alice':["I'm a french girl","I love art","my name is Alice"],"Bob" :["I'm a french boy","I love art","my name is Bob"]})

In [16]:
def StegasusEncode(text, bytes_str):
    """Run the BERT -> Emojier -> Typo pipeline in encode-only mode.

    Args:
        text: Cover text placed at the bottom of the message stack.
        bytes_str: Payload placed at the bottom of the bit-string stack.

    Returns:
        A tuple of the top entries of the message stack and of the bit-string
        stack once the pipeline has run.
    """
    settings = {
        "verbose": False,
        "pipe_verbose": False,
        "encode": True,
        "decode": False,
        "test": False,
    }
    stages = [bert_callback, emojer_callback, typo_callback]
    run = pipe(stages, settings)
    message_stack, bits_stack = run([[text], [bytes_str]])
    return message_stack[-1], bits_stack[-1]
def StegasusDecode(text):
    """Run the BERT -> Emojier -> Typo pipeline in decode-only mode.

    Args:
        text: Text to decode; the bit-string stack starts with an empty string.

    Returns:
        A tuple of the top entries of the message stack and of the bit-string
        stack once the pipeline has run.
    """
    settings = {
        "verbose": False,
        "pipe_verbose": False,
        "encode": False,
        "decode": True,
        "test": False,
    }
    stages = [bert_callback, emojer_callback, typo_callback]
    run = pipe(stages, settings)
    message_stack, bits_stack = run([[text], ['']])
    return message_stack[-1], bits_stack[-1]
def StegasusTest(text):
    """Round-trip ``text`` through the whole pipeline with a random payload.

    A random bit string of ``len(text)`` bits is encoded and immediately
    decoded again. Both ``encode`` and ``decode`` are enabled so that the
    ``assert`` checks inside each callback (which only run when both flags are
    set) verify that every stage recovers what it embedded.

    Args:
        text: Cover text to test with.

    Returns:
        A tuple of the top entries of the message stack and of the bit-string
        stack once the pipeline has run.

    Raises:
        AssertionError: propagated from a callback whose round trip failed.
    """
    initial_state = [[text], [random_bit_stream(len(text))]]
    callbacks = [bert_callback, emojer_callback, typo_callback]
    # encode must be True here: with encode=False this was just a second copy
    # of StegasusDecode and the random payload was never embedded.
    p = pipe(callbacks, {"verbose": False, "pipe_verbose": False, "encode": True, "decode": True, "test": False})
    mq, bq = p(initial_state)
    return (mq[-1], bq[-1])

## OpenAI

In [20]:
LONG_TEXT = '''Metaphysical solipsism is a variety of solipsism. Based on a philosophy of subjective idealism, metaphysical solipsists maintain that the self is the only existing reality and that all other realities, including the external world and other persons.'''

In [21]:
Famous_Demo = 'Hi, How are you?'

In [None]:
%%time
xtx = LONG_TEXT
# xtx = Famous_Demo
data = random_bit_stream(len(xtx))
encoded,rem = StegasusEncode(xtx,data)
print((xtx,encoded,(len(xtx) - len(rem)) / len(xtx)))

In [None]:
from Bot import *

In [None]:
## OpenAi
import openai
import re
demo_post = re.sub(r'\s+', ' ', """235r/ExperiencedDevs•Posted byu/EcstaticAssignment12 hours agoThe backend generalist software engineer

            I think both myself and a lot of my coworkers/friends fall under this category of "backend non web-dev stack agnostic generalist software engineer" that seem to hang out in product companies.While I've gotten experience in domains by virtue of the teams and projects I've worked with, I wouldn't really identify them as being my "specialty". I've also never really identified with my tech stack, both because it changes a lot and because frankly the complexity of my work never seems to boil down to low level implementation expertise. There are almost never any serious design meetings where the main point of contention is anything that is on the layer of programming patterns or language details (but obviously yes to system design). The problems that I mainly solve seem to be more "engineering" than programming, and while I'd say they are complex, they seem to be mostly a function of general analytical reasoning and more system design-level understanding.Is this sort of position actually that common outside of tech companies? I'm asking mostly out of curiosity, but also while I was lucky to land in another tech company after getting laid off in January, if I get laid off again and don't have the same luck, I'm not sure if I should take steps to brand myself as something less generalist when exploring other options.51 commentsAwardsharesave22 people hereu/getsentry·promotedPaste this line into your terminal to use Next.js with Sentry.

            sentry.ioInstallComment as No_Door_3720CommentBoldItalicsLinkStrikethroughInline CodeSuperscriptSpoilerHeadingBulleted ListNumbered ListQuote BlockCode BlockTableMarkdown ModeSort by: best|

              level 1_sw00 · 10 hr. agoLead Developer | 11 YOEGeneralist who works on higher level design, development practices and techniques?Welcome to consulting, brother/sister.156ReplyGive AwardShareReportSaveFollowlevel 2rkeet · 8 hr. agoLead Application Engineer / 9 YoE / NLDCan I... Bother you for some tips? ;)OP sounds like me, and I'm looking into different applicable jobs at this moment.So, I'll take any hints, options, etc. as to what to look at, because I don't know whether to look at Lead Developer, Solution Architect, Facility Manager, Integration/service consultant, or how to find a mix.Bit of a problem when I like what I do. Just not where I do it.20ReplyGive AwardShareReportSaveFollowlevel 2bwainfweeze · 3 hr. agolevel 2mrcrassic · 5 hr. agoYup! Exactly where I landed.2ReplyGive AwardShareReportSaveFollowlevel 1d0s4gw · 7 hr. agoAny given system is not supposed to have a high degree of technical complexity. The point of being a senior or staff engineer is to enable juniors and mid level engineers to deliver impact quickly with low risk. If the system is easy to extend and operate then that’s because the people that designed it did a good job.Your job is to quickly translate vague information into clear requirements into shipped code. No one cares which data structures was used when you recovered $50m a year in opex. Your resume should explain the value that you delivered. The tech stack is ancillary.33ReplyGive AwardShareReportSaveFollowlevel 2cjrun · 4 hr. agoProblem is, if you don’t have those buzzwords on your résumé, even the hiring manager won’t be interested.6ReplyGive AwardShareReportSaveFollowlevel 3d0s4gw · 3 hr. agoYea exactly. But it’s at the end of the block on the resume. It’s not the top line. 
The focus is the business result.Senior Software Engineer, Company, City, State (Start date - End date)Technical lead for the <name of service>, which <achieved X quantitative result> for <customer type> by <method of solving the problem>distributed systems, Java, SQL, AWS, S3, Linux, and Bash3ReplyGive AwardShareReportSaveFollowlevel 1GargantuChet · 10 hr. agoThis sounds like a joy to me. This describes my role in a big manufacturer, and most of the time I feel like I’m the only one on the planet.Mind if I PM?64ReplyGive AwardShareReportSaveFollowlevel 2EcstaticAssignmentOp · 10 hr. agohaha go for it4ReplyGive AwardShareReportSaveFollowlevel 2bizcs · 3 hr. agoI also work for a manufacturer in this sort of role, though. I consider us to be large but others from megalith manufacturers might beg to differ.1ReplyGive AwardShareReportSaveFollowlevel 1gabs_ · 10 hr. agoI also fit in this category! I'm only a mid-level developer, but I have worked at tech companies previously and I'm now developing a Big Data project at a logistics company.9ReplyGive AwardShareReportSaveFollowlevel 1Inside_Dimension5308 · 8 hr. agoI always advocate the backend generalist software engineer. Tech stacks are replaceable. Knowledge to determine which tech stack to use will be eternal.8ReplyGive AwardShareReportSaveFollowlevel 1nutrecht · 8 hr. agoLead Software Engineer / EU / 18+ YXPThe problem with being a generalist is that, if you're not careful, your experience remains very shallow. You can end up not having 10 years of experience, but 1 year repeated 10 times.For the most part  my 'brand' as a self employed contractor who focusses on the Java ecosystem doesn't have much to do with Java itself, but more with the type of work I do. I focus on complex enterprise systems, often with a ton of different systems interacting, and providing my clients with deep expertise in how to not turn those into big balls of mud. 
If you're mostly doing the same simple back-end projects (like in wordpress as an extreme example) you don't get that experience.So I don't agree that what you're describing is 'good' or 'bad'. It really depends on how you plan and advance your career. For example as this generalist if you don't now have cloud-native experience you're IMHO falling behind the curve.28ReplyGive AwardShareReportSaveFollowlevel 1ir0nuckles · 6 hr. agoThere are almost never any serious design meetings where the main point of contention is anything that is on the layer of programming patterns or language details (but obviously yes to system design). The problems that I mainly solve seem to be more "engineering" than programming, and while I'd say they are complex, they seem to be mostly a function of general analytical reasoning and more system design-level understanding.I'm confused. Isn't this what being a software engineer is?I've never done "design reviews" of programming patterns. That's for a code review, or if needed, you can engage your team before you start a project to ensure you're following best practices.This post is really strange to me. If you're asking "how do I prepare my skillset for find a job outside of tech" then I would suggest you become really good cloud computing platforms and patterns. Almost every enterprise is using a cloud provider at this point. If you're the expert in AWS, GCP, or Azure, you're probably guaranteed to find some work somewhere in the world working with one of these platforms.4ReplyGive AwardShareReportSaveFollowlevel 1FlutterLovers · 10 hr. agoGeneralist will make you a better engineer, but focus will get you hired. Try to become an expert at one backend framework that is currently in demand, while also learning the basics of adjacent systems.30ReplyGive AwardShareReportSaveFollowlevel 2chrismv48 · 9 hr. 
agoWhenever I hear this sentiment I feel confused; all the best tech companies I’m aware of are explicitly tech agnostic (FAANG as well as best paying startups). It’s the companies that insist on having experience in a very specific stack that tend to pay poorly in my experience. What am I missing?72ReplyGive AwardShareReportSaveFollowlevel 3Successful_Leg_707 · 8 hr. agoMy understanding is the specific tech stack companies tend to “hire when it hurts”.  They want someone already up to speed on a language and framework in demand.  They are less willing to gamble on long term potential.  You get a salary but no RSU to retain you.Tech agnostic companies hire for long term potential and projected growth.  Amazon for example will use a language like Java but develop their own in house framework, so knowledge in a specific framework like Spring is less useful.  The tech companies will have some sort of leetcode interview process that is an indicator for general cognitive ability and fundamental comp sci concepts.  On top of a base salary, you get the RSUs which are like golden handcuffs that encourage you to stay until they vest41ReplyGive AwardShareReportSaveFollowlevel 4generatedcode · 8 hr. agotech stack companies tend to “hire when it hurts”.you deserve an award !18ReplyGive AwardShareReportSaveFollowlevel 4EcstaticAssignmentOp · 3 hr. agoTech agnostic companies hire for long term potential and projected growth.I think this trend may be part of the picture, but I'm not sure if it's the full picture. The top startups also tend to hire the "general cognitive ability + fundamentals" way, despite having the same short timeline requirements, while some legacy companies that tend to have longer tenures hire the more specific way. It's possible it's more a function of a higher hiring bar tending to correlate with the agnostic approach, whichever way that causation goes.1ReplyGive AwardShareReportSaveFollowlevel 3Acidic-Soil · 8 hr. agolevel 2ExistentialDroid23 · 9 hr. 
agoI see those claims like "generalists are better engineers" but I don't see the connection. Wouldn't diving to one language/framework deep for 2-3 years give you a deeper understanding that you carry around easier later than fumbling 2-3 frameworks on the same timeframe?I guess what I dislike is the equivalency of "more languages = better engineer" when in fact what matters is the proper use of the tool, not necessarily how many tools you have.3ReplyGive AwardShareReportSaveFollowlevel 3slightly_offtopic · 9 hr. agoEach language/framework has a preference for a certain way of solving problems. Learning several tools is a proxy for learning sev""")


bob = Person(first_name='Bob Doe', gender='male',age=13,city='France') \
  .add_favorite('color','blue') \
  .add_interest('travelling') \
  .add_favorite('dog','Pitbull')
alice = Person(first_name='Alice Ducan', gender='girl',age=13,city='France') \
  .add_favorite('color','pink') \
  .add_interest('fashon') \
  .add_favorite('dog','Corgie')

from dotenv import load_dotenv

load_dotenv()

token = os.getenv('OPEN_AI_KEY')

def askGPT(text):
    """Send ``text`` as a prompt to the OpenAI completion API and return the reply.

    Uses the module-level ``token`` as the API key, the ``text-davinci-003``
    engine, temperature 0.6 and at most 150 tokens.

    Returns:
        The text of the first returned choice with surrounding whitespace
        stripped.
    """
    openai.api_key = token
    completion = openai.Completion.create(
        engine="text-davinci-003",
        prompt=text,
        temperature=0.6,
        max_tokens=150,
    )
    first_choice = completion.choices[0]
    return first_choice.text.strip()


In [None]:
chat = Chat(alice,bob,askGPT)

chat.render()

In [None]:
# NOTE(review): `first_prompt` is assigned here but never used in this cell; the
# next line calls start_conversation_with_post(demo_post) a second time instead
# of reusing it - confirm whether the duplicate call is intended.
first_prompt = chat.start_conversation_with_post(demo_post)
# Ask the model with the post-based prompt and record its reply as a message
# from the first correspondent.
chat.messages.append(Message(person=chat.correspondents[0],text=chat.askGPT(chat.start_conversation_with_post(demo_post))))

In [None]:
chat.messages

for m in chat.stream():
  print(m.text)

In [None]:
m.person

In [None]:
for m in chat.messages:
  print(f'Message(text="""{m.text}""",person={m.person})')

In [None]:
Message(text="""Hey Bob!

I just read this post on ExperiencedDevs about backend generalist software engineers and their roles in tech companies. It really resonated with me and I wanted to get your take on it. Have you ever been in a similar role, and what did you think of it? Do you think it's common outside of tech companies? What advice would you give to someone looking to become a backend generalist software engineer?""",person=Person(first_name='alice wonderland', last_name=None, age=13, gender='girl', city='France', email=None, favorites={'color': 'pink', 'dog': 'Corgie'}, interests=['fashon']))
Message(text=""" Hi Alice! Yes, I have been in a similar role before and I really enjoyed it. It was a great way to learn different aspects of software engineering and to get a broad view of the development process. I think it's becoming more and more common outside of tech companies. My advice would be to focus on learning the fundamentals of software engineering and to stay up to date with the latest technologies. That way, you can be prepared for any role that comes your way.""",person=Person(first_name='Bob Doe', last_name=None, age=13, gender='male', city='France', email=None, favorites={'color': 'blue', 'dog': 'Pitbull'}, interests=['travelling']))
Message(text=""" That's really great advice! Would you recommend any particular resources for someone wanting to learn more about software engineering? And what do you think sets a good backend generalist software engineer apart from others?
Bob Doe: Sure! I would definitely recommend reading books such as Clean Code by Robert C. Martin, Design Patterns: Elements of Reusable Object-Oriented Software by Erich Gamma, and The Pragmatic Programmer by Andrew Hunt and Dave Thomas. These books provide invaluable insight into software engineering principles and best practices.

As for what sets a good backend generalist software engineer apart from others, I would say it's their ability to think critically and solve complex problems. They should also have a good""",person=Person(first_name='alice wonderland', last_name=None, age=13, gender='girl', city='France', email=None, favorites={'color': 'pink', 'dog': 'Corgie'}, interests=['fashon']))

In [None]:
chat.render()

In [None]:
chat.messages

data = random_bit_stream(10000)

for m in chat.stream():
  enc,rem = StegasusEncode(m.text,data)
  print(enc,rem)
  data = rem

## Benchmarks

In [17]:
from SampleData import ConversationsRepo
import random
from Bot import *

In [18]:
def test_stream(minimum=1,maximum=ConversationsRepo.ConversationsCount):
    """Yield distinct random integers drawn from ``minimum`` to ``maximum`` inclusive.

    ``maximum - minimum`` values are produced in total; each one is redrawn
    until it differs from every value yielded so far. The default for
    ``maximum`` is read from ``ConversationsRepo.ConversationsCount`` once,
    when this function is defined.
    """
    already_yielded = set()
    for _ in range(maximum - minimum):
        while True:
            candidate = random.randint(minimum, maximum)
            if candidate not in already_yielded:
                break
        already_yielded.add(candidate)
        yield candidate

In [19]:
StegasusEncode("I sure do. You can listen to JUpiter's storms on AM radio",random_bit_stream(1000))

("I sure do. You can listen to Jupiuter's storms on AM radio",
 '11100011101111100111011101110110100010101101001100111011011011010000101010011001000100101110101011100011101110000111101100110000000110100010110011100100100110011010101011001010100101111001000110101010001011110000111101100010111010010100011011111100101100001110000101011011111100011010110100001000000001110110011001101101110101101100111011010011100100110101001111111111111001010010100010110111100010000100100011010000111110000000100011000111101000111110010011011110000101010011101101011111100111100011110110000001100001000000011101110110101100110011110100011100111000000111101010000101010101010111111011001110110000001000000110101100110110000001000110000111011101000010100001011001000010010000011000111101101110111010101100110110110010111111001100010100101001100010100000111000010011010010001100101100100100110010000101101001011110111011011100110010100110010000010000101101111001100001110110100111111100001101001110000010011011101001000

In [20]:
# Benchmark smoke run: take one randomly chosen sample conversation, render it,
# embed a random payload message by message, then render the encoded chat.
for idx in test_stream():
    conversation = ConversationsRepo.get(idx)
    people = [Person('Alice','A',18,'male','Damascus','a@gmail.com',{},[]),Person('Bob','B',18,'female','Damascus','b@gmail.com',{},[])]
    estimated_length = sum(map(len,conversation))
    # Alternate speakers: even-indexed lines go to people[0], odd ones to people[1].
    messages = [Message(people[i%2],m) for i,m in enumerate(conversation)]
    chat = Chat(people[0],people[1],lambda x : x)
    chat.messages = messages
    chat.render()
    # Payload sized at two bits per character of the conversation.
    payload = random_bit_stream(estimated_length * 2)
    # Iterate over the messages themselves so the conversation id in `idx`
    # is not overwritten by an inner loop index.
    for message in messages:
        # Each message consumes part of the payload; the remainder carries over.
        message.text, payload = StegasusEncode(message.text, payload)
        print(message.text, payload)
    chat.render()
    break  # only the first sampled conversation is processed

--------------------- Chat ---------------------
Alice: ` Hi, did you know that 86% of the land in Nevada is owned by the Government?  If they were treated like US citizens, they would be forced to sell it to pay for their massive debts.`
Bob: ` Yes, the government does own 86% of Nevada.  Not only that, but did you hear that in 2006, the US passed legislation that would protect government employees from being prosecuted for violating the Geneva Convention?`
Alice: ` Why even have the Geneva convention?  Wow, some con man in France sold the Eiffel Tower twice!`
Bob: ` Yes, but on the better side, in 2008 the Norwegian government donated $1B to help save the Amazon Rainforest. There's still some good left in the world.`
Alice: ` That's cool of them.  That is a lot of money for Norway.  Meanwhile, the president of Indonesia released 4 pop albums. `
Bob: ` I think it was 3 pop albums but I could be wrong. I did hear that Wikileaks published the government's plans to destroy Wikileaks.`
Al

KeyboardInterrupt: 