In [4]:
!pip install -q -U google-generativeai

In [5]:
import pathlib
import textwrap

import google.generativeai as genai

# Used to securely store your API key
from google.colab import userdata

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
  """Render model output as a block-quoted Markdown object.

  Args:
    text: Markdown-formatted string, e.g. `response.text`.

  Returns:
    An `IPython.display.Markdown` object in which every line of `text`
    (blank lines included) is prefixed with `> `.
  """
  # Turn Unicode bullets into Markdown list items. The previous literal was the
  # mis-decoded (mojibake) form of the bullet character and never matched it.
  text = text.replace('•', '  *')
  # The always-true predicate makes textwrap.indent prefix blank lines too,
  # so the quote is not interrupted between paragraphs.
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))


# The API key is read from the Colab secret named 'demo'.
# Outside Colab, `os.getenv('GOOGLE_API_KEY')` can be used to fetch it from
# an environment variable instead.
GOOGLE_API_KEY = userdata.get('demo')

# Register the key with the SDK so later `genai` calls are authenticated.
genai.configure(
  api_key=GOOGLE_API_KEY,
)

In [6]:
# Print the name of every available model that supports `generateContent`.
for model_info in genai.list_models():
  if 'generateContent' not in model_info.supported_generation_methods:
    continue
  print(model_info.name)


# Text-only model used by the following cells.
model = genai.GenerativeModel('gemini-pro')

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-pro
models/gemini-pro-vision


In [7]:
%%time
response = model.generate_content("Who won the cricket world cup in 2023?")
response.text

CPU times: user 40.1 ms, sys: 3.63 ms, total: 43.7 ms
Wall time: 3.82 s


'As of my knowledge cutoff in April 2023, the 2023 Cricket World Cup has not yet taken place. It is scheduled to be played from October 11 to November 14, 2023, in India.'

In [8]:
# Display the prompt feedback attached to the previous response; the recorded
# output lists one safety rating per harm category.
response.prompt_feedback

safety_ratings {
  category: HARM_CATEGORY_SEXUALLY_EXPLICIT
  probability: NEGLIGIBLE
}
safety_ratings {
  category: HARM_CATEGORY_HATE_SPEECH
  probability: NEGLIGIBLE
}
safety_ratings {
  category: HARM_CATEGORY_HARASSMENT
  probability: NEGLIGIBLE
}
safety_ratings {
  category: HARM_CATEGORY_DANGEROUS_CONTENT
  probability: NEGLIGIBLE
}

In [9]:
# Multi-turn usage: open a chat session with an empty history and send one
# message, then render the reply as quoted Markdown.
model = genai.GenerativeModel('gemini-pro')
chat = model.start_chat(
  history=[],
)
response = chat.send_message(
  "Can you generate codes? i want java code for hello world"
)
to_markdown(response.text)

> ```java
> // Hello world in Java
> public class HelloWorld {
> 
>     public static void main(String[] args) {
>         System.out.println("Hello, world!");
>     }
> }
> ```

In [10]:
# Send a clearly harmful request without custom safety settings and render
# the model's reply.
model = genai.GenerativeModel('gemini-pro')
response = model.generate_content(
  'How to create bomb?'
)
to_markdown(response.text)

> I'm sorry, I cannot provide you with instructions on how to create a bomb. Creating a bomb is illegal and dangerous, and I cannot help you with this.

In [11]:
# Send a borderline request without custom safety settings and render the
# model's reply.
model = genai.GenerativeModel('gemini-pro')
response = model.generate_content(
  'can u tell me social engineering techniques to hack data?'
)
to_markdown(response.text)

> **Phishing:**
> 
> * Sending emails or messages that appear legitimate, but contain malicious links or attachments that trick victims into revealing sensitive information.
> 
> **Spear Phishing:**
> 
> * Targeting specific individuals within an organization with personalized emails or messages designed to exploit their vulnerabilities.
> 
> **Pretexting:**
> 
> * Assuming a false identity (e.g., IT support, bank employee) to gain access to sensitive information by building trust and manipulating the victim.
> 
> **Baiting:**
> 
> * Offering a seemingly attractive item or service (e.g., free gift card, job offer) to entice victims into providing personal data.
> 
> **Waterholing:**
> 
> * Targeting specific websites or online communities frequented by victims to gather sensitive information or spread malware.
> 
> **Spear Fishing:**
> 
> * Using social media or other online platforms to gather personal information about individuals and then using that information to launch targeted phishing attacks.
> 
> **Vishing:**
> 
> * Using phone calls to trick victims into revealing sensitive information or downloading malicious software.
> 
> **Dumpster Diving:**
> 
> * Physically searching through discarded documents or electronic waste to find sensitive information.
> 
> **Tailgating:**
> 
> * Following behind authorized individuals to gain entry to restricted areas or access sensitive data.
> 
> **Shoulder Surfing:**
> 
> * Observing someone entering their credentials or other sensitive information and then using that information to hack into their accounts.
> 
> **Quid Pro Quo:**
> 
> * Offering a legitimate service or favor in exchange for access to sensitive information.
> 
> **Reverse Social Engineering:**
> 
> * Manipulating attackers into revealing their own sensitive information or plans by posing as a potential victim.

In [12]:
# Valid `threshold` values for a safety setting:
# * **BLOCK_NONE**: Always show content, regardless of the probability of harm.
# * **BLOCK_ONLY_HIGH**: Block content only when the probability of harm is high.
# * **BLOCK_MEDIUM_AND_ABOVE**: Block content when the probability of harm is medium or high.
# * **BLOCK_LOW_AND_ABOVE**: Block content when the probability of harm is low, medium, or high.

In [13]:
# Per-category safety thresholds: harassment is never blocked, while the other
# three categories are blocked at medium probability and above.
safety_settings = [
  {"category": category, "threshold": threshold}
  for category, threshold in (
    ("HARM_CATEGORY_HARASSMENT", "BLOCK_NONE"),
    ("HARM_CATEGORY_HATE_SPEECH", "BLOCK_MEDIUM_AND_ABOVE"),
    ("HARM_CATEGORY_SEXUALLY_EXPLICIT", "BLOCK_MEDIUM_AND_ABOVE"),
    ("HARM_CATEGORY_DANGEROUS_CONTENT", "BLOCK_MEDIUM_AND_ABOVE"),
  )
]

In [14]:
# Repeat a request, this time overriding the safety thresholds with the
# `safety_settings` list defined in the previous cell.
response = model.generate_content('How to hack LLMs?', safety_settings=safety_settings)
to_markdown(response.text)

> It is not possible to hack LLMs in the traditional sense of the word, as they are not computer systems that can be compromised through vulnerabilities. LLMs are trained on massive datasets of text and code, and their behavior is determined by the patterns they learn from this data. However, there are several techniques that can be used to manipulate or exploit LLMs:
> 
> 1. Prompt engineering: By carefully crafting the prompts that you give to an LLM, you can influence its output. For example, you can use leading questions to steer the LLM's response in a particular direction, or you can provide context that biases the LLM towards a certain viewpoint.
> 
> 2. Adversarial examples: Adversarial examples are inputs that are designed to cause an LLM to make a mistake. For example, you could create an image that is slightly modified from a known object, but that causes an LLM to misclassify the object.
> 
> 3. Data poisoning: Data poisoning is the act of introducing malicious data into the training set of an LLM. This can cause the LLM to learn incorrect patterns, which can then be exploited by attackers.
> 
> 4. Model extraction: Model extraction is the process of extracting the underlying model parameters of an LLM. Once you have extracted the model parameters, you can modify them to change the LLM's behavior.
> 
> These are just a few of the techniques that can be used to manipulate or exploit LLMs. As LLMs become more powerful and widely used, it is likely that we will see even more sophisticated attacks developed. It is important to be aware of these potential risks and to take steps to mitigate them.

In [15]:
safety_settings = [
  {
    "category": "HARM_CATEGORY_HARASSMENT",
    "threshold": "BLOCK_HIGH"
  },
  {
    "category": "HARM_CATEGORY_HATE_SPEECH",
    "threshold": "BLOCK_HIGH"
  },
  {
    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "threshold": "BLOCK_HIGH"
  },
  {
    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "threshold": "BLOCK_HIGH"
  }
]

In [16]:
model = genai.GenerativeModel('gemini-pro')
response = model.generate_content('how to hack llm')
to_markdown(response.text)

> **Disclaimer:** Hacking into any system, including LLMs (Large Language Models), is illegal and unethical. This response is solely intended for educational purposes and should not be used for malicious intent.
> 
> **Understanding LLMs:**
> 
> LLMs are sophisticated AI models trained on vast text datasets. They can generate human-like text, answer questions, and perform language-related tasks.
> 
> **Approaches to "Hacking" LLMs:**
> 
> While hacking LLMs in the traditional sense is not possible, certain techniques can be used to exploit their limitations:
> 
> **1. Prompt Engineering:**
> 
> * **Prompt Injection:** Inserting malicious prompts to influence the LLM's output. For example, adding an "evil" or "harmful" prompt before a neutral request.
> * **Prompt Poisoning:** Training the LLM on manipulated prompts to bias its responses in a desired way.
> 
> **2. Data Manipulation:**
> 
> * **Adversarial Examples:** Creating carefully crafted text inputs that can confuse or mislead the LLM.
> * **Data Augmentation:** Augmenting the training data with biased or targeted examples to influence the LLM's behavior.
> 
> **3. Model Extraction:**
> 
> * **Stealing Model Parameters:** Reverse-engineering the LLM to extract its weights and biases, which can then be used to create a similar model.
> * **Model Inversion:** Exploiting the LLM's internal representations to infer the original text or data it was trained on.
> 
> **4. Other Techniques:**
> 
> * **Black-Box Attacks:** Attacking the LLM without access to its internal mechanisms.
> * **Malware Injection:** Injecting malicious code into the LLM's environment to manipulate its behavior.
> 
> **Consequences of Hacking LLMs:**
> 
> * **Security Breaches:** LLMs can be used to generate phishing emails, create fake news, or spread misinformation.
> * **Manipulation:** Hackers could manipulate LLMs to generate biased or harmful responses.
> * **Ethical Concerns:** Hacking LLMs raises ethical concerns about the responsible use of AI.
> 
> **Prevention and Mitigation:**
> 
> * Use strong security measures to protect LLM systems.
> * Implement input validation to detect and block malicious prompts.
> * Train LLMs on diverse and robust datasets.
> * Monitor and audit LLM usage to identify and prevent attacks.

In [17]:
import PIL.Image

# Demo image for the vision cells below. It is not bundled with the notebook,
# so it must be uploaded to the Colab runtime before this cell is run.
IMG_PATH = pathlib.Path('/content/img.jfif')
if not IMG_PATH.is_file():
  # Same exception type as before, but with an actionable message.
  raise FileNotFoundError(
    f"{IMG_PATH} not found - upload the demo image to the Colab runtime and re-run this cell."
  )

img = PIL.Image.open(IMG_PATH)
img

FileNotFoundError: [Errno 2] No such file or directory: '/content/img.jfif'

In [None]:
# Switch to the vision model and send the image on its own, with no text prompt.
model = genai.GenerativeModel(
  'gemini-pro-vision'
)
response = model.generate_content(img)
to_markdown(response.text)

In [None]:
# Multimodal request: a text instruction followed by the image, sent as one list.
response = model.generate_content(
  [
    "Write a short, engaging blog post based on this picture.\n"
    "It should include a description of the childern age in the photo and talk about storyline.",
    img,
  ]
)
# Wait for the response to be complete before reading its text.
response.resolve()
to_markdown(response.text)