## Setup

This notebook is adapted from Gemini's official API quickstart guide: https://ai.google.dev/tutorials/python_quickstart

### Install the Python SDK

The Python SDK for the Gemini API is contained in the [`google-generativeai`](https://pypi.org/project/google-generativeai/) package. Install the dependency using pip:

In [None]:
!pip install -q -U google-generativeai

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/137.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/137.4 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━[0m [32m92.2/137.4 kB[0m [31m1.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.4/137.4 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25h

### Import packages

Import the necessary packages.

In [None]:
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
  """Wrap *text* in an IPython ``Markdown`` object formatted as a block quote.

  Every '•' character is replaced by '  *', and every line, blank lines
  included, is prefixed with '> '.
  """
  bulleted = text.replace('•', '  *')
  # keepends=True preserves the original line terminators, so only the prefix is added.
  quoted = ''.join('> ' + line for line in bulleted.splitlines(True))
  return Markdown(quoted)

In [None]:
# Used to securely store your API key
from google.colab import userdata

### Set up your API key

Before you can use the Gemini API, you must first obtain an API key. If you don't already have one, create a key with one click in Google AI Studio.

<a class="button button-primary" href="https://makersuite.google.com/app/apikey" target="_blank" rel="noopener noreferrer">Get an API key</a>

In Colab, add the key to the secrets manager under the "🔑" in the left panel. Give it the name `GOOGLE_API_KEY`.

Once you have the API key, pass it to the SDK. You can do this in two ways:

* Put the key in the `GOOGLE_API_KEY` environment variable (the SDK will automatically pick it up from there).
* Pass the key to `genai.configure(api_key=...)`

In [6]:
# Read the key stored under the name 'GOOGLE_API_KEY' via Colab's `userdata`.
# Or use `os.getenv('GOOGLE_API_KEY')` to fetch an environment variable.
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Hand the key to the SDK so that subsequent `genai` calls are authenticated.
genai.configure(api_key=GOOGLE_API_KEY)

## Make queries


In [7]:
# Print the name of every available model that supports `generateContent`.
content_model_names = (
    candidate.name
    for candidate in genai.list_models()
    if 'generateContent' in candidate.supported_generation_methods
)
for listed_name in content_model_names:
    print(listed_name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-pro
models/gemini-pro-vision


We used the most recent model that was available as of 4/01/2024.

In [8]:
# Model handle used for every `generate_content` request later in the notebook.
model = genai.GenerativeModel('gemini-1.0-pro-001')

In [14]:
import os
import shutil
import pandas as pd
from google.colab import drive
# Mount Google Drive so the project folder is reachable from the Colab runtime.
drive.mount('/content/drive')

# Project root on Drive and the stimulus table stored inside it.
Dir_project = '/content/drive/MyDrive/TwoWordTest'
path = os.path.join(Dir_project, 'Stim_2160_shuffle10.csv')

# Load the stimuli, dropping every row whose shuffleID equals 10.
df_data = pd.read_csv(path).loc[lambda frame: frame.shuffleID != 10]
df_data

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,Idx,promptID,idxInPrompt,Stim,shuffleID,shuffleSeed,rand
0,1,1,1,the truck police.,1,3583.0,0.000215
1,2,1,2,the cage rib.,1,3583.0,0.000287
2,3,1,3,the holiday summer.,1,3583.0,0.000488
3,4,1,4,the soap bottle.,1,3583.0,0.001347
4,5,1,5,the radio prison.,1,3583.0,0.001542
...,...,...,...,...,...,...,...
23755,2156,8,159,the goat cheese.,0,,
23756,2157,8,160,the sand beach.,0,,
23757,2158,8,161,the mouth pig.,0,,
23758,2159,8,162,the field flower.,0,,


#### Gemini does not perform well when the input text is very long (e.g., 300 phrases): it stops making judgments after the first several phrases. So for each phrase fold containing 300 phrases, we split it into two folds (150 phrases each).

In [15]:
# Build `dfs`: the stimuli regrouped per (shuffleID, promptID), where every
# group of exactly 300 phrases is split into two Gemini prompts. The new
# column `promptID_gemini` keeps the original prompt ID for unsplit groups and
# becomes promptID*10+1 (idxInPrompt <= 150) or promptID*10+2 (idxInPrompt > 150)
# for split groups, e.g. prompt 1 -> 11 and 12.
df_data['promptID_gemini'] = df_data['promptID']

chunks = []
for shuffle_id in df_data['shuffleID'].unique():
    in_shuffle = df_data['shuffleID'] == shuffle_id
    for prompt_id in df_data['promptID'].unique():
        chunk = df_data[in_shuffle & (df_data['promptID'] == prompt_id)].reset_index(drop=True)
        if len(chunk) == 300:
            first_half = chunk['idxInPrompt'] <= 150
            second_half = chunk['idxInPrompt'] > 150
            # Within a chunk every promptID_gemini still equals prompt_id,
            # so the new IDs can be derived from the scalar directly.
            chunk.loc[first_half, 'promptID_gemini'] = prompt_id * 10 + 1
            chunk.loc[second_half, 'promptID_gemini'] = prompt_id * 10 + 2
        chunks.append(chunk)

dfs = pd.concat(chunks, ignore_index=True)
dfs

Unnamed: 0,Idx,promptID,idxInPrompt,Stim,shuffleID,shuffleSeed,rand,promptID_gemini
0,1,1,1,the truck police.,1,3583.0,0.000215,11
1,2,1,2,the cage rib.,1,3583.0,0.000287,11
2,3,1,3,the holiday summer.,1,3583.0,0.000488,11
3,4,1,4,the soap bottle.,1,3583.0,0.001347,11
4,5,1,5,the radio prison.,1,3583.0,0.001542,11
...,...,...,...,...,...,...,...,...
21595,2156,8,159,the goat cheese.,0,,,8
21596,2157,8,160,the sand beach.,0,,,8
21597,2158,8,161,the mouth pig.,0,,,8
21598,2159,8,162,the field flower.,0,,,8


In [16]:
# Run configuration: which instruction variant to use and where results go.
tag = 'discrete' # change to 'continuous' to get continuous judgments
instruct_txt = os.path.join(Dir_project,f'instruction_{tag}.txt')
# Used to name the output directory; keep it in sync with the model name
# passed to genai.GenerativeModel.
model_name = 'gemini-1.0-pro-001'

# Explicit encoding so the read does not depend on the runtime's locale.
with open(instruct_txt, encoding='utf-8') as f:
    instruct = f.readlines()
# The first line is the instruction text; fail with a clear message if it is missing.
if not instruct:
    raise ValueError(f'Instruction file is empty: {instruct_txt}')

Dir_output = os.path.join(Dir_project,'results',tag,model_name)
# exist_ok avoids the check-then-create race and is a no-op on re-runs.
os.makedirs(Dir_output, exist_ok=True)

# Display the instruction line as the cell output.
instruct[0]

"Read each phrase, then judge how meaningful it is as a single concept using a 'makes sense' or 'nonsense' response. If the phrase does not make sense as a single concept, the appropriate answer is 'nonsense'. If the phrase makes sense as a single concept, the appropriate answer is 'makes sense'. For example, 'the goat sky' is 'nonsense'. 'The knife army' is 'nonsense'. 'The soap bubble' is 'makes sense'. 'The computer programmer' is 'makes sense'. Give an answer for each phrase and respond in the format of 'phrase: response'"

In [None]:
# Query Gemini once per (shuffle, prompt chunk) and save each raw reply to its
# own text file. Chunks whose output file already exists are skipped, so
# re-running this cell resumes an interrupted run.
for idx_shuffle in range(10):
    df_shuffle = dfs.loc[dfs.shuffleID == idx_shuffle].reset_index(drop=True)
    for promptID in df_shuffle.promptID_gemini.unique():
        output_txt = os.path.join(Dir_output,f'{model_name}_{tag}_shuffle{idx_shuffle}_prompt{promptID}.txt')
        if os.path.exists(output_txt):
            continue

        # Prompt = instruction line, a blank line, then one phrase per line.
        df_prompt = df_shuffle.loc[df_shuffle.promptID_gemini == promptID].reset_index(drop=True)
        lines = instruct[0] + '\n\n' + '\n'.join(df_prompt.Stim.values)

        response = model.generate_content(lines)

        # `response.text` raises ValueError when the reply has no usable text
        # (e.g. the prompt was blocked). Report it and move on; no file is
        # written, so the chunk is attempted again on the next run.
        try:
            reply = response.text
        except ValueError:
            print(f'No text returned for shuffle {idx_shuffle}, prompt {promptID}: {response.prompt_feedback}')
            continue

        # The file is known not to exist here, so plain write mode is enough;
        # UTF-8 keeps any non-ASCII characters in the reply intact.
        with open(output_txt, 'w', encoding='utf-8') as f:
            f.write(reply)
        print(idx_shuffle,promptID)

0 11
0 12
0 2
0 31
0 32
0 41
0 42
0 51
0 52
0 61
0 62
0 71
0 72
0 8
1 11
1 12
1 2
1 31
1 32
1 41
1 42
1 51
1 52
1 61
1 62
1 71
1 72
1 8
2 11
2 12
2 2
2 31
2 32
2 41
2 42
2 51
2 52
2 61
2 62
2 71
2 72
2 8
3 11
3 12
3 2
3 31
3 32
3 41
3 42
3 51
3 52
3 61
3 62
3 71
3 72
3 8
4 11
4 12
4 2
4 31
4 32
4 41
4 42
4 51
4 52
4 61
4 62
4 71
4 72
4 8
5 11
5 12


ERROR:tornado.access:503 POST /v1beta/models/gemini-1.0-pro-001:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 889.27ms


5 2
5 31
5 32
5 41
5 42
5 51
5 52
5 61
5 62
5 71
5 72
5 8
6 11
6 12
6 2
6 31
6 32
6 41
6 42
6 51
6 52
6 61
6 62
6 71


ERROR:tornado.access:503 POST /v1beta/models/gemini-1.0-pro-001:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 737.32ms


6 72
6 8
7 11
7 12
7 2
7 31
7 32
7 41
7 42
7 51
7 52
7 61
7 62
7 71
7 72
7 8
8 11
8 12
8 2
8 31
8 32
8 41
8 42
8 51
8 52
8 61
8 62
8 71
8 72
8 8
9 11
9 12
9 2
9 31
9 32
9 41
9 42
9 51
9 52
9 61
9 62
9 71
9 72
9 8
