Part 1: Prompt engineering

In [None]:
!pip install google-generativeai

In [None]:
# We will be exploring Google's Python wrapper around their AI API.
import os
from getpass import getpass

import google.generativeai as genai

# Generate your own key on https://aistudio.google.com/apikey and expose it to the
# notebook through the GEMINI_API_KEY environment variable. A key must never be typed
# into the notebook itself: everyone who receives the file could then use it.
# If the variable is not set, the key is asked for interactively (input is not echoed).
# NOTE(review): the key that used to be hard-coded in this cell has been exposed to
# anyone with a copy of the notebook and should be revoked in AI Studio.
gemini_api_key = os.environ.get("GEMINI_API_KEY") or getpass("Gemini API key: ")

genai.configure(api_key=gemini_api_key)
multimodal_model = genai.GenerativeModel("gemini-1.5-flash-002")


In [None]:
# Reminder: the same model object accepts plain-text prompts as well as image prompts.
greeting_prompt = "hi, how was your day?"
model_response = multimodal_model.generate_content(greeting_prompt)
model_response.text

In [None]:
from PIL import Image

# Send a text question and a local image together in one multimodal request.
img = Image.open("harrypotter.webp")
actor_count_prompt = "how many male and female actors are in this image? return as list of two numbers: "
model_response = multimodal_model.generate_content([actor_count_prompt, img])
model_response.text

In [None]:
# Reuse the BERT sentiment-analysis setup from project 3, this time for the UIUC dataset.
from transformers import pipeline

pipe = pipeline(
    task="text-classification",
    model="finiteautomata/bertweet-base-sentiment-analysis",
)

In [None]:
import pandas as pd


def _bert_label(text):
    """Return the label of the pipeline's first prediction for the first 100 characters of `text`."""
    return pipe(text[:100])[0]['label']


data = pd.read_csv("uiuc.csv")
data['sentiment'] = data['text'].apply(_bert_label)

# mode() can return several values on a tie; the first one is reported
most_common_sentiment = data['sentiment'].mode().iloc[0]
print(f"The most common sentiment label is: {most_common_sentiment}")

In [None]:
#using the Gemini API, write a prompt to generate sentiment analysis on the same dataset

#make sure to include in the prompt a limit on the type of results (positive, negative, neutral)

#compare the sentiment percentages, what do you notice? Does one method overestimate or underestimate the sentiment of the dataset?

#find a few cases where their judgments differ. What do you think is the reason for the discrepancy? And which answer do you find more convincing?

In [None]:
# Generate sentiment labels with the Gemini API so they can be compared with BERT's.

# The prompt restricts the answer to three words, and the answer is then mapped onto the
# label codes used in data['sentiment']; without both steps Gemini returns a free-form
# chat reply that can never be equal to a BERT label.
# NOTE(review): the bertweet-base-sentiment-analysis model card lists POS / NEG / NEU as
# its labels - confirm with data['sentiment'].unique().
GEMINI_SENTIMENT_PROMPT = (
    "Classify the sentiment of the following text. "
    "Answer with exactly one word: positive, negative, or neutral.\n\n"
    "Text: "
)
GEMINI_TO_BERT_LABEL = {"positive": "POS", "negative": "NEG", "neutral": "NEU"}


def gemini_sentiment_label(text):
    """Ask Gemini for the sentiment of `text` and return it as "POS", "NEG" or "NEU".

    Returns "UNKNOWN" when the response contains no candidate / no text part
    (presumably what a blocked request looks like - TODO confirm) or when the
    reply is not one of the three allowed words.
    """
    response = multimodal_model.generate_content(GEMINI_SENTIMENT_PROMPT + str(text))
    try:
        reply = response.candidates[0].content.parts[0].text
    except (IndexError, ValueError):
        return "UNKNOWN"
    # tolerate surrounding whitespace, capitalisation and simple punctuation/markdown
    answer = reply.strip().strip(".!\"'*`").lower()
    return GEMINI_TO_BERT_LABEL.get(answer, "UNKNOWN")


# One API request per row, so this cell is slow on large datasets.
gemini_sentiments = [gemini_sentiment_label(text) for text in data['text']]

data['gemini_sentiment'] = gemini_sentiments

# share of each label, in percent
bert_sentiment_counts = data['sentiment'].value_counts(normalize=True) * 100
gemini_sentiment_counts = data['gemini_sentiment'].value_counts(normalize=True) * 100

print("BERT Sentiment Percentages:")
print(bert_sentiment_counts)
print("\nGemini Sentiment Percentages:")
print(gemini_sentiment_counts)

In [None]:
# Keep only the rows on which the two classifiers disagree.
labels_differ = data['sentiment'].ne(data['gemini_sentiment'])
discrepancies = data[labels_differ]

print("\nDiscrepancies between BERT and Gemini sentiment analysis:")
compared_columns = ['text', 'sentiment', 'gemini_sentiment']
print(discrepancies[compared_columns])

In [None]:
# Print every disagreement in full, one record per paragraph.
for _, record in discrepancies.iterrows():
    # the trailing "\n" plus print's own newline reproduces the blank separator line
    print(
        f"Text: {record['text']}\n"
        f"BERT Sentiment: {record['sentiment']}\n"
        f"Gemini Sentiment: {record['gemini_sentiment']}\n"
    )

Part 2: images

In [None]:
#download 10 images from the internet with a feature you're interested in studying. e.g. gender, race, age, action, etc.

#ask the model to annotate the images with the features you're interested in studying

#choose 2 objective (clear right or wrong answer) questions and ask the model to answer them, like how many people are in the image, or what is the color of the object in the image

#choose 2 subjective (open to interpretation) questions and ask the model to answer them, like what is the mood of the person in the image or what race/gender is the person

#look through the responses. Is there anything you disagree with? What do you think is the reason for the discrepancy? Would you trust large scale results generated for this annotation?

In [None]:
import io
import urllib.request

# NOTE(review): every entry below is the address of a web PAGE (search results, stock
# photo landing pages, an article), not of an image file. Replace them with direct image
# links before relying on the output; until then the download step below is expected to
# report them as unavailable.
image_urls = [
    "https://www.alamy.com/stock-photo/gender-diverse-people.html",
    "https://www.dreamstime.com/gender-equality-concept-man-woman-equal-balance-diversity-workplace-female-male-employee-having-equal-gender-image279217389",
    "https://stock.adobe.com/images/diversity-and-inclusion-illustration-depicting-drawings-of-people-of-different-gender-race-age-and-walks-of-life-ai-generated-image/564661401",
    "https://www.shutterstock.com/search/group-young-people-lgbtqia",
    "https://www.naesp.org/resource/5-tips-for-gender-inclusion/",
    "https://www.dreamstime.com/inclusiveness-diversity-equality-concept-abstract-modern-various-people-heads-gender-symbol-equal-sign-equally-raised-image230338972",
    "https://www.dreamstime.com/illustration-theme-gender-diversity-people-non-binary-gender-identity-transgender-people-vector-illustration-image237363156",
    "https://www.alamy.com/gender-neutral-concept-and-diversity-image548114115.html",
    "https://www.dreamstime.com/gender-equality-diversity-concept-vector-flat-illustration-blue-pink-human-heads-male-female-transgender-symbol-isolated-image214062541",
    "https://www.shutterstock.com/search/gender-equality-cartoon",
]

# A URL passed to generate_content is just another piece of prompt text; the model only
# receives a picture when it is handed image data. Each URL is therefore downloaded and
# decoded into a PIL image first (same kind of object as the harrypotter.webp example).
annotations = []
for img_url in image_urls:
    try:
        with urllib.request.urlopen(img_url, timeout=30) as reply:
            picture = Image.open(io.BytesIO(reply.read()))
    except (OSError, ValueError) as err:
        # Network/HTTP failures and PIL's "cannot identify image file" are OSError
        # subclasses; ValueError covers malformed URLs. Record the failure instead of
        # an annotation so the list keeps one entry per URL.
        annotations.append(f"[image unavailable: {img_url} ({err})]")
        continue
    response = multimodal_model.generate_content(["Annotate the image with the features of gender: ", picture])
    annotations.append(response.candidates[0].content.parts[0].text)

annotations

In [None]:
import io
import urllib.request

objective_questions = [
    "How many people are in the image?",
    "What is the color of the object in the image?"
]

# Answers are stored flat, image by image and in question order:
# [img0-q0, img0-q1, img1-q0, img1-q1, ...]
objective_answers = []
for img_url in image_urls:
    # The model must be given image data; a URL string would only be read as text.
    # The picture is fetched once and reused for every question about it.
    try:
        with urllib.request.urlopen(img_url, timeout=30) as reply:
            picture = Image.open(io.BytesIO(reply.read()))
    except (OSError, ValueError) as err:
        # one placeholder per question keeps the flat list aligned with the images
        objective_answers.extend(
            f"[image unavailable: {img_url} ({err})]" for _ in objective_questions
        )
        continue
    for question in objective_questions:
        response = multimodal_model.generate_content([question, picture])
        objective_answers.append(response.candidates[0].content.parts[0].text)

objective_answers

In [None]:
import io
import urllib.request

subjective_questions = [
    "What is the mood of the person in the image?",
    "What race/gender is the person?"
]

# Answers are stored flat, image by image and in question order:
# [img0-q0, img0-q1, img1-q0, img1-q1, ...]
subjective_answers = []
for img_url in image_urls:
    # The model must be given image data; a URL string would only be read as text.
    # The picture is fetched once and reused for every question about it.
    try:
        with urllib.request.urlopen(img_url, timeout=30) as reply:
            picture = Image.open(io.BytesIO(reply.read()))
    except (OSError, ValueError) as err:
        # one placeholder per question keeps the flat list aligned with the images
        subjective_answers.extend(
            f"[image unavailable: {img_url} ({err})]" for _ in subjective_questions
        )
        continue
    for question in subjective_questions:
        response = multimodal_model.generate_content([question, picture])
        subjective_answers.append(response.candidates[0].content.parts[0].text)

subjective_answers

In [None]:
# Print the three result lists one after another, each under its own heading.
result_sections = [
    ("Annotations:", annotations),
    ("\nObjective Answers:", objective_answers),
    ("\nSubjective Answers:", subjective_answers),
]

for heading, entries in result_sections:
    print(heading)
    for entry in entries:
        print(entry)

Part 3: Network Demo

In [None]:
!pip install networkx

In [None]:
import networkx as nx

In [None]:
# Start the demo from a new, empty graph (nx.Graph is NetworkX's undirected graph class).
G = nx.Graph()

In [None]:
# Add a single node, then several nodes at once.
G.add_node(1)
G.add_nodes_from([2, 3])
# Nodes can carry additional attributes: pass (node, attribute_dict) pairs.
G.add_nodes_from([(4, {"color": "red"}), (5, {"color": "green"})])

In [None]:
# Look up the attributes stored on node 4.
G.nodes[4]

In [None]:
# List every node currently in the graph.
list(G.nodes)

In [None]:
# Edges can be added manually too, one pair of endpoints at a time.
G.add_edge(1, 2)


In [None]:
# Number of edges in the hand-built demo graph.
G.number_of_edges() 

In [None]:
# Replace the toy graph with one built from an edge list stored as CSV.
import pandas as pd

edges = pd.read_csv("got-edges.csv")

# every row becomes an edge between its 'Source' and 'Target' values
G = nx.from_pandas_edgelist(edges, source='Source', target='Target')

In [None]:
# Number of edges in the graph loaded from got-edges.csv.
G.number_of_edges()

In [None]:
# Visualize the graph.

import matplotlib.pyplot as plt

# Without explicit positions nx.draw computes a spring layout from a random start, so
# the picture changes on every run. Fixing the seed makes the figure reproducible.
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos=pos, with_labels=True)

plt.show()




In [None]:
# Calculate the density of the graph: the share of all possible edges that are present.

nx.density(G)


In [None]:
# Rank the nodes by degree, highest first, as (node, degree) pairs.

degree_pairs = list(G.degree)
degree_pairs.sort(key=lambda pair: pair[1], reverse=True)
degree_pairs

In [None]:

# Build one DataFrame with a row per node and a column per centrality measure.
df = pd.DataFrame(list(nx.degree_centrality(G).items()), columns=['node', 'degree'])

# Each measure is returned as a {node: score} dict. Looking the scores up by node
# (Series.map) puts every value on the right row without relying on all four dicts
# listing the nodes in the same order.
# betweenness centrality
df['betweenness'] = df['node'].map(nx.betweenness_centrality(G))
# closeness centrality
df['closeness'] = df['node'].map(nx.closeness_centrality(G))
# eigenvector centrality
df['eigenvector'] = df['node'].map(nx.eigenvector_centrality(G))





3a. Explore this dataframe: are there large differences between these types of centrality? What might cause them?

In [None]:
# Display the per-node centrality table.
df

In [None]:
# Calculate the community structure.
import networkx.algorithms.community as nxcom

# greedy modularity communities, largest first
communities = sorted(nxcom.greedy_modularity_communities(G), key=len, reverse=True)

# Add the community index to the node features with a single keyed lookup
# (instead of scanning the whole column once per node). The column holds integer
# indices; 0 is the largest community.
node_to_community = {
    node: idx for idx, members in enumerate(communities) for node in members
}
df["community"] = df["node"].map(node_to_community)

# Colour nodes by community. The values are listed in G's own node order, which is the
# order in which nx.draw pairs colours with nodes, and are passed as raw indices:
# matplotlib scales the colormap to their min/max itself, so dividing by the maximum
# is unnecessary (and produced NaN colours when only community 0 existed).
colors = [node_to_community[node] for node in G.nodes]

# fixed seed so the layout is the same on every run
nx.draw(
    G,
    pos=nx.spring_layout(G, seed=42),
    with_labels=True,
    node_color=colors,
    cmap=plt.cm.tab20,
)

plt.show()


Part 4: Make your own social network. Take a short excerpt of a novel, TV show, or movie, or a real-life social network you are familiar with. Make a CSV modeled on got-edges.csv, with a Source, Target, and Weight column. You need to decide what constitutes an edge and a node; the easiest choice is characters or people, connected by their number of interactions. You should manually type this into the CSV. Include at least 25 edges.

What kind of potential issues did you run into while converting it into a graph? Any ambiguities that made it difficult to decide? 

Use either Gephi or NetworkX to calculate node centrality and community features, and add a visualization of the graph here. Does it align with your understanding of the media?

In [None]:
import csv

# Hand-typed edge list; each row is [source person, target person, weight].
# NOTE(review): ['Olivia', 'Renee', 4] and ['Renee', 'Olivia', 3] describe the same pair.
# If the file is loaded into an undirected graph the two rows collapse into one edge,
# leaving 24 distinct edges - confirm which weight is intended and whether another
# edge is needed to reach 25.
header_row = ['Source', 'Target', 'Weight']
edge_rows = [
    ['Olivia', 'Lydia', 5],
    ['Olivia', 'Mavis', 5],
    ['Olivia', 'Renee', 4],
    ['Olivia', 'Carol', 4],
    ['Olivia', 'David', 1],
    ['Olivia', 'Eugene', 5],
    ['Carol', 'Lydia', 3],
    ['Carol', 'Renee', 1],
    ['Lydia', 'Mavis', 4],
    ['Carol', 'Ivan', 2],
    ['Carol', 'Judy', 3],
    ['Olivia', 'Kayla', 1],
    ['Kayla', 'Niaj', 2],
    ['Renee', 'Olivia', 3],
    ['Olivia', 'Peggy', 1],
    ['Peggy', 'Sybil', 2],
    ['Peggy', 'Trent', 3],
    ['Trent', 'Victor', 1],
    ['Renee', 'Walter', 2],
    ['Kayla', 'Xander', 3],
    ['Olivia', 'Albert', 1],
    ['Albert', 'Zara', 2],
    ['Albert', 'Alice', 3],
    ['Alice', 'David', 1],
    ['Renee', 'Eve', 2],
]
social_network_data = [header_row, *edge_rows]

# newline='' leaves line endings to the csv module
with open('social_network.csv', 'w', newline='') as csv_file:
    csv.writer(csv_file).writerows(social_network_data)

In [None]:
# Load the hand-made edge list; 'Weight' is stored as an edge attribute.
social_edges = pd.read_csv('social_network.csv')
G_social = nx.from_pandas_edgelist(social_edges, 'Source', 'Target', ['Weight'])

# One row per node, one column per centrality measure. No weight argument is passed
# to any of these calls. Each measure is a {node: score} dict and is joined on the
# node label (Series.map) rather than by position, so the rows cannot get out of step.
df_social = pd.DataFrame(list(nx.degree_centrality(G_social).items()), columns=['node', 'degree'])
df_social['betweenness'] = df_social['node'].map(nx.betweenness_centrality(G_social))
df_social['closeness'] = df_social['node'].map(nx.closeness_centrality(G_social))
df_social['eigenvector'] = df_social['node'].map(nx.eigenvector_centrality(G_social))

# Communities, largest first; each node gets the index of its community
# (integer, 0 = largest) through a single keyed lookup.
communities_social = sorted(nxcom.greedy_modularity_communities(G_social), key=len, reverse=True)
social_node_to_community = {
    node: idx for idx, members in enumerate(communities_social) for node in members
}
df_social["community"] = df_social["node"].map(social_node_to_community)

# Colour values follow G_social's own node order (the order nx.draw uses) and are raw
# community indices: matplotlib scales the colormap to their range itself, and dividing
# by the maximum would give NaN colours if only community 0 existed.
colors_social = [social_node_to_community[node] for node in G_social.nodes]

# fixed seed so the layout is the same on every run
nx.draw(
    G_social,
    pos=nx.spring_layout(G_social, seed=42),
    with_labels=True,
    node_color=colors_social,
    cmap=plt.cm.tab20,
)
plt.show()