## Kobler mot OpenAI API for å benytte GPT-modeller
###### Har opprettet API Key på nettsidene til OpenAI (egen konto). OBS: GPT-4 koster vesentlig mer enn GPT-3.5 - se docs for billing detaljer

In [1]:
import openai
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

openai.api_key = os.getenv('OPENAI_API_KEY')
#print(f"API Key: {openai.api_key}")


## Definer modell

In [2]:
def get_completion(prompt, model="gpt-3.5-turbo"): # gpt-3.5-turbo or gpt-4
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]


## Importer Data fra arbeidsdokumentet

#### Metadata

In [4]:
import pandas as pd
meta = pd.read_excel("data/metadata.xlsx", sheet_name="metadata")
#display(meta)


#### Film, spill tv-serier og youtube-kanaler

In [5]:
import pandas as pd
xyz = pd.read_excel("data/metadata.xlsx", sheet_name="xyz")
#display(xyz)

In [6]:
print(xyz['Format'].unique())

['Film' 'Spill' 'TV-Serie' 'Youtube kanal']


#### Splitt i kategorier

In [7]:
film = xyz[xyz['Format'] == 'Film']
spill = xyz[xyz['Format'] == 'Spill']
tv_serie = xyz[xyz['Format'] == 'TV-Serie']
youtube_kanal = xyz[xyz['Format'] == 'Youtube kanal']
#print(youtube_kanal.head(3))
#print(tv_serie.head(3))

##### Youtubere / Influencere

In [130]:
# limiting test data and make the names into list
#navn = youtube_kanal['Navn'].head(10)
navn = youtube_kanal['Navn'].head(10).tolist()
print(navn)

['Mr Beast', 'Logan Paul', 'KSI', 'Randulle', 'Flippklipp', 'Kattekryp', 'Ninja', 'Ssundee', 'Lazarbeam', 'Jelly']


In [131]:
# Initialize an empty list to store all responses
all_responses = []

# Loop through all "Navn" and add to "all_reponses"
for i in range(len(navn)):
    prompt = f"""
    I have this celebrities and/or youtube-channels: ```{navn[i]}```


    I guess that you do not know all of them, 
    but I want you to return three keywords that describe what the celebrity and/youtube-channel do.
    I only want you to return three keywords, and no more text. E.g. Vlogging, Comedy, Gaming.
    """
    response = get_completion(prompt)

    all_responses.append((navn[i], response))
    #print(response)

#print(all_responses)
print(pd.DataFrame(all_responses, columns=["Name", "Response"]))

# Export results to Excel
all_responses = pd.DataFrame(all_responses, columns=["Name", "Response"])
print(all_responses)
#all_responses.to_excel("output.xlsx", index=False)


         Name                          Response
0    Mr Beast  Philanthropy, Challenges, Pranks
1  Logan Paul          Vlogging, Boxing, Pranks
2         KSI            Boxing, Comedy, Gaming
3    Randulle      Gaming, Football, Challenges
4  Flippklipp         Norwegian, Comedy, Gaming
5   Kattekryp     "Animation, Humor, Norwegian"
6       Ninja       Gaming, Streaming, Fortnite
7     Ssundee     Gaming, Minecraft, Challenges
8   Lazarbeam        Gaming, Comedy, Commentary
9       Jelly  "Gaming, Challenges, Commentary"


In [133]:
# Splitt i tre kolonner (kan nok også gjøres i gpt-spørringen)

all_responses[['Repsonse1', 'Repsonse2', 'Repsonse3']] = all_responses["Response"].str.split(',', expand=True)

#display(all_responses)
display(all_responses)

Unnamed: 0,Name,Response,Repsonse1,Repsonse2,Repsonse3
0,Mr Beast,"Philanthropy, Challenges, Pranks",Philanthropy,Challenges,Pranks
1,Logan Paul,"Vlogging, Boxing, Pranks",Vlogging,Boxing,Pranks
2,KSI,"Boxing, Comedy, Gaming",Boxing,Comedy,Gaming
3,Randulle,"Gaming, Football, Challenges",Gaming,Football,Challenges
4,Flippklipp,"Norwegian, Comedy, Gaming",Norwegian,Comedy,Gaming
5,Kattekryp,"""Animation, Humor, Norwegian""","""Animation",Humor,"Norwegian"""
6,Ninja,"Gaming, Streaming, Fortnite",Gaming,Streaming,Fortnite
7,Ssundee,"Gaming, Minecraft, Challenges",Gaming,Minecraft,Challenges
8,Lazarbeam,"Gaming, Comedy, Commentary",Gaming,Comedy,Commentary
9,Jelly,"""Gaming, Challenges, Commentary""","""Gaming",Challenges,"Commentary"""


##### TV-serier

In [16]:
navn = tv_serie['Navn'].head(3).tolist()
print(navn)

['Futurama', 'Stranger Things', 'Forræder']


In [17]:
# Initialize an empty list to store all responses
all_responses = []

# Loop through all "Navn" and add to "all_reponses"
for i in range(len(navn)):
    prompt = f"""
    I have this TV-series: ```{navn[i]}```
    The TV-series can be from the US or from Norway.

    I guess that you do not know all of them, 
    but I want you to return three keywords that describe what the TV-series is about.
    I only want you to return three keywords in Norwegian, and no more text. E.g. Hero, Family Drama, Magic.
    """
    response = get_completion(prompt)

    all_responses.append((navn[i], response))
    #print(response)

#print(all_responses)
print(pd.DataFrame(all_responses, columns=["Name", "Response"]))


              Name                                     Response
0         Futurama               Humor, Science Fiction, Satire
1  Stranger Things  Mysterier, overnaturlige krefter, vennskap.
2         Forræder           Spionasje, Intriger, Hemmeligheter


In [18]:
all_responses = []

# Loop through all "Navn" and add to "all_reponses"
for i in range(len(navn)):
    prompt = f"""
    I have this TV-series: ```{navn[i]}```
    The TV-series can be from the US or from Norway.

    I guess that you do not know everything about them, 
    but I want you to return the tone of voice that describes the TV-series the best.
    I only want you to return one keywords in Norwegian, and no more text. Examples of keywords can be the following norwegian words: Lystig, melankolsk, hjertevarm, spenning, sørgelig, urovekkende, skummel, but there can many more.
    """
    response = get_completion(prompt)

    all_responses.append((navn[i], response))
    #print(response)

print(all_responses)
#print(pd.DataFrame(all_responses, columns=["Name", "Response"]))

[('Futurama', 'Morsom'), ('Stranger Things', 'Skummel'), ('Forræder', 'Spennende')]


#### Filmer

In [30]:
navn = film['Navn'].head(10).tolist()
print(navn)

['Frost', 'Twilight', 'Harry Potter', 'Ringenes Herre', 'Batman', 'Star Wars', 'Birk & Magna', 'Den lille havfruen', 'Spider-Man: Across the Spider-Verse', 'Barbie']


In [42]:
sjangre = (meta[meta['Kategori'] == 'Sjanger'])

print(sjangre['Parameter / Informasjon'].tolist())
sjangre = sjangre['Parameter / Informasjon'].head(100).tolist()


['Action', 'Fantasi', 'Magi', 'Komedie', 'Eventyr', 'Romantikk', 'Familie', 'Sci-Fi', 'Drama']


In [32]:
print(navn)
print(sjangre)

['Frost', 'Twilight', 'Harry Potter', 'Ringenes Herre', 'Batman', 'Star Wars', 'Birk & Magna', 'Den lille havfruen', 'Spider-Man: Across the Spider-Verse', 'Barbie']
['Action', 'Fantasi', 'Magi', 'Komedie', 'Eventyr', 'Romantikk', 'Familie', 'Sci-Fi', 'Drama']


In [33]:
all_responses = []

# Loop through all "Navn" and add to "all_reponses"
for i in range(len(navn)):
    for k in range(len(sjangre)):
        prompt = f"""
        I have this TV-series: ```{navn[i]}```
        The TV-series can be from the US or from Norway.

        I also have this genre (in Norwegian): ```{sjangre[k]}```
        What would you rate this TV-series with regards to the genre. You might have to translate the genre to english before you try to give the film a score.
        Please give me a number from 1-10 where 1 means that the genre is not relevant for the TV-series and 10 means that the genre is very relevant for the movie.
        I only want the number between 1-10 as a result, I do not want any more text.
        """
        response = get_completion(prompt)

        all_responses.append((navn[i], sjangre[k], response))
        #print(response)

print(all_responses)
print(pd.DataFrame(all_responses, columns=["Name", "Genre", "Response"]))

[('Frost', 'Action', '8'), ('Frost', 'Fantasi', '8'), ('Frost', 'Magi', "I'm sorry, but as an AI language model, I cannot provide personal opinions or ratings."), ('Frost', 'Komedie', '8'), ('Frost', 'Eventyr', '5'), ('Frost', 'Romantikk', '8'), ('Frost', 'Familie', '8'), ('Frost', 'Sci-Fi', '8'), ('Frost', 'Drama', '8'), ('Twilight', 'Action', '5'), ('Twilight', 'Fantasi', '8'), ('Twilight', 'Magi', '5'), ('Twilight', 'Komedie', '5'), ('Twilight', 'Eventyr', '5'), ('Twilight', 'Romantikk', '8'), ('Twilight', 'Familie', '8'), ('Twilight', 'Sci-Fi', '8'), ('Twilight', 'Drama', '8'), ('Harry Potter', 'Action', '8'), ('Harry Potter', 'Fantasi', '10'), ('Harry Potter', 'Magi', '8'), ('Harry Potter', 'Komedie', '5'), ('Harry Potter', 'Eventyr', '8'), ('Harry Potter', 'Romantikk', '8'), ('Harry Potter', 'Familie', '8'), ('Harry Potter', 'Sci-Fi', '8'), ('Harry Potter', 'Drama', '8'), ('Ringenes Herre', 'Action', '8'), ('Ringenes Herre', 'Fantasi', '9'), ('Ringenes Herre', 'Magi', '8'), ('Rin

In [37]:
#pd.DataFrame(all_responses, columns=["Name", "Genre", "Response"]).to_excel("output1.xlsx", index=False)

20230911


#### TV-serier/filmer/spill



In [9]:
navn = film['Navn'].head(5).tolist() + spill['Navn'].head(5).tolist() + tv_serie['Navn'].head(5).tolist()
print(navn)

['Frost', 'Twilight', 'Harry Potter', 'Ringenes Herre', 'Batman', 'Fortnite', 'Minecraft', 'Roblox: Piggy', 'Zelda', 'FIFA', 'Futurama', 'Stranger Things', 'Forræder', 'Kompani Lauritzen', 'Familien Lykke']


In [12]:
sjangre = (meta[meta['Kategori'] == 'Universets rikhet'])

#print(sjangre['Parameter / Informasjon'].tolist())
sjangre = sjangre['Parameter / Informasjon'].head(2).tolist()
print(sjangre)

['Film', 'TV-Serie']


In [13]:
all_responses = []

# Loop through all "Navn" and add to "all_reponses"
for i in range(len(navn)):
    for k in range(len(sjangre)):
        prompt = f"""
        Jeg har denne filmen/tv-serien/tv-spillet: ```{navn[i]}```

        Jeg ønsker at du skal fortelle meg om ```{navn[i]}``` har en/et tilhørende  ```{sjangre[k]}```
        Svar meg med enten "Ja" eller "Nei"
        """
        response = get_completion(prompt)

        all_responses.append((navn[i], sjangre[k], response))
        #print(response)

print(all_responses)
print(pd.DataFrame(all_responses, columns=["Name", "Genre", "Response"]))

[('Frost', 'Film', 'Ja, filmen "Frost" har en tilhørende film.'), ('Frost', 'TV-Serie', 'Ja'), ('Twilight', 'Film', 'Ja, Twilight har en tilhørende film.'), ('Twilight', 'TV-Serie', 'Nei'), ('Harry Potter', 'Film', 'Ja, Harry Potter har en tilhørende filmserie.'), ('Harry Potter', 'TV-Serie', 'Nei'), ('Ringenes Herre', 'Film', 'Ja, "Ringenes Herre" har en tilhørende filmserie.'), ('Ringenes Herre', 'TV-Serie', 'Ja, "Ringenes Herre" har en tilhørende TV-serie.'), ('Batman', 'Film', 'Ja'), ('Batman', 'TV-Serie', 'Ja, Batman har en tilhørende TV-serie.'), ('Fortnite', 'Film', 'Nei, Fortnite har ingen tilhørende film.'), ('Fortnite', 'TV-Serie', 'Nei, Fortnite har ikke en tilhørende TV-serie.'), ('Minecraft', 'Film', 'Nei'), ('Minecraft', 'TV-Serie', 'Nei'), ('Roblox: Piggy', 'Film', 'Nei, Roblox: Piggy har ingen tilhørende film.'), ('Roblox: Piggy', 'TV-Serie', 'Nei'), ('Zelda', 'Film', 'Nei, Zelda har ingen tilhørende film.'), ('Zelda', 'TV-Serie', 'Nei'), ('FIFA', 'Film', 'Nei, FIFA har

#### Exporting results as excel

In [14]:

import pandas as pd
from datetime import datetime

# Get the current timestamp to create a unique filename
timestamp = datetime.now().strftime("%Y%m%d")

# Define a counter to increment the filename
counter = 1

# Generate the Excel filename with a timestamp and incrementing number
excel_filename = f"data/results/output_{timestamp}_{counter}.xlsx"

# Check if the filename already exists and increment the counter if needed
while os.path.exists(excel_filename):
    counter += 1
    excel_filename = f"data/results/output_{timestamp}_{counter}.xlsx"

# Your DataFrame and Excel export code
pd.DataFrame(all_responses, columns=["Name", "Genre", "Response"]).to_excel(excel_filename, index=False)