# Data generation

In [1]:
import datetime
import os
import openai
import sys

from dotenv import load_dotenv

In [2]:
# Load the Azure OpenAI settings from the local env file into the process environment.
load_dotenv("azure.env")

# Azure Open AI
# Plain attribute assignment: the former `: str` annotation on an attribute
# target added nothing.
openai.api_type = "azure"
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")

# os.getenv returns None for unset variables; report them here (without
# stopping the notebook) so a missing setting is visible before the first
# API call rather than surfacing later as a less specific error.
_missing_settings = [
    name
    for name in ("OPENAI_API_KEY", "OPENAI_API_BASE", "OPENAI_API_VERSION")
    if not os.getenv(name)
]
if _missing_settings:
    print(
        "Warning: missing Azure Open AI settings:",
        ", ".join(_missing_settings),
        file=sys.stderr,
    )

print("Open AI version:", openai.__version__)

Open AI version: 0.28.1


In [3]:
# Show the interpreter version string of the running kernel (for reproducibility).
sys.version

'3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]'

In [4]:
# Record when the notebook was executed (day-month-year, 24-hour time).
print("Today is:", format(datetime.datetime.today(), "%d-%b-%Y %H:%M:%S"))

Today is: 12-Oct-2023 14:59:01


In [5]:
def datagen(text, engine="text-davinci-003", max_tokens=4000, temperature=0.7):
    """
    Data generation with Azure OpenAI.

    Sends `text` as the prompt of a completion request and returns the
    generated text.

    Args:
        text (str): Instruction describing the data to generate.
        engine (str): Forwarded as `engine` to `openai.Completion.create`.
        max_tokens (int): Forwarded as `max_tokens`.
        temperature (float): Forwarded as `temperature`.

    Returns:
        str: The `"text"` field of the first entry in the response's
        `"choices"` list.
    """
    # Send the function argument. The previous version passed the
    # notebook-level `prompt` variable instead, so `text` was ignored and the
    # call failed with NameError whenever no global `prompt` was defined.
    # NOTE(review): presumably prompt tokens + max_tokens must fit within the
    # model's context window, so very long prompts may need a smaller
    # max_tokens — confirm against the deployed model's limits.
    response = openai.Completion.create(
        engine=engine,
        prompt=text,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    return response["choices"][0]["text"]

## Testing

In [6]:
# Smoke test: request JSON output (ten 1970s US movies with release year and director).
# NOTE(review): the output recorded below stops in the middle of a record, so as
# saved it is not parseable JSON — confirm on re-run before relying on it.
prompt = "Create a three-column json file of 10 top 70's USA movies along with the year of release and the director."
print(datagen(prompt))



[
    {
        "Movie": "The Godfather",
        "Year": 1972,
        "Director": "Francis Ford Coppola"
    },
    {
        "Movie": "Star Wars",
        "Year": 1977,
        "Director": "George Lucas"
    },
    {
        "Movie": "Jaws",
        "Year": 1975,
        "Director": "Steven Spielberg"
    },
    {
        "Movie": "Rocky",
        "Year": 1976,
        "Director": "John G. Avildsen"
    },
    {
        "Movie": "The Exorcist",
        "Year": 1973,
        "Director": "William Friedkin"
    },
    {
        "Movie": "Taxi Driver",
        "Year": 1976,
        "Director": "Martin Scorsese"
    },
    {
        "Movie": "The French Connection",
        "Year": 1971,
        "Director": "William Friedkin"
    },
    {
        "Movie": "The Deer Hunter",
        "Year": 1978,
        "Director": "Michael Cimino"
    },
    {
        "Movie": "Close Encounters of the Third Kind",
        "Year": 1977,
        "Director": "Steven Spielberg"
    },
    {
        "Movie

In [7]:
# Smoke test: request plain-text output (Pink Floyd albums with release year and band).
prompt = "Create a text file of all pink floyd albums along with the year of release and the band."
print(datagen(prompt))



The Piper at the Gates of Dawn (1967) - Pink Floyd
A Saucerful of Secrets (1968) - Pink Floyd
More (1969) - Pink Floyd
Ummagumma (1969) - Pink Floyd
Atom Heart Mother (1970) - Pink Floyd
Meddle (1971) - Pink Floyd
Obscured by Clouds (1972) - Pink Floyd
The Dark Side of the Moon (1973) - Pink Floyd
Wish You Were Here (1975) - Pink Floyd
Animals (1977) - Pink Floyd
The Wall (1979) - Pink Floyd
The Final Cut (1983) - Pink Floyd
A Momentary Lapse of Reason (1987) - Pink Floyd
The Division Bell (1994) - Pink Floyd
The Endless River (2014) - Pink Floyd


In [8]:
# Smoke test: request XML output (Led Zeppelin records from 1968 to 1975).
# NOTE(review): the output recorded below includes an album dated 1976, outside
# the requested range — generated content is not validated here.
prompt = "Create a XML file of all the Led Zeppelin records made between 1968 and 1975"
print(datagen(prompt))



<?xml version="1.0" encoding="UTF-8" ?>
<ledzeppelin>
	<albums>
		<album>
			<title>Led Zeppelin</title>
			<year>1968</year>
		</album>
		<album>
			<title>Led Zeppelin II</title>
			<year>1969</year>
		</album>
		<album>
			<title>Led Zeppelin III</title>
			<year>1970</year>
		</album>
		<album>
			<title>Led Zeppelin IV</title>
			<year>1971</year>
		</album>
		<album>
			<title>Houses of the Holy</title>
			<year>1973</year>
		</album>
		<album>
			<title>Physical Graffiti</title>
			<year>1975</year>
		</album>
		<album>
			<title>Presence</title>
			<year>1976</year>
		</album>
	</albums>
</ledzeppelin>


In [9]:
# Smoke test: request a semicolon-separated listing (Stephen King books with publication year).
prompt = "Create a ; separated file of all Stephen King books with the published year"
print(datagen(prompt))



Carrie;1974
'Salem's Lot;1975
The Shining;1977
The Stand;1978
The Dead Zone;1979
Firestarter;1980
Cujo;1981
The Dark Tower: The Gunslinger;1982
Christine;1983
Pet Sematary;1983
Cycle of the Werewolf;1983
The Talisman;1984
It;1986
Misery;1987
The Tommyknockers;1987
The Dark Half;1989
Needful Things;1991
Gerald's Game;1992
Dolores Claiborne;1993
Insomnia;1994
Rose Madder;1995
Desperation;1996
The Green Mile;1996
Bag of Bones;1998
The Girl Who Loved Tom Gordon;1999
Dreamcatcher;2001
From a Buick 8;2002
The Dark Tower VII: The Dark Tower;2004
Cell;2006
Lisey's Story;2006
Duma Key;2008
Under the Dome;2009
11/22/63;2011
Joyland;2013
Doctor Sleep;2013
Revival;2014
Finders Keepers;2015
Sleeping Beauties;2017


In [10]:
# Smoke test: request JSON output with free-text fields (five Stephen King books with a summary).
# NOTE(review): the output recorded below shows four entries, trailing commas and
# no closing brace, so as saved it is not valid JSON — confirm on re-run and
# validate (e.g. with json.loads) before using generated JSON downstream.
prompt = "Create a json file of 5 Stephen King books with a summary"
print(datagen(prompt))



{
    "The Shining": {
        "Summary": "Jack Torrance, a struggling writer, takes a job as the winter caretaker of the Overlook Hotel in the Colorado Rockies and moves in with his wife and son. But the family soon learns that the hotel is haunted by a violent past.",
    },
    "It": {
        "Summary": "A group of seven outcast kids in Derry, Maine, are about to face their worst nightmare — an ancient, shape-shifting evil that emerges from the sewer every 27 years to prey on the town's children.",
    },
    "Carrie": {
        "Summary": "Carrie White is an outcast at school and tortured by her deeply religious mother at home. But when she discovers she has telekinetic powers, she begins to use them to exact revenge on her tormentors.",
    },
    "The Stand": {
        "Summary": "When a devastating plague wipes out most of the world's population, a group of survivors is forced to decide between good and evil as they battle for control of the remnants of civilization.",
    },

In [11]:
# Smoke test: request synthetic CSV rows (first name, last name, gender, date of birth, city, country).
prompt = "Generate 10 lines of a csv fake file with some firstnames, lastnames, gender, date of birth, city, country"
print(datagen(prompt))



John,Smith,Male,02/04/1985,New York,USA
Maria,Garcia,Female,12/06/1979,Los Angeles,USA
James,Brown,Male,01/02/1980,Chicago,USA
Jessica,Davis,Female,03/09/1990,Houston,USA
Steven,Wilson,Male,08/05/1975,Philadelphia,USA
Sandra,Taylor,Female,07/10/1970,Phoenix,USA
Michael,Martin,Male,11/11/1995,San Antonio,USA
Anna,Anderson,Female,05/03/1985,San Diego,USA
David,Thompson,Male,04/07/1992,Dallas,USA
Elizabeth,Robinson,Female,09/12/1988,San Jose,USA
