# HYG full database

In [2]:
import json
import pandas as pd
import os
from openai import OpenAI
from dotenv import load_dotenv

## Data Preprocessing

In [33]:
dataframe = pd.read_csv("hygfull.csv")
dataframe = dataframe[dataframe["ProperName"].notnull()]
dataframe = dataframe.loc[dataframe['ProperName'].str.strip() != ""]

dataframe.info()

<class 'pandas.core.frame.DataFrame'>
Index: 86 entries, 494 to 82775
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   StarID          86 non-null     int64  
 1   Hip             86 non-null     int64  
 2   HD              86 non-null     object 
 3   HR              76 non-null     object 
 4   Gliese          39 non-null     object 
 5   BayerFlamsteed  76 non-null     object 
 6   ProperName      86 non-null     object 
 7   RA              86 non-null     float64
 8   Dec             86 non-null     float64
 9   Distance        86 non-null     float64
 10  Mag             86 non-null     float64
 11  AbsMag          86 non-null     float64
 12  Spectrum        86 non-null     object 
 13  ColorIndex      86 non-null     object 
dtypes: float64(5), int64(2), object(7)
memory usage: 10.1+ KB


In [34]:
def trim_float(value, decimals=2):
    return round(value, decimals)

# Apply custom formatting function to float columns
dataframe['Distance'] = dataframe['Distance'].map(lambda x: trim_float(x, 2))
dataframe['AbsMag'] = dataframe['AbsMag'].map(lambda x: trim_float(x, 2))

dataframe.dropna(axis=1, how='any', inplace=True)
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
Index: 86 entries, 494 to 82775
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   StarID      86 non-null     int64  
 1   Hip         86 non-null     int64  
 2   HD          86 non-null     object 
 3   ProperName  86 non-null     object 
 4   RA          86 non-null     float64
 5   Dec         86 non-null     float64
 6   Distance    86 non-null     float64
 7   Mag         86 non-null     float64
 8   AbsMag      86 non-null     float64
 9   Spectrum    86 non-null     object 
 10  ColorIndex  86 non-null     object 
dtypes: float64(5), int64(2), object(4)
memory usage: 8.1+ KB


In [35]:
json_data = dataframe.to_dict(orient='records')
json_data

[{'StarID': 495,
  'Hip': 677,
  'HD': '   358',
  'ProperName': 'Alpheratz',
  'RA': 0.13976888,
  'Dec': 29.09082805,
  'Distance': 29.76,
  'Mag': 2.07,
  'AbsMag': -0.3,
  'Spectrum': 'B9p         ',
  'ColorIndex': '-0.038'},
 {'StarID': 544,
  'Hip': 746,
  'HD': '   432',
  'ProperName': 'Caph',
  'RA': 0.15280269,
  'Dec': 59.15021814,
  'Distance': 16.7,
  'Mag': 2.28,
  'AbsMag': 1.17,
  'Spectrum': 'F2III-IV    ',
  'ColorIndex': ' 0.380'},
 {'StarID': 782,
  'Hip': 1067,
  'HD': '   886',
  'ProperName': 'Algenib',
  'RA': 0.22059721,
  'Dec': 15.18361593,
  'Distance': 102.15,
  'Mag': 2.83,
  'AbsMag': -2.22,
  'Spectrum': 'B2IV        ',
  'ColorIndex': '-0.190'},
 {'StarID': 1489,
  'Hip': 2081,
  'HD': '  2261',
  'ProperName': 'Ankaa',
  'RA': 0.43801871,
  'Dec': -42.30512197,
  'Distance': 23.73,
  'Mag': 2.4,
  'AbsMag': 0.52,
  'Spectrum': 'K0III...    ',
  'ColorIndex': ' 1.083'},
 {'StarID': 2243,
  'Hip': 3179,
  'HD': '  3712',
  'ProperName': 'Shedir',
  'RA'

## Detail generation

In [36]:
load_dotenv()
client = OpenAI(
    api_key=os.getenv("CHATGPT_API_KEY"),
)

for entry in json_data:
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"Write a very short informative paragraph over the star named {entry["ProperName"]}",
            }
        ],
        model="gpt-3.5-turbo",
    )
    answer = chat_completion.choices[0].message.content
    entry["GeneralInformation"] = answer
    print(answer)

Alpheratz, also known as Alpha Andromedae, is a binary star system located approximately 97 light-years away from Earth in the constellation of Andromeda. It is the brightest star in the constellation and is visible to the naked eye in the night sky. Alpheratz consists of two stars that orbit each other closely, with the larger star being a white giant and the smaller star being a hot blue-white subgiant. This intriguing binary star system holds great scientific interest for astronomers studying stellar evolution and dynamics.
Caph is a binary star system located in the constellation Cassiopeia. It is approximately 54 light-years away from Earth and consists of two stars that orbit around a common center of mass. The primary star is a yellow-white subgiant, while the secondary star is a yellow dwarf. Caph is visible to the naked eye and is one of the brightest stars in the night sky.
Algenib, also known as Gamma Pegasi, is a blue-white giant star located in the constellation of Pegasus

## Price function

In [32]:
import random
json_data = {}
with open('hygfull.json', 'r', encoding="utf-8") as file:
    json_data = json.load(file)

for entry in json_data:
    dist = entry["Distance"]
    mag = entry["Mag"]

    entry["Price"] = int(((10**4)*(abs(mag*100)/(dist)))**(1.1))

with open('hygfull.json', 'w', encoding="utf-8") as file:
    x = json.dumps(json_data, indent=4)
    file.write(x)

## Assign Colors

In [9]:
import random
load_dotenv()
client = OpenAI(
    api_key=os.getenv("CHATGPT_API_KEY"),
)
    
json_data = {}
with open('hygfull.json', 'r', encoding="utf-8") as file:
    json_data = json.load(file)

for entry in json_data:
    answer =""
    colors = ["yellowWhiteStar", "yellowStar", "whiteStar", "orangeStar", "orangeRedStar", "blueStar", "blueWhiteStar"]
    while answer not in colors:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": f"reply only the most fitting value of the following {colors} it should fit to the knwon star '{entry["ProperName"]}' reply with a simple string only without quotations",
                }
            ],
            model="gpt-3.5-turbo",
        )
        
        
        answer = chat_completion.choices[0].message.content
    entry["Color"] = answer
    print(answer)

with open('hygfull.json', 'w', encoding="utf-8") as file:
    x = json.dumps(json_data, indent=4)
    file.write(x)

blueWhiteStar
blueStar
blueWhiteStar
yellowStar
orangeStar
orangeRedStar
blueStar
blueStar
orangeRedStar
blueWhiteStar
blueWhiteStar
orangeRedStar
yellowWhiteStar
yellowStar
orangeRedStar
blueWhiteStar
yellowWhiteStar
blueWhiteStar
blueWhiteStar
orangeStar
orangeRedStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
blueStar
blueStar
blueWhiteStar
blueWhiteStar
blueStar
orangeRedStar
whiteStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
whiteStar
blueWhiteStar
whiteStar
yellowWhiteStar
orangeRedStar
blueWhiteStar
yellowWhiteStar
orangeRedStar
blueWhiteStar
whiteStar
blueWhiteStar
whiteStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
blueWhiteStar
orangeRedStar
orangeRedStar
blueStar
blueStar
blueWhiteStar
orangeStar
yellowWhiteStar
blueWhiteStar
orangeRedStar
orangeRedStar
orangeRedStar
blueWhiteStar
blueStar
blueWhiteStar
orangeRedStar
blueWhiteStar
blueWhiteStar
blueStar
blueWhiteStar
blueWhiteStar
blueWhiteSt