In [19]:
import os
import math
import random
import pandas as pd
import wget

## File paths
atlasFile = 'atlas.csv'
listFile = 'listOfIds.txt'
webFile = 'websites.txt'
resultsFolder = 'results'
library = 'library.txt'

# initializers
ra, dec, radius = 0, 0, 0.0014
id = '0'
stars = []
allCSVs = []
bands = ["I4"]#['I1', 'I2', 'I3', 'I4', 'M1', 'M2', 'M3']
atlasFile = pd.read_csv(atlasFile)
starter = 'https://irsa.ipac.caltech.edu/SIA?COLLECTION=spitzer_sha&RESPONSEFORMAT=CSV&POS=circle'

# Convert Panda series to arrays
objidA = pd.Series(atlasFile['objid']).array
raA = pd.Series(atlasFile['ra']).array
decA = pd.Series(atlasFile['dec']).array

atlasFile

Unnamed: 0,objid,ra,dec
0,SSTSL2 J152742.67+023551.8,231.927823,2.597747
1,SSTSL2 J053049.72-670554.9,82.707184,-67.098598
2,SSTSL2 J153607.88+384214.4,234.032847,38.704015
3,SSTSL2 J224656.98+602924.5,341.737456,60.490152
4,SSTSL2 J022523.15-733726.6,36.346464,-73.624064
...,...,...,...
17459,SSTSL2 J053328.89-683636.5,83.370390,-68.610158
17460,SSTSL2 J194613.16+263840.3,296.554874,26.644537
17461,SSTSL2 J193929.88+233823.8,294.874522,23.639946
17462,SSTSL2 J174449.11-333318.8,266.204645,-33.555244


In [20]:
# takes ids in listOfIds.txt into a list
def listOut():
    list = []
    myfile = open(listFile, "rt")
    contents = myfile.read() + '\n'
    myfile.close()
    while len(contents) > 0:
        index = contents.find('\n')
        id = contents[0:index]
        list.append(id)
        contents = contents[index+1:]
    return(list)

# Asks for Object ID, then returns ra and dec of the object is available
# If not available, display error and ends program
# Edit: replaces spaces in ID with underscores
def search(id):
    for i in range(len(objidA)):
        if id == objidA[i]:
            ra = raA[i]
            dec = decA[i]
            id = id.replace(' ', '_')
            return (id, ra, dec)
    print(f'Error: search()\nInvalid id: {id}')
    exit()

# Rounds input to the 5th decimal and outputs string
def roundCords(num):
    temp = (num*100000)+0.5
    temp = math.floor(temp)
    temp = str(float(temp)/100000)
    return(temp)

# Outputs website
def makeWeb(r, d, rad):
    endP = f'{starter}+{r}+{d}+{rad}'
    return(endP)

# Makes the website and adds to a list
def makeEntry(id):
    id, ra, dec = search(id)
    website = (makeWeb(roundCords(ra),roundCords(dec),roundCords(radius)))
    check = False

    #checks if the entry is already made
    for i in stars:
        if i[0] == id:
            check = True

    if check:
        print(f'{id} entry already in listOfIds.txt file')
    else:
        print(f'{id} entry made!')
        entry = [id, ra, dec, radius, website]
        stars.append(entry)
    return(stars)

# Makes a number of random obj #s
def randomIds(randomN):
    for i in range(randomN):
        websites = makeEntry(objidA[random.randrange(0, len(objidA))])
    return(websites)

# Takes a URL and returns the ending parts
def breakUpUrl(string):
    # Reverse the string to search from the end
    reversed_string = string[::-1]
    # Find the index of the first "\" from the end
    slash_index = reversed_string.find("/")
    # Get the substring after the "\" and reverse it back
    after_slash = reversed_string[:slash_index][::-1]
    # Split the string by "_" and return the result
    parts = after_slash.split("_")
    return parts

# Splits url to look for the wanted bands and if it's a maic.fits, and marks which bands are included
def checkBands(urls):
    rowsToSave = []
    includedBands = []
    linkCounter = 0
    for i in range(len(urls)):
        parts = breakUpUrl(urls[i])
        for band in bands:
            if band == parts[1]:
                if 'maic.fits' == parts[6]:
                    rowsToSave.append(i)
                    linkCounter += 1
                    if band in includedBands:
                        pass
                    else:
                        includedBands.append(band)
    return(rowsToSave, includedBands, linkCounter)

# Only grabs the urls that we are looking for
def trimFat(entry):
    filePath = f'{resultsFolder}/{entry[0]}.csv'
    fat = pd.read_csv(filePath)
    s_ras = pd.Series(fat['s_ra'])
    s_decs = pd.Series(fat['s_dec'])
    obs_ids = pd.Series(fat['obs_id']).array
    urls = pd.Series(fat['access_url']).array
    
    #Searches for what rows to save
    rowsToSave, includedBands, linkCounter = checkBands(urls)
    entry.append(includedBands)
    entry.append(linkCounter)
    
    # Overwrites a CSV file, keeping the 's_ra', 's_dec', 'obs_id', and 'access_url'
    theMeat = []
    for row in rowsToSave:
        oneRow = []
        oneRow.append(obs_ids[row])
        oneRow.append(s_ras[row])
        oneRow.append(s_decs[row])
        oneRow.append(urls[row])
        theMeat.append(oneRow)
    theMeatDf = pd.DataFrame(theMeat, columns=['obs_id', 's_ra', 's_dec', 'access_url'])
    theMeatDf.to_csv(filePath, mode='w')
    return(entry)
    
# Download CSV from provided list from 'stars'
def grabCSV(entry):
    id = entry[0]
    link = entry[4]

    # adds all file names in 'results' in a list
    results = []
    for file_path in os.listdir(resultsFolder):
        if os.path.isfile(os.path.join(resultsFolder, file_path)) or os.path.isdir(os.path.join(resultsFolder, file_path)):
            results.append(file_path)

    #checks if the csv is already downloaded
    if (id+'.csv') in results:
        print(f'{id}.csv already in results folder\n')
    elif id in results:
        print(f'{id} folder already in results folder\n')
    else:
        print('Grabbing...')
        wget.download(link, f'{id}.csv')
        os.rename(f'{id}.csv', f'{resultsFolder}/{id}.csv')
        print(f'Downloaded: {id}.csv\n')
        entry = trimFat(entry)
        return(entry)

# Adds the ID, Ra, Dec, how many links, included bands, and reference csv link
def updateLibrary():
    wordsInFile = ''
    print(stars)
    for entry in stars:
        print(entry)
        wordsInFile = wordsInFile + f'ID: {entry[0]}\nRA: {entry[1]} DEC: {entry[2]} RADIUS: {entry[3]}\nNumber of Links: {entry[6]}\nIncluded Bands: '
        for i in range(len(entry[5])):
            wordsInFile = wordsInFile + f'{entry[5][i]} '
        wordsInFile = wordsInFile + f'\n{entry[4]}\n\n'
    f = open(library,'w')
    f.write(wordsInFile)
    f.close
    print('Updated Library!\n')

# Self-explanatory tbh
def downloadFITS(limit):
    # Lists out all CSVs
    referenceCSVs = []
    for file_path in os.listdir(resultsFolder):
        if file_path[-3:] == "csv":
            if os.path.isfile(os.path.join(resultsFolder, file_path)):
                referenceCSVs.append(file_path)

    # Creates a directory for each CSV with the CSV's object id
    for csv in referenceCSVs:
        folderPath = f'{resultsFolder}/{csv[:-4]}'
        os.mkdir(folderPath)

        # Downloads the FITS files until the limit
        csvDF = pd.read_csv(f'{resultsFolder}/{csv}')
        obsIDs = pd.Series(csvDF['obs_id']).array
        urls = pd.Series(csvDF['access_url'])
        counter = 0
        print(f'Downloading from {csv}...\n')
        while counter < limit or counter < len(referenceCSVs):
            parts = breakUpUrl(urls[counter])
            band = parts[1]
            wget.download(urls[counter], f'{folderPath}/({band}){obsIDs[counter]}.fits')
            print(f'\tDownloaded ({band}){obsIDs[counter]}.fits\n')
            counter += 1

        # Renames the CSV 'reference.csv' and moves it into the directory
        os.rename(f'{resultsFolder}/{csv}', f'{folderPath}/reference.csv')
        print(f'Finished downloading from {csv}\n')
    print(f'Finished downloading FITs files\n')


# Self-explanatory again
def askToDownload():
    ask = input('Download FITS flies?(y/n): ')
    if ask.lower() == 'y':
        number = input('Limit of links per star: ')
        if ask.isdigit:
            downloadFITS(math.floor(int(number)))
        else:
            print('Invalid')
    elif ask.lower != 'n':
        print('Invalid')

# Random links option
def randomLinks():
    number = input('How many stars: ')
    if number.isdigit:
        stars = randomIds(math.floor(int(number)))
        for i in range(len(stars)):
            stars[i] = grabCSV(stars[i])
        print('Downloaded CSV(s)!')
        updateLibrary()
    else:
        print('Invalid')

# NonRandom links option
def nonRandomLinks():
    listOfIds = listOut()
    for obj in range(len(listOfIds)):
        stars = makeEntry(listOfIds[obj])
    for i in range(len(stars)):
        stars[i] = grabCSV(stars[i])
    print('Downloaded CSV(s)!')
    updateLibrary()


In [21]:
start = input('Look for links?(y/n): ')
if start.lower() == 'y':
    # links = input('Random links?(y/n): ')
    # if links.lower() == 'y':
    #     randomLinks()
    #     askToDownload()
    # elif links.lower() == 'n':
    nonRandomLinks()
    askToDownload()
    # else:
    #     print('Invalid')
elif start.lower() == 'n':
    askToDownload()
else:
    print('Invalid')

SSTSL2_J083825.07-531910.5 entry made!
SSTSL2_J084041.74-530834.4 entry made!
SSTSL2_J004151.36-712355.6 entry made!
SSTSL2_J053354.87-005229.9 entry made!
SSTSL2_J021602.36-742622.2 entry made!
SSTSL2_J004337.13-734709.9 entry made!
SSTSL2_J053123.18-015323.0 entry made!
SSTSL2_J003121.18-740243.0 entry made!
SSTSL2_J050738.35-701837.8 entry made!
Grabbing...
Downloaded: SSTSL2_J083825.07-531910.5.csv

Grabbing...
Downloaded: SSTSL2_J084041.74-530834.4.csv

Grabbing...
Downloaded: SSTSL2_J004151.36-712355.6.csv

Grabbing...
Downloaded: SSTSL2_J053354.87-005229.9.csv

Grabbing...
Downloaded: SSTSL2_J021602.36-742622.2.csv

Grabbing...
Downloaded: SSTSL2_J004337.13-734709.9.csv

Grabbing...
Downloaded: SSTSL2_J053123.18-015323.0.csv

Grabbing...
Downloaded: SSTSL2_J003121.18-740243.0.csv

Grabbing...
Downloaded: SSTSL2_J050738.35-701837.8.csv

Downloaded CSV(s)!
[['SSTSL2_J083825.07-531910.5', 129.604478, -53.319597, 0.0014, 'https://irsa.ipac.caltech.edu/SIA?COLLECTION=spitzer_sha&RESP

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'results/.gitig'