## 銀河イメージデータのダウンロード

In [1]:
import os, shutil
import sys, time
import numpy as np
from astropy.io import fits
import urllib.request, urllib.parse
from bs4 import BeautifulSoup

# Working directory for the catalogs and downloaded images (customize for
# your environment). File names are appended to this string directly, so it
# must end with a path separator.
galaxy_data_directory = '/home/satoshi/Galaxy/'

#### 'Galaxy Zoo 2'カタログを「楕円銀河」カタログと「渦巻銀河」カタログに分割する

In [2]:
# Split the Galaxy Zoo 2 catalog into an "elliptical" and a "spiral" catalog.
#
# Prerequisite: download
# http://gz2hart.s3.amazonaws.com/gz2_hart16.fits.gz, decompress it and copy
# the resulting gz2_hart16.fits into galaxy_data_directory.
gz2_catalog = galaxy_data_directory + 'gz2_hart16.fits'

with fits.open(gz2_catalog) as hdul:
    data = hdul[1].data
    print('all data rows = ', data.shape[0])

    # First character of every 'gz2_class' value. The column is fetched once
    # (not once per row), and a plain 'U1' string array is used instead of the
    # removed np.object alias. Slicing with [:1] also tolerates empty values.
    gz2class = np.array(
        [name[:1] for name in data.field('gz2_class')], dtype='U1'
    )

    # Class names starting with 'E' are elliptical galaxies, those starting
    # with 'S' are spiral galaxies; each subset goes to its own FITS table.
    # writeto() is left at its default, so it raises if the output file
    # already exists.
    for label, prefix, filename in (
        ('elliptical', 'E', 'elliptical.fits'),
        ('spiral', 'S', 'spiral.fits'),
    ):
        subset = data[gz2class == prefix]
        print(label + ' data rows = ', subset.shape[0])
        fits.BinTableHDU(data=subset).writeto(galaxy_data_directory + filename)

all data rows =  239695
elliptical data rows =  97670
spiral data rows =  141430


#### 「楕円銀河」イメージデータのダウンロード

In [3]:
# Download JPEG cutouts for the first 4000 galaxies of the elliptical catalog.
elliptical_catalog = galaxy_data_directory + 'elliptical.fits'
elliptical_directory = galaxy_data_directory + 'elliptical'
# exist_ok lets an interrupted download be re-run without failing here.
os.makedirs(elliptical_directory, exist_ok=True)


def _fetch_with_retry(url, attempts=3, timeout=5):
    """Return the response body of *url* as bytes, or None if all attempts fail.

    The request is tried up to *attempts* times, each with *timeout* seconds.
    When every attempt fails the last error is printed, so the caller can skip
    the record instead of reusing data from a previous request.
    """
    last_error = None
    for _ in range(attempts):
        try:
            with urllib.request.urlopen(url, timeout=timeout) as res:
                return res.read()
        except Exception as e:  # best-effort download: retry on any failure
            last_error = e
    print('\nrequest failed:', url, '->', repr(last_error))
    return None


# Read the needed columns once instead of reopening the catalog for every
# galaxy; min() guards against a catalog with fewer than 4000 rows.
with fits.open(elliptical_catalog) as hdul:
    data = hdul[1].data
    targets = [
        (
            str(data[i].field('ra')),
            str(data[i].field('dec')),
            str(data[i].field('dr7objid')),
            data[i].field('gz2_class'),
        )
        for i in range(min(4000, data.shape[0]))
    ]

skipped = 0
for i, (ra, dec, objid, gz2class) in enumerate(targets):

    # SQL query against the Galaxy table of the SDSS DR7 database
    query_params = {
        'format': 'html', 'cmd': 'SELECT objID, petroR90_r FROM Galaxy WHERE objID = ' + objid
    }
    # SDSS DR7 SQL search service
    dr7_sql_url = 'http://cas.sdss.org/dr7/en/tools/search/x_sql.asp?' + urllib.parse.urlencode(query_params)

    jpeg_data = None
    sql_response = _fetch_with_retry(dr7_sql_url)
    if sql_response is not None:
        try:
            cells = BeautifulSoup(sql_response.decode("utf-8"), "html.parser").find_all("td")
            # scale = 0.02 x petroR90_r (Petrosian radius).
            # Presumably the 4th <td> of the result page holds petroR90_r;
            # verify against the service output.
            scale = str(0.02 * float(cells[3].string))
        except (IndexError, TypeError, ValueError) as e:
            # No result row, empty cell, or undecodable/non-numeric content.
            print('\nno usable petroR90_r for objID', objid, '->', repr(e))
        else:
            # 128 pixel x 128 pixel cutout
            galaxy_params = {
                'ra': ra, 'dec': dec, 'scale': scale, 'width': '128', 'height': '128'
            }
            # SDSS DR7 ImageCutout service
            dr7_imagecutout_url = 'http://skyservice.pha.jhu.edu/DR7/ImgCutout/getjpeg.aspx?' + urllib.parse.urlencode(galaxy_params)
            jpeg_data = _fetch_with_retry(dr7_imagecutout_url)

    if jpeg_data is None:
        # Nothing is written for this galaxy rather than saving stale data.
        skipped += 1
    else:
        # File name of the elliptical galaxy JPEG image
        elliptical_filename = elliptical_directory + '/' + objid + '_' + gz2class + '_' + scale[:4] + '.jpeg'
        with open(elliptical_filename, 'wb') as f:
            f.write(jpeg_data)

    sys.stdout.write('\r{}' .format(i))
    sys.stdout.flush()
    # Pause between galaxies to avoid hammering the SDSS services.
    time.sleep(1.0)

print('    completed')
if skipped:
    print(skipped, 'galaxies skipped because of download or parse errors')

3999    completed


#### 「渦巻銀河」イメージデータのダウンロード

In [4]:
# Download JPEG cutouts for the first 4000 galaxies of the spiral catalog.
spiral_catalog = galaxy_data_directory + 'spiral.fits'
spiral_directory = galaxy_data_directory + 'spiral'
# exist_ok lets an interrupted download be re-run without failing here.
os.makedirs(spiral_directory, exist_ok=True)


def _fetch_with_retry(url, attempts=3, timeout=5):
    """Return the response body of *url* as bytes, or None if all attempts fail.

    The request is tried up to *attempts* times, each with *timeout* seconds.
    When every attempt fails the last error is printed, so the caller can skip
    the record instead of reusing data from a previous request.
    """
    last_error = None
    for _ in range(attempts):
        try:
            with urllib.request.urlopen(url, timeout=timeout) as res:
                return res.read()
        except Exception as e:  # best-effort download: retry on any failure
            last_error = e
    print('\nrequest failed:', url, '->', repr(last_error))
    return None


# Read the needed columns once instead of reopening the catalog for every
# galaxy; min() guards against a catalog with fewer than 4000 rows.
with fits.open(spiral_catalog) as hdul:
    data = hdul[1].data
    targets = [
        (
            str(data[i].field('ra')),
            str(data[i].field('dec')),
            str(data[i].field('dr7objid')),
            data[i].field('gz2_class'),
        )
        for i in range(min(4000, data.shape[0]))
    ]

skipped = 0
for i, (ra, dec, objid, gz2class) in enumerate(targets):

    # SQL query against the Galaxy table of the SDSS DR7 database
    query_params = {
        'format': 'html', 'cmd': 'SELECT objID, petroR90_r FROM Galaxy WHERE objID = ' + objid
    }
    # SDSS DR7 SQL search service
    dr7_sql_url = 'http://cas.sdss.org/dr7/en/tools/search/x_sql.asp?' + urllib.parse.urlencode(query_params)

    jpeg_data = None
    sql_response = _fetch_with_retry(dr7_sql_url)
    if sql_response is not None:
        try:
            cells = BeautifulSoup(sql_response.decode("utf-8"), "html.parser").find_all("td")
            # scale = 0.03 x petroR90_r (Petrosian radius).
            # NOTE(review): the previous comment said 0.02 (the factor used
            # for elliptical galaxies) while the code uses 0.03; the value is
            # kept as-is -- confirm which one is intended.
            # Presumably the 4th <td> of the result page holds petroR90_r;
            # verify against the service output.
            scale = str(0.03 * float(cells[3].string))
        except (IndexError, TypeError, ValueError) as e:
            # No result row, empty cell, or undecodable/non-numeric content.
            print('\nno usable petroR90_r for objID', objid, '->', repr(e))
        else:
            # 128 pixel x 128 pixel cutout
            galaxy_params = {
                'ra': ra, 'dec': dec, 'scale': scale, 'width': '128', 'height': '128'
            }
            # SDSS DR7 ImageCutout service
            dr7_imagecutout_url = 'http://skyservice.pha.jhu.edu/DR7/ImgCutout/getjpeg.aspx?' + urllib.parse.urlencode(galaxy_params)
            jpeg_data = _fetch_with_retry(dr7_imagecutout_url)

    if jpeg_data is None:
        # Nothing is written for this galaxy rather than saving stale data.
        skipped += 1
    else:
        # File name of the spiral galaxy JPEG image
        spiral_filename = spiral_directory + '/' + objid + '_' + gz2class + '_' + scale[:4] + '.jpeg'
        with open(spiral_filename, 'wb') as f:
            f.write(jpeg_data)

    sys.stdout.write('\r{}' .format(i))
    sys.stdout.flush()
    # Pause between galaxies to avoid hammering the SDSS services.
    time.sleep(1.0)

print('    completed')
if skipped:
    print(skipped, 'galaxies skipped because of download or parse errors')

3999    completed
