# Activity 3.2.3 – The redshift distribution of QSOs in the SDSS

Import libraries:

In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as tic

## Method 1

First we write a small python routine so we can create a suitable SQL query based on the desired number of bins we want:

In [17]:
num_bins = 100
min_x = 0
max_x = 7.0
delta_x = (max_x-min_x)/num_bins

print("SELECT temp.bin, COUNT(*)")
print("FROM (")
print("  SELECT CASE")

for i in range(num_bins):
    left_x = i*delta_x
    right_x = (i+1)*delta_x
    average_x = (left_x+right_x)/2
    print("    WHEN z BETWEEN {0:.2f} and {1:.2f} THEN ' {0:.2f} - {1:.2f}'".format(left_x, right_x))

print("    ELSE '> 7.0'")
print("  END AS bin")
print("  FROM SpecObj")
print("  WHERE class='QSO'")
print("  AND zWarning=0) temp")
print("GROUP BY temp.bin")
print("ORDER by temp.bin")

SELECT temp.bin, COUNT(*)
FROM (
  SELECT CASE
    WHEN z BETWEEN 0.00 and 0.07 THEN '0.04'
    WHEN z BETWEEN 0.07 and 0.14 THEN '0.11'
    WHEN z BETWEEN 0.14 and 0.21 THEN '0.18'
    WHEN z BETWEEN 0.21 and 0.28 THEN '0.25'
    WHEN z BETWEEN 0.28 and 0.35 THEN '0.32'
    WHEN z BETWEEN 0.35 and 0.42 THEN '0.39'
    WHEN z BETWEEN 0.42 and 0.49 THEN '0.46'
    WHEN z BETWEEN 0.49 and 0.56 THEN '0.53'
    WHEN z BETWEEN 0.56 and 0.63 THEN '0.60'
    WHEN z BETWEEN 0.63 and 0.70 THEN '0.67'
    WHEN z BETWEEN 0.70 and 0.77 THEN '0.74'
    WHEN z BETWEEN 0.77 and 0.84 THEN '0.81'
    WHEN z BETWEEN 0.84 and 0.91 THEN '0.88'
    WHEN z BETWEEN 0.91 and 0.98 THEN '0.95'
    WHEN z BETWEEN 0.98 and 1.05 THEN '1.02'
    WHEN z BETWEEN 1.05 and 1.12 THEN '1.08'
    WHEN z BETWEEN 1.12 and 1.19 THEN '1.16'
    WHEN z BETWEEN 1.19 and 1.26 THEN '1.23'
    WHEN z BETWEEN 1.26 and 1.33 THEN '1.30'
    WHEN z BETWEEN 1.33 and 1.40 THEN '1.37'
    WHEN z BETWEEN 1.40 and 1.47 THEN '1.44'
    WHEN

Copy & Paste the query into the Skyserver SQL search; and export the result to a CSV-file such that it can be loaded into a spreadsheet program: 

## Method 2

Import the query output file:

In [18]:
# Read the data file into a dataframe using pandas
qsoData = pd.read_csv('./Activity3.2.3/Output2.txt', skiprows=1)

Plot the distribution:

In [19]:
num_bins = 100
# the histogram of the data
#n, bins, patches = plt.hist(x, num_bins, normed=1, facecolor='green', alpha=0.5)

# Plot graph using matplotlib
fig = plt.figure()
ax0 = fig.add_subplot(1, 1, 1)

ax0.hist(qsoData['z'], num_bins, facecolor='blue', alpha=0.5)
ax0.set_title('The redshift distribution of QSOs in the SDSS')

ax0.set_xlim(-1.0, 7.5)

ax0.set_xlabel('Redshift $z$')
ax0.set_ylabel('Number of quasars $n$')

# Create a grid
ax0.grid(True)

# Set the tick marks
xMinorLocator = tic.AutoMinorLocator(5)
ax0.xaxis.set_minor_locator(xMinorLocator)

yMinorLocator = tic.AutoMinorLocator(5)
ax0.yaxis.set_minor_locator(yMinorLocator)

ax0.tick_params(which = 'both', direction = 'in')

plt.show()