In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tabula
from astropy.coordinates import SkyCoord
from astropy import units as u

In [18]:
df_known = pd.DataFrame(columns=['Name', 'ra', 'dec', 'n'])

# Lemmon database

In [19]:
df_lemon = pd.read_csv('data/known_gls.csv')
df_lemon

Unnamed: 0,RA,DEC,Name,z_qso,z_lens,separation,N_images,W1,W2,smss
0,2.834350,-8.764300,J0011-0845,1.7,-,1.89,2,15.190,14.455,False
1,3.348077,51.318300,J0013+5119,2.63,-,2.92,2,14.677,13.920,True
2,7.093690,6.531700,PSJ0028+0631,1.06,-,2.81,2,14.160,13.363,False
3,7.563600,-15.417700,PSJ0030-1525,3.36,measured,1.78,4,14.312,13.761,False
4,11.946594,25.241100,J0047+2514,1.20,-,1.73,2,14.942,14.269,True
...,...,...,...,...,...,...,...,...,...,...
215,353.080500,-18.868500,PSJ2332-1852,1.49,-,1.97,2,14.744,13.787,False
216,355.799750,-0.842860,ULASJ2343-0050,0.787,0.3,1.32,2,14.647,13.686,False
217,356.070600,-30.940560,WISE2344-3056,1.298,1.30,2.18,4,14.758,13.889,False
218,357.491900,-45.314700,DESJ2349-4518,2.89,-,2.11,2,14.939,14.322,False


In [20]:
df_known['Name'] = df_lemon['Name']
df_known['ra'] = df_lemon['RA']
df_known['dec'] = df_lemon['DEC'].astype(float)
df_known['n'] = df_lemon['N_images']
df_known

Unnamed: 0,Name,ra,dec,n
0,J0011-0845,2.834350,-8.764300,2
1,J0013+5119,3.348077,51.318300,2
2,PSJ0028+0631,7.093690,6.531700,2
3,PSJ0030-1525,7.563600,-15.417700,4
4,J0047+2514,11.946594,25.241100,2
...,...,...,...,...
215,PSJ2332-1852,353.080500,-18.868500,2
216,ULASJ2343-0050,355.799750,-0.842860,2
217,WISE2344-3056,356.070600,-30.940560,4
218,DESJ2349-4518,357.491900,-45.314700,2


In [21]:
df_known.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220 entries, 0 to 219
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    220 non-null    object 
 1   ra      220 non-null    float64
 2   dec     220 non-null    float64
 3   n       220 non-null    int64  
dtypes: float64(2), int64(1), object(1)
memory usage: 7.0+ KB


In [44]:
"""
java -jar .venv/lib/python3.12/site-packages/tabula/tabula-1.0.5-jar-with-dependencies.jar -p 14 -a 100,10,410,600 -o ./data/strides_quads.csv "/Users/alexeysergeyev/Zotero/storage/GYY7NDJT/Schmidt et al. - 2023 - STRIDES automated uniform models for 30 quadruply imaged quasars.pdf"
"""
df_strides = pd.read_csv('data/strides_quads.csv', header=None)

df_strides.replace(' ', '', regex=True, inplace=True)
# Replace non-standard minus sign with standard minus sign
df_strides[2] = df_strides[2].astype('str').str.replace('−', '-')

# Convert the column to float
df_strides[1] = df_strides[1].astype(float)
df_strides[2] = df_strides[2].astype(float)
# df_strides[2].astype('str').replace('−', '-', regex=True, inplace=True)
df_strides

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,J0029−3814,7.419298,-38.2406,−0.134,−0.155,1.156,−0.513,−0.592,−0.490,−0.975,0.071,−0.321,0.384
1,PSJ0030−1525,7.563492,-15.4178,−0.098,0.020,0.746,−0.908,−0.868,0.002,0.778,0.887,1.005,0.608
2,DESJ0053−2012,13.435033,-20.209147,−0.422,0.429,−0.529,1.036,1.414,0.012,0.691,−0.731,−1.424,−1.111
3,PSJ0147+4630,26.792372,46.511872,−0.145,−1.137,−0.317,−2.296,−1.218,0.836,0.029,0.936,1.191,0.526
4,WG0214−2105,33.568175,-21.093137,0.071,−0.015,0.556,−0.868,−0.706,−0.136,−0.260,0.790,0.633,0.517
5,SDSSJ0248+1913,42.203067,19.225228,0.105,0.071,0.451,−0.748,−0.549,−0.135,−0.404,0.699,0.503,0.661
6,WISEJ0259−1635,44.928533,-16.59537,0.058,−0.039,0.035,−0.730,−0.727,0.216,0.434,0.537,0.752,−0.342
7,J0343−2828,55.79765,-28.477948,−0.869,0.894,−1.251,1.352,1.959,1.349,1.364,−0.746,−1.329,−1.952
8,DESJ0405−3308,61.49896,-33.14741,−0.014,−0.045,0.691,−0.279,−0.374,−0.605,−0.529,0.416,0.349,0.556
9,DESJ0420−4037,65.194823,-40.624087,0.113,−0.001,0.821,−0.579,−0.586,−0.349,−0.339,0.675,0.287,0.796


In [46]:
coords_known = SkyCoord(ra=df_known['ra'], dec=df_known['dec'], unit=(u.deg, u.deg))
coords_strides = SkyCoord(ra=df_strides[1], dec=df_strides[2], unit=(u.deg, u.deg))
idx, d2d, _, = coords_strides.match_to_catalog_sky(coords_known)
df_strides['sep'] = d2d.to(u.arcsec).value
cond = df_strides['sep'] > 10
df_add = df_strides.loc[cond, [0, 1, 2]]
# df_add['n'] = 4 * np.ones(df_add.shape[0])
df_add[1] = df_add[1].astype(float)
df_add[2] = df_add[2].astype(float)
df_add

Unnamed: 0,0,1,2
0,J0029−3814,7.419298,-38.2406
7,J0343−2828,55.79765,-28.477948
9,DESJ0420−4037,65.194823,-40.624087
12,J0659+1629,104.766545,16.485908
13,J0818−2613,124.617817,-26.22374
28,J2205−3727,331.434422,-37.450361


In [47]:
df_known = pd.concat([df_known, df_add[[0, 1, 2]].rename(columns={0: 'Name', 1: 'ra', 2: 'dec'})], ignore_index=True)
df_known


Unnamed: 0,Name,ra,dec,n
0,J0011-0845,2.834350,-8.764300,2.0
1,J0013+5119,3.348077,51.318300,2.0
2,PSJ0028+0631,7.093690,6.531700,2.0
3,PSJ0030-1525,7.563600,-15.417700,4.0
4,J0047+2514,11.946594,25.241100,2.0
...,...,...,...,...
221,J0343−2828,55.797650,-28.477948,
222,DESJ0420−4037,65.194823,-40.624087,
223,J0659+1629,104.766545,16.485908,
224,J0818−2613,124.617817,-26.223740,


In [48]:
df_known.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 226 entries, 0 to 225
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    226 non-null    object 
 1   ra      226 non-null    float64
 2   dec     226 non-null    float64
 3   n       220 non-null    float64
dtypes: float64(3), object(1)
memory usage: 7.2+ KB


# 150 candidates

In [49]:
"""
java -jar .venv/lib/python3.12/site-packages/tabula/tabula-1.0.5-jar-with-dependencies.jar -p 4-5 -a 130,40,800,600 -o ./data/lemon150_1-2.csv "/Users/alexeysergeyev/Zotero/storage/BCFE69ZR/Lemon et al. - 2022 - Gravitationally lensed quasars in Gaia -- IV. 150 new lenses, quasar pairs, and projected quasars.pdf"
"""
"""
java -jar .venv/lib/python3.12/site-packages/tabula/tabula-1.0.5-jar-with-dependencies.jar -p 6 -a 120,40,660,600 -o ./data/lemon150_3.csv "/Users/alexeysergeyev/Zotero/storage/BCFE69ZR/Lemon et al. - 2022 - Gravitationally lensed quasars in Gaia -- IV. 150 new lenses, quasar pairs, and projected quasars.pdf"
"""

'\njava -jar .venv/lib/python3.12/site-packages/tabula/tabula-1.0.5-jar-with-dependencies.jar -p 6 -a 120,40,660,600 -o ./data/lemon150_3.csv "/Users/alexeysergeyev/Zotero/storage/BCFE69ZR/Lemon et al. - 2022 - Gravitationally lensed quasars in Gaia -- IV. 150 new lenses, quasar pairs, and projected quasars.pdf"\n'

In [50]:
df1 = pd.read_csv('data/lemon150_1-2.csv')
df2 = pd.read_csv('data/lemon150_3.csv')
df1.dropna(subset=['Name'], inplace=True)
df2.dropna(subset=['Name'], inplace=True)
df_lemon150 = pd.concat([df1, df2])
df_lemon150

Unnamed: 0,Name,R.A.,Dec.,Selection,Gaia G,Sep. (′′),PMSIG,Run,Classification,Gaia P.M. sig.
0,J0021+1927,5.4936,19.4646,"WD, MD","20.77, 19.96",2.89,"1.05, 2.74",NOT,"projected QSOs, z=1.045, 1.09",
1,J0027+0438,6.9580,4.6443,"WD, MD","17.65, 20.18",1.92,"0.56, —",NOT,"projected QSOs, z=0.1935, 1.972",
2,J0029–0414,7.3861,-4.2472,"WD, MD","19.27, 20.43",1.11,"1.13, —",NTT3,z=0.518 QSO + star,
3,J0030–3358,7.6740,-33.9767,GP,20.89,2.03,—,NTT1,"lens, z=1.58, zlens=0.715",
4,J0032–4523,8.1130,-45.3884,"WD, MD","20.57, 20.21",2.33,"0.12, 0.56",NTT3,"projected QSOs, z=1.667, 1.74",
...,...,...,...,...,...,...,...,...,...,...
48,J2318+0250,349.5541,2.8411,"WD, MD","19.59, 19.43",3.23,,NOT,"projected QSOs, z=1.83, 1.945","2.88, 0.78"
49,J2322+0916,350.6413,9.2796,"WD, MD","20.51, 18.18",4.51,,NOT,"QSO pair, z=1.20","1.50, 1.37"
50,J2334+5036,353.5277,50.6114,WD,"19.88, 19.18",2.13,,NOT,Stars,"1.99, 5.49"
51,J2341–1557,355.4664,-15.9501,WD,"20.71, 20.06",1.02,,NTT3,"NIQ, z=1.535","—, 0.85"


In [51]:
cond = df_lemon150['Classification'].str.contains('lens')
df_lens = df_lemon150[cond]
df_lens['Dec.'].astype('str').replace('−', '-', regex=True, inplace=True)
df_lens.loc[:, 'Name'] = df_lens['Name'].astype('str')
df_lens

Unnamed: 0,Name,R.A.,Dec.,Selection,Gaia G,Sep. (′′),PMSIG,Run,Classification,Gaia P.M. sig.
3,J0030–3358,7.6740,-33.9767,GP,20.89,2.03,—,NTT1,"lens, z=1.58, zlens=0.715",
6,J0045–3937,11.3665,-39.6262,WD,"19.70, 18.98",1.14,"1.39, 0.60",NTT2,"lens, z=1.85",
7,J0116+4052,19.1635,40.8811,"WD, MD","19.84, 18.96",1.28,"—, 1.07",NOT,"lens, z=1.85",
15,J0149–6532,27.2906,-65.5404,GP,20.58,2.74,—,NTT1,"lens(?), z=0.944?, z=0.395",
16,J0152–2448,28.0797,-24.8105,"WD, MD","20.12, 18.66",1.91,"3.81, 2.05",NOT,"lens, z=1.69",
...,...,...,...,...,...,...,...,...,...,...
34,J2147–1340,326.9957,-13.6772,WD,"19.85, 20.23",1.33,,NOT,"lens, z=1.382","1.87, —"
35,J2205+1019,331.4161,10.3307,"WD, MD","18.53, 18.65",1.34,,NOT,"lens, z=1.78","—, 1.69"
38,J2213–5926,333.3363,-59.4376,GP,20.83,2.59,,NTT3 (/NTT1),"lens, z=1.72",—
43,J2308+3201,347.0777,32.0294,WD,"20.31, 19.54",2.63,,NOT,"lens, z=2.30","1.41, 2.00"


In [52]:
df_lens.info()

<class 'pandas.core.frame.DataFrame'>
Index: 79 entries, 3 to 47
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Name            79 non-null     object 
 1   R.A.            79 non-null     float64
 2   Dec.            79 non-null     float64
 3   Selection       79 non-null     object 
 4   Gaia G          78 non-null     object 
 5   Sep. (′′)       79 non-null     float64
 6   PMSIG           60 non-null     object 
 7   Run             79 non-null     object 
 8   Classification  79 non-null     object 
 9   Gaia P.M. sig.  18 non-null     object 
dtypes: float64(3), object(7)
memory usage: 6.8+ KB


In [53]:
coords_known = SkyCoord(ra=df_known['ra'], dec=df_known['dec'], unit=(u.deg, u.deg))
coords = SkyCoord(ra=df_lens['R.A.'], dec=df_lens['Dec.'], unit=(u.deg, u.deg))
idx, d2d, _, = coords.match_to_catalog_sky(coords_known)
df_lens.loc[:, 'sep'] = d2d.to(u.arcsec).value
cond = df_lens['sep'] > 10
df_add = df_lens[cond].reset_index(drop=True)
# df_add['n'] = 4
df_add

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_lens.loc[:, 'sep'] = d2d.to(u.arcsec).value


Unnamed: 0,Name,R.A.,Dec.,Selection,Gaia G,Sep. (′′),PMSIG,Run,Classification,Gaia P.M. sig.,sep
0,J0030–3358,7.6740,-33.9767,GP,20.89,2.03,—,NTT1,"lens, z=1.58, zlens=0.715",,15367.883209
1,J0045–3937,11.3665,-39.6262,WD,"19.70, 18.98",1.14,"1.39, 0.60",NTT2,"lens, z=1.85",,12125.450728
2,J0116+4052,19.1635,40.8811,"WD, MD","19.84, 18.96",1.28,"—, 1.07",NOT,"lens, z=1.85",,14740.080196
3,J0149–6532,27.2906,-65.5404,GP,20.58,2.74,—,NTT1,"lens(?), z=0.944?, z=0.395",,23380.560427
4,J0152–2448,28.0797,-24.8105,"WD, MD","20.12, 18.66",1.91,"3.81, 2.05",NOT,"lens, z=1.69",,22581.117926
...,...,...,...,...,...,...,...,...,...,...,...
73,J2147–1340,326.9957,-13.6772,WD,"19.85, 20.23",1.33,,NOT,"lens, z=1.382","1.87, —",46386.042648
74,J2205+1019,331.4161,10.3307,"WD, MD","18.53, 18.65",1.34,,NOT,"lens, z=1.78","—, 1.69",33350.412477
75,J2213–5926,333.3363,-59.4376,GP,20.83,2.59,,NTT3 (/NTT1),"lens, z=1.72",—,8176.343098
76,J2308+3201,347.0777,32.0294,WD,"20.31, 19.54",2.63,,NOT,"lens, z=2.30","1.41, 2.00",18837.491854


In [54]:
df_known = pd.concat([df_known, df_add[['Name', 'R.A.', 'Dec.']].rename(columns={'R.A.': 'ra', 'Dec.': 'dec'})], ignore_index=True)
df_known

Unnamed: 0,Name,ra,dec,n
0,J0011-0845,2.834350,-8.7643,2.0
1,J0013+5119,3.348077,51.3183,2.0
2,PSJ0028+0631,7.093690,6.5317,2.0
3,PSJ0030-1525,7.563600,-15.4177,4.0
4,J0047+2514,11.946594,25.2411,2.0
...,...,...,...,...
299,J2147–1340,326.995700,-13.6772,
300,J2205+1019,331.416100,10.3307,
301,J2213–5926,333.336300,-59.4376,
302,J2308+3201,347.077700,32.0294,


In [55]:
df_known.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 304 entries, 0 to 303
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    304 non-null    object 
 1   ra      304 non-null    float64
 2   dec     304 non-null    float64
 3   n       220 non-null    float64
dtypes: float64(3), object(1)
memory usage: 9.6+ KB


In [56]:
df_known.to_csv('data/my_gls.csv', index=False)

In [2]:
df_known = pd.read_csv('data/my_gls.csv')
df_known

Unnamed: 0,Name,ra,dec,n
0,J0011-0845,2.834350,-8.7643,2.0
1,J0013+5119,3.348077,51.3183,2.0
2,PSJ0028+0631,7.093690,6.5317,2.0
3,PSJ0030-1525,7.563600,-15.4177,4.0
4,J0047+2514,11.946594,25.2411,2.0
...,...,...,...,...
299,J2147-1340,326.995700,-13.6772,
300,J2205+1019,331.416100,10.3307,
301,J2213-5926,333.336300,-59.4376,
302,J2308+3201,347.077700,32.0294,
