In [2]:
import pandas as pd
import numpy as np
from tabulate import tabulate
pd.options.mode.chained_assignment = None  # default='warn'

#Load in the dataset from the ESO archive, containing the data from the Sphere Polarimetry observations
df = pd.read_fwf("Dataset_full.txt" , engine='python', encoding='utf-8-sig')

#Check wether the data is formatted correctly
#df
#len(df) #this value should be ~75000
#len(df.columns) #this value shoul be 19 columns

In [3]:
#We only need the data from the SPHERE_irdis_dpi_obs and 'Type' == OBJECT, filter out the rest

df = df.loc[df['TPL ID'] == "SPHERE_irdis_dpi_obs"]
df_mod = df.loc[df['Type'] == "OBJECT"]

#df_mod

In [14]:
#Get rid of the Double measurements

#df_mod.sort_values("OBJECT", inplace = True)

# dropping duplicate values
df_single = df_mod.drop_duplicates(subset=['OBJECT'], keep='first') #Are there other identifiers we want to keep?

df_single


Unnamed: 0,OBJECT,RA,DEC,Program_ID,Instrument,Category,Type,Mode,Dataset ID,Release_Date,TPL ID,TPL START,Exptime,filter_lambda_min,filter_lambda_max,Filter,MJD-OBS,Airmass,DIMM Seeing at Start
1,V WX CHA,11:09:58.60,-77:37:08.8,0102.C-0243(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2019-04-01T05:44:45.476,Apr 1 2020,SPHERE_irdis_dpi_obs,2019-04-01T05:43:31,0.837,,,B_H,58574.239415,1.779,0.73
2,TW HYA,11:01:51.65,-34:42:13.9,095.C-0273(A),SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2015-05-02T01:58:15.967,May 2 2016,SPHERE_irdis_dpi_obs,2015-05-02T01:51:19,0.837,,,B_J,57144.082129,1.037,1.03
3,RU CEN,12:09:23.59,-45:25:34.4,0102.D-0696(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2019-01-12T06:49:02.090,Jan 12 2020,SPHERE_irdis_dpi_obs,2019-01-12T06:13:56,4.000,,,B_H,58495.284052,1.264,0.90
4,IRAS15469-5311,15:50:43.57,-53:20:41.7,0102.D-0696(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2019-03-24T05:35:21.750,Mar 24 2020,SPHERE_irdis_dpi_obs,2019-03-24T04:54:45,2.000,,,B_H,58566.232891,1.369,0.62
8,IRAS08544-4431,08:56:14.01,-44:43:08.5,0102.D-0696(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2018-10-25T08:11:51.073,Oct 25 2019,SPHERE_irdis_dpi_obs,2018-10-25T08:02:47,8.000,,,B_H,58416.341563,1.377,0.53
12,EPS SCO,16:50:09.00,-34:17:39.3,095.D-0458(B),SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2015-06-25T22:59:18.846,Jun 25 2016,SPHERE_irdis_dpi_obs,2015-06-25T22:55:24,0.837,,,N_BRG,57198.957857,1.792,-1.00
15,V UY AUR,04:51:47.34,+30:47:16.2,1104.C-0415(G,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2021-01-01T02:53:03.975,Jan 1 2022,SPHERE_irdis_dpi_obs,2021-01-01T02:38:51,2.000,,,"B_H,P0-90",59215.120185,1.759,0.67
16,TITAN,18:11:53.52,-22:37:25.1,0101.C-0881(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2018-08-13T03:21:23.962,Aug 13 2019,SPHERE_irdis_dpi_obs,2018-08-13T02:49:01,64.000,,,N_HEI,58343.139861,1.111,0.78
18,V HP CHA,11:08:15.26,-77:33:52.0,0102.C-0561(B,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2019-03-27T03:53:43.777,Mar 27 2020,SPHERE_irdis_dpi_obs,2019-03-27T03:46:30,4.000,,,B_J,58569.162312,1.662,0.54
21,HR 4049,10:18:07.41,-28:59:29.4,0102.D-0696(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2018-11-29T07:47:50.482,Nov 29 2019,SPHERE_irdis_dpi_obs,2018-11-29T07:13:32,4.000,,,B_H,58451.324890,1.233,1.04


In [15]:
#Save the new catalogue to the directory in the same fwf_format

formats = '%+22s %+15s %+15s %+15s %+10s %+10s %+10s %+25s %+35s %+15s %+25s %+25s %+20s %+20s %+20s %+15s %+20s %+10s %+25s'

headers = [format(str(x),y.replace('%+','>')) 
              for x, y in zip(df_single.columns,formats.split())]

np.savetxt('modified_data.txt', df_single.values, fmt=formats,
           header=' '.join(headers), comments='')

In [16]:
#Check if the new dataset is saved correctly
load = pd.read_fwf("modified_data.txt" , engine='python', encoding='utf-8-sig')

load

#problem with first and last colum? "DIMM seeing at start" in 4 seperate columns

Unnamed: 0,OBJECT,RA,DEC,Program_ID,Instrument,Category,Type,Mode,Dataset ID,Release_Date,...,Exptime,filter_lambda_min,filter_lambda_max,Filter,MJD-OBS,Airmass,DIMM,Seeing,at,Start
0,V WX CHA,11:09:58.60,-77:37:08.8,0102.C-0243(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2019-04-01T05:44:45.476,Apr 1 2020,...,0.837,,,B_H,58574.239415,1.779,,,,0.73
1,TW HYA,11:01:51.65,-34:42:13.9,095.C-0273(A),SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2015-05-02T01:58:15.967,May 2 2016,...,0.837,,,B_J,57144.082129,1.037,,,,1.03
2,RU CEN,12:09:23.59,-45:25:34.4,0102.D-0696(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2019-01-12T06:49:02.090,Jan 12 2020,...,4.000,,,B_H,58495.284052,1.264,,,,0.90
3,IRAS15469-5311,15:50:43.57,-53:20:41.7,0102.D-0696(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2019-03-24T05:35:21.750,Mar 24 2020,...,2.000,,,B_H,58566.232891,1.369,,,,0.62
4,IRAS08544-4431,08:56:14.01,-44:43:08.5,0102.D-0696(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2018-10-25T08:11:51.073,Oct 25 2019,...,8.000,,,B_H,58416.341563,1.377,,,,0.53
5,EPS SCO,16:50:09.00,-34:17:39.3,095.D-0458(B),SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2015-06-25T22:59:18.846,Jun 25 2016,...,0.837,,,N_BRG,57198.957857,1.792,,,,-1.00
6,V UY AUR,04:51:47.34,+30:47:16.2,1104.C-0415(G,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2021-01-01T02:53:03.975,Jan 1 2022,...,2.000,,,"B_H,P0-90",59215.120185,1.759,,,,0.67
7,TITAN,18:11:53.52,-22:37:25.1,0101.C-0881(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2018-08-13T03:21:23.962,Aug 13 2019,...,64.000,,,N_HEI,58343.139861,1.111,,,,0.78
8,V HP CHA,11:08:15.26,-77:33:52.0,0102.C-0561(B,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2019-03-27T03:53:43.777,Mar 27 2020,...,4.000,,,B_J,58569.162312,1.662,,,,0.54
9,HR 4049,10:18:07.41,-28:59:29.4,0102.D-0696(A,SPHERE,SCIENCE,OBJECT,POLARIMETRY,SPHER.2018-11-29T07:47:50.482,Nov 29 2019,...,4.000,,,B_H,58451.324890,1.233,,,,1.04
