# Import des librairies

In [107]:
import numpy as np
import pandas as pd
from bokeh.io import output_notebook,output_file, show,curdoc
from bokeh.models import ColumnDataSource, FactorRange, HoverTool, Div, Range1d, LinearColorMapper,ColorBar,LabelSet, CustomJS, Slider
from bokeh.plotting import figure
from bokeh.layouts import row, column
from bokeh.palettes import magma, viridis
from bokeh.transform import transform, cumsum,factor_cmap
from bokeh.themes import Theme
from math import pi

output_notebook()
curdoc().theme = Theme(filename="cattpuccin.yml")

# Import Dataframe

In [108]:
df_constWikiScrap_Stars = pd.read_csv('constWikiScrap_Stars.csv')
df_constellations = pd.read_csv('./DonnéesAstro/88-constellations.csv', delimiter=';')
df_messier = pd.read_csv('./DonnéesAstro/catalogue-de-messier.csv', delimiter=';')

# Etudes metadonnées

In [109]:
def get_dataframe_info(df):
    """Get DataFrame_Info
    input
       df -> DataFrame
    output
       df_null_counts -> DataFrame Info (sorted)
    """

    df_types = pd.DataFrame(df.dtypes)
    df_nulls = df.count()
    
    df_null_count = pd.concat([df_types, df_nulls], axis=1)
    df_null_count = df_null_count.reset_index()
    
    # Reassign column names
    col_names = ["features", "types", "non_null_counts"]
    df_null_count.columns = col_names
    
    # Add this to sort
    df_null_count = df_null_count.sort_values(by=["non_null_counts"], ascending=False)
    
    return df_null_count

In [110]:
def metadata(df:pd.DataFrame,title,yrange=1600):
    """Generate Graph on Metadata
    Input:
        df -> DataFrame
        title -> Title of Graph (str)
        yrange -> Y-Axis Range (int)
    Output:
        Bokeh Figure
    """   
    df['types'] = df['types'].astype('str') 
    x=df['features'].tolist()
    y=df['non_null_counts'].tolist()
    types=df['types'].tolist()
    source = ColumnDataSource(data=dict(x=x, y=y,types=types))
    hover = HoverTool(
            tooltips=[
                ("Colonne", "@x"),
                ("Nombre de valeurs","@y"),
                ("type", "@types"),
            ]
        )
    curdoc().theme = Theme(filename="cattpuccin.yml")
    p = figure(x_range=x,title=title,tools=[hover, 'pan', 'wheel_zoom','reset'],toolbar_location="right",y_range=(0,yrange),width=1000,height=800)
    p.vbar(x='x', top='y', width=0.9, source=source, color='#94e2d5')
    p.xaxis.axis_label="Nom de la métadonnée"
    p.yaxis.axis_label="Nombre de métadonnée"
    p.xaxis.major_label_orientation = pi/4
    return p

In [111]:
constWikiScrap_Stars_metaAnalysis=metadata(get_dataframe_info(df_constWikiScrap_Stars),'constWikiScrap_Stars_metaAnalysis',11500)
constellation_metaAnalysis=metadata(get_dataframe_info(df_constellations),'constellation_metaAnalysis',100)
messier_metaAnalysis=metadata(get_dataframe_info(df_messier),'messier_metaAnalysis',120)

In [112]:
show(column(constWikiScrap_Stars_metaAnalysis,constellation_metaAnalysis,messier_metaAnalysis))

### constWikiScrap_Stars_metaAnalysis
Il y a un problème sur 70 donnée RA et Dec

In [113]:
df_constWikiScrap_Stars.head()

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.,abs.mag.
0,Andromeda,α And,00h 08m 23.17s,+29° 05′ 27.0″,97,2.07,−0.30
1,Andromeda,β And,01h 09m 43.80s,+35° 37′ 15.0″,199,2.07,−1.86
2,Andromeda,γ1 And,02h 03m 53.92s,+42° 19′ 47.5″,355,2.10,−3.08
3,Andromeda,δ And,00h 39m 19.60s,+30° 51′ 40.4″,101,3.27,0.81
4,Andromeda,Andromeda Galaxy,00h 42m 44.31s,+41° 16′ 09.4″,2540000,3.44[3] (nebulous),


#### Valeurs manquantes dans la colonne RA

In [114]:
df_missing_RA=df_constWikiScrap_Stars[df_constWikiScrap_Stars['RA'].isnull()]

In [115]:
df_missing_RA['Constellation'].value_counts()

Monoceros    62
Mensa         8
Name: Constellation, dtype: int64

In [116]:
df_constWikiScrap_Stars[df_constWikiScrap_Stars['Constellation']=='Mensa']

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.,abs.mag.
6989,Mensa,α Men,06h 10m 14.20s,−74° 45′ 09.1″,33,5.08,5.05
6990,Mensa,\n,25918,05h 31m 52.66s,2.73,−76° 20′ 30.0″,5.18
6991,Mensa,K4III,33285,23467,5.30,05h 02m 43.00s,−71° 18′ 51.6″
6992,Mensa,642,,54239,−79° 25′ 12.7″,33384,06h 56m 34.48s
6993,Mensa,0.26,κ,,05h 50m 16.80s,40953,27566
6994,Mensa,5.46,η Men,η,22871,,32440
6995,Mensa,−74° 56′ 13.2″,suspected variable\n,μ Men,30612,μ,
6996,Mensa,04h 43m 03.95s,B9IV,\n,,ε Men,ε
6997,Mensa,36039,466,K2/K3III,ζ,\n,ζ Men
6998,Mensa,50506,0.14,404,π Men,A5III,\n


In [117]:
df_constWikiScrap_Stars[df_constWikiScrap_Stars['Constellation']=='Monoceros']

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.,abs.mag.
7092,Monoceros,α Mon,07h 41m 14.88s,−09° 33′ 03.9″,144,3.94,0.71
7093,Monoceros,\n,29651,06h 14m 51.34s,−2.49,−06° 16′ 29.0″,3.99
7094,Monoceros,K3III,55185,34769,4.15,07h 11m 51.86s,−00° 29′ 34.0″
7095,Monoceros,375,,67594,−02° 59′ 01.6″,39863,08h 08m 35.66s
7096,Monoceros,−4.41,8,,06h 23m 46.10s,44769,30419
...,...,...,...,...,...,...,...
7330,Monoceros,5.40,,CW,06h 36m 54.58s,,
7331,Monoceros,17.2,,,,,
7332,Monoceros,−08° 46′ 49.9″,AFGL 961,,,,
7333,Monoceros,06h 34m 37.63s,young stellar object\n,Monoceros R2 IRS 3,,,


In [118]:
df_constWikiScrap_Stars_clensed=df_constWikiScrap_Stars[df_constWikiScrap_Stars['Constellation']!='Mensa']
df_constWikiScrap_Stars_clensed=df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['Constellation']!='Monoceros']
show(metadata(get_dataframe_info(df_constWikiScrap_Stars_clensed),'constWikiScrap_Stars_metaAnalysis_clensed',11500))

#### Visibile Magnitude Analysis

In [119]:
df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['vis.mag.'].isnull()].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 145 entries, 646 to 11211
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Constellation  145 non-null    object
 1   Name           145 non-null    object
 2   RA             145 non-null    object
 3   Dec            145 non-null    object
 4   Dist. (ly)     32 non-null     object
 5   vis.mag.       0 non-null      object
 6   abs.mag.       1 non-null      object
dtypes: object(7)
memory usage: 9.1+ KB


In [120]:
df_constWikiScrap_Stars_clensed = df_constWikiScrap_Stars_clensed.drop(columns=['abs.mag.'])
df_constWikiScrap_Stars_clensed = df_constWikiScrap_Stars_clensed[~df_constWikiScrap_Stars_clensed['vis.mag.'].isnull()]
df_constWikiScrap_Stars_clensed = df_constWikiScrap_Stars_clensed[~df_constWikiScrap_Stars_clensed['Dist. (ly)'].isnull()]
show(metadata(get_dataframe_info(df_constWikiScrap_Stars_clensed),'constWikiScrap_Stars_metaAnalysis_clensed',11500))

#### Creation d'un dataset temporaire sans les données problématiques

In [121]:
df_constWikiScrap_Stars_clensed.to_csv('constWikiScrap_Stars_clensed.csv',index=False)

In [122]:
df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['Name'].isnull()]

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.
50,Andromeda,,00h 37m 21.23s,+35° 23′ 58.2″,1022,5.45
55,Andromeda,,01h 40m 39.56s,+43° 17′ 51.9″,268,5.63
64,Andromeda,,00h 20m 45.54s,+32° 54′ 40.4″,646,5.79
71,Andromeda,,00h 18m 38.22s,+31° 31′ 02.0″,543,5.88
74,Andromeda,,01h 55m 54.47s,+37° 16′ 40.1″,991,5.89
...,...,...,...,...,...,...
11261,Virgo,,13h 17m 15.62s,+13° 40′ 32.3″,298,5.33
11289,Virgo,,13h 12m 32.95s,+11° 33′ 22.2″,811,5.76
11292,Virgo,,13h 09m 12.42s,+10° 01′ 20.9″,352,5.79
11372,Virgo,,13h 24m 30.54s,+12° 25′ 54.3″,538,6.44


In [123]:
andro = df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['Constellation']=='Andromeda']
andro['vis.mag.'].describe()

count      178
unique     126
top       5.81
freq         4
Name: vis.mag., dtype: object

### messier metaAnalysis
Il manque des valeurs pour les colonnes Constellation(EN/FR) RA et DEC

In [124]:
df_messier.head()

Unnamed: 0,Messier,NGC,Object type / Type d'objet,Season / Saison,Magnitude,Constellation (EN),Constellation (FR),Constellation (Latin),RA (Right Ascension),Dec (Declinaison),Distance (l.y / a. l.),Size / Dimensions,Discoverer / Découvreur,Year / Année,Image,URL de l'image,Constellation
0,M91,NGC 4548,Galaxy / Galaxie,Spring / Printemps,10,Hair of Berenice,Chevelure de Bérénice,Coma Berenices,12:35:26.45,+14:29:46.8,37000000.0,"5,4' x 4,4'",Messier,1781.0,http://www.lasam.ca/messier/M091.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,Com
1,M75,NGC 6864,Globular Cluster / Amas Globulaire,Summer / Été,8,Archer,Sagittaire,Sagittarius,20:06:04.84,-21:55:20.0,78500.0,"4,6'",Méchain,1780.0,http://www.lasam.ca/messier/M075.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,Sgr
2,M95,NGC 3351,Galaxy / Galaxie,Spring / Printemps,9,Lion,Lion,Leo,10:43:57.70,+11:42:13.7,20300000.0,"7,4' x 5,1'",Méchain,1781.0,http://www.lasam.ca/messier/M095.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,Leo
3,M41,NGC 2287,Open Cluster / Amas Ouvert,Winter / Hiver,4,Great Dog,Grand Chien,Canis Major,06:45:59.94,-20:45:15.2,1600.0,"30,0'",Hodierna,1654.0,http://www.lasam.ca/messier/M041.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,CMa
4,M46,NGC 2437,Open Cluster / Amas Ouvert,Winter / Hiver,6,"Stern,Poop deck",Poupe,Puppis,07:41:46.82,-14:48:36.0,3200.0,"24,0'",Messier,1771.0,http://www.lasam.ca/messier/M046.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,Pup


In [125]:
df_messier[df_messier['RA (Right Ascension)'].isnull()]

Unnamed: 0,Messier,NGC,Object type / Type d'objet,Season / Saison,Magnitude,Constellation (EN),Constellation (FR),Constellation (Latin),RA (Right Ascension),Dec (Declinaison),Distance (l.y / a. l.),Size / Dimensions,Discoverer / Découvreur,Year / Année,Image,URL de l'image,Constellation
18,M102,NGC 5866,Galaxy / Galaxie,Spring / Printemps,10,,,,,,23000000.0,"5,2' x 2,3'",Méchain,1781.0,http://www.lasam.ca/messier/M102.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,
24,M40,Winnecke 4,Double star / Étoile Double,Spring / Printemps,9,,,,,,,,Hevelius,1660.0,http://www.lasam.ca/messier/M040.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,
53,M45,,Open Cluster / Amas Ouvert,Winter / Hiver,1,,,,,,410.0,"120,0'",,,http://www.lasam.ca/messier/M045.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,


# Conversion des données déclinaisons et asension droite

In [126]:
df_constWikiScrap_Stars_clensed.head()

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.
0,Andromeda,α And,00h 08m 23.17s,+29° 05′ 27.0″,97,2.07
1,Andromeda,β And,01h 09m 43.80s,+35° 37′ 15.0″,199,2.07
2,Andromeda,γ1 And,02h 03m 53.92s,+42° 19′ 47.5″,355,2.10
3,Andromeda,δ And,00h 39m 19.60s,+30° 51′ 40.4″,101,3.27
4,Andromeda,Andromeda Galaxy,00h 42m 44.31s,+41° 16′ 09.4″,2540000,3.44[3] (nebulous)


In [130]:
df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['Constellation']=='Volans']

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.
11424,Volans,γ2 Vol,07h 08m 44.82s,−70° 29′ 57.1″,142.0,3.62
11425,Volans,β Vol,08h 25m 44.25s,−66° 08′ 11.5″,108.0,3.77
11426,Volans,ζ Vol,07h 41m 49.20s,−72° 36′ 22.1″,134.0,3.93
11427,Volans,δ Vol,07h 16m 49.83s,−67° 57′ 25.8″,660.0,3.97
11428,Volans,α Vol,09h 02m 26.80s,−66° 23′ 45.0″,124.0,4.0
11429,Volans,ε Vol,08h 07m 55.84s,−68° 37′ 01.7″,560.0,4.35
11430,Volans,24 G. Vol,08h 18m 18.78s,−65° 36′ 47.7″,296.0,5.06
11431,Volans,6 G. Vol,06h 59m 50.58s,−67° 55′ 01.2″,290.0,5.18
11432,Volans,θ Vol,08h 39m 05.13s,−70° 23′ 11.9″,239.0,5.19
11433,Volans,η Vol,08h 22m 04.52s,−73° 24′ 00.2″,356.0,5.28


In [136]:
df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['RA']==' 06 42h 25.55m'] 

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.


In [137]:
#df_constWikiScrap_Stars_clensed=df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['RA']!='07h 33 21.80m ']
#df_constWikiScrap_Stars_clensed=df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['RA']!='06 42h 25.55m'] 
df_constWikiScrap_Stars_clensed=df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['Constellation']!='Volans']

In [146]:
def hms_to_degrees(hms):
    h, m, s = hms.split(':')
    #print(hms)
    return float(h) + float(m)/60 + float(s)/3600

In [139]:
df_constWikiScrap_Stars_clensed.head()

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.
0,Andromeda,α And,00h 08m 23.17s,+29° 05′ 27.0″,97,2.07
1,Andromeda,β And,01h 09m 43.80s,+35° 37′ 15.0″,199,2.07
2,Andromeda,γ1 And,02h 03m 53.92s,+42° 19′ 47.5″,355,2.10
3,Andromeda,δ And,00h 39m 19.60s,+30° 51′ 40.4″,101,3.27
4,Andromeda,Andromeda Galaxy,00h 42m 44.31s,+41° 16′ 09.4″,2540000,3.44[3] (nebulous)


In [203]:
RA = df_constWikiScrap_Stars_clensed['RA'].tolist()

In [204]:
RA = [x.replace("h", ":") for x in RA]
RA = [x.replace("m", ":") for x in RA]
RA = [x.replace("s", "") for x in RA]
RA = [x.replace(" ", "") for x in RA]

In [205]:
RA[0:5]

['00:08:23.17', '01:09:43.80', '02:03:53.92', '00:39:19.60', '00:42:44.31']

In [206]:
print([x for x in RA if RA==''])

[]


In [207]:
RA_deg = [hms_to_degrees(hms) for hms in RA]

In [208]:
RA_deg[0:5]

[0.13976944444444445,
 1.1621666666666666,
 2.0649777777777776,
 0.6554444444444445,
 0.7123083333333333]

In [209]:
def dms_to_decimal(dms):
    """Convert declination in DMS format to decimal format."""
    d, m, s = dms.split(':')
    print(d,m,s)
    return float(d) + float(m)/60 + float(s)/3600


In [210]:
dec = df_constWikiScrap_Stars_clensed['Dec'].tolist()
dec[0:5]


['+29°\xa005′\xa027.0″',
 '+35°\xa037′\xa015.0″',
 '+42°\xa019′\xa047.5″',
 '+30°\xa051′\xa040.4″',
 '+41°\xa016′\xa009.4″']

In [214]:
dec = [x.replace("°", ":") for x in dec]
dec = [x.replace("′", ":") for x in dec]
dec = [x.replace('″', "") for x in dec]
dec = [x.replace('\xa0', "") for x in dec]
dec = [x.replace("−", "-") for x in dec]

In [215]:
dec[0:5]

['+29:05:27.0', '+35:37:15.0', '+42:19:47.5', '+30:51:40.4', '+41:16:09.4']

In [216]:
dec_deg = [dms_to_decimal(dms) for dms in dec]

+29 05 27.0
+35 37 15.0
+42 19 47.5
+30 51 40.4
+41 16 09.4
+48 37 42.6
+42 19 33.5
+46 27 33.0
+38 29 57.3
+24 16 02.6
+41 24 23.0
+44 20 02.3
+47 14 30.6
+43 16 05.1
+33 43 09.7
+29 18 44.5
+23 25 03.9
+36 47 07.2
+41 04 44.2
+49 24 21.5
+38 40 54.0
+50 03 06.1
+50 16 43.2
+37 51 33.1
+49 00 55.0
+45 24 25.0
+42 19 51.0
+44 13 54.1
+45 31 43.5
+40 34 37.6
+42 36 49.7
+46 25 13.0
+46 04 20.2
+44 23 10.1
+43 56 32.1
+42 45 28.1
+44 29 18.6
+37 58 07.3
+44 23 40.2
+50 00 23.9
+29 45 06.1
+39 14 11.0
+47 00 26.6
+39 27 31.2
+46 23 14.3
+51 03 58.4
+47 22 48.0
+50 28 18.3
+40 43 47.3
+48 37 30.7
+35 23 58.2
+23 37 42.4
+40 14 11.6
+50 09 05.5
+37 42 54.0
+43 17 51.9
+42 04 53.7
+49 17 43.6
+44 42 47.9
+37 15 06.5
+41 02 08.6
+37 18 44.2
+42 54 43.1
+38 10 56.9
+32 54 40.4
+37 43 26.9
+41 23 47.5
+42 04 40.9
+44 25 44.5
+33 43 26.9
+36 25 31.4
+31 31 02.0
+33 34 54.1
+30 56 08.2
+37 16 40.1
+37 14 13.9
+38 44 02.3
+43 32 41.1
+37 57 10.6
+41 20 42.7
+43 27 28.4
+41 46 25.3
+33 57 03.8
+47 

In [217]:
dec_deg[0:5]

[29.090833333333332,
 35.62083333333334,
 42.329861111111114,
 30.861222222222224,
 41.26927777777777]

In [218]:
df_const_deg = df_constWikiScrap_Stars_clensed.copy()

In [219]:
df_const_deg['RA']=RA_deg
df_const_deg['Dec']=dec_deg

In [220]:
df_const_deg.head()

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.
0,Andromeda,α And,0.139769,29.090833,97,2.07
1,Andromeda,β And,1.162167,35.620833,199,2.07
2,Andromeda,γ1 And,2.064978,42.329861,355,2.10
3,Andromeda,δ And,0.655444,30.861222,101,3.27
4,Andromeda,Andromeda Galaxy,0.712308,41.269278,2540000,3.44[3] (nebulous)


In [221]:
df_const_deg.to_csv('constWikiScrap_Stars_clensed_deg.csv',index=False)