# Import des librairies

In [1]:
import numpy as np
import pandas as pd
from bokeh.io import output_notebook,output_file, show,curdoc
from bokeh.models import ColumnDataSource, FactorRange, HoverTool, Div, Range1d, LinearColorMapper,ColorBar,LabelSet, CustomJS, Slider
from bokeh.plotting import figure
from bokeh.layouts import row, column
from bokeh.palettes import magma, viridis
from bokeh.transform import transform, cumsum,factor_cmap
from bokeh.themes import Theme
from math import pi

# Send Bokeh output to the notebook so figures passed to show() render inline.
output_notebook()
# Style the current Bokeh document with the theme described in cattpuccin.yml.
curdoc().theme = Theme(filename="cattpuccin.yml")

# Import Dataframe

In [2]:
# Load the three source tables. The two files under ./DonnéesAstro are read
# with ';' as the field delimiter; the scraped star table uses the default.
df_constWikiScrap_Stars = pd.read_csv('constWikiScrap_Stars.csv')
df_constellations = pd.read_csv('./DonnéesAstro/88-constellations.csv', delimiter=';')
df_messier = pd.read_csv('./DonnéesAstro/catalogue-de-messier.csv', delimiter=';')

# Étude des métadonnées

In [3]:
def get_dataframe_info(df):
    """Summarise the dtype and non-null count of every column of a DataFrame.

    Parameters
    ----------
    df : DataFrame to inspect.

    Returns
    -------
    DataFrame with one row per column of ``df`` and three columns:
    ``features`` (the column name), ``types`` (its dtype) and
    ``non_null_counts`` (number of non-null values), ordered from the most
    to the least populated column.
    """
    # Place each column's dtype next to its non-null count; both are indexed
    # by the column names of df.
    summary = pd.concat([df.dtypes.to_frame(), df.count()], axis=1)

    # Turn the column names (currently the index) into a regular column,
    # then give the three resulting columns their final labels.
    summary = summary.reset_index()
    summary.columns = ["features", "types", "non_null_counts"]

    # Most complete columns first.
    return summary.sort_values(by=["non_null_counts"], ascending=False)

In [4]:
def metadata(df: pd.DataFrame, title: str, yrange: int = 1600):
    """Build a bar chart showing the non-null count of each column.

    Parameters
    ----------
    df : pd.DataFrame
        Summary frame with the columns ``features``, ``types`` and
        ``non_null_counts`` (the layout produced by ``get_dataframe_info``).
        The frame is only read; it is never modified.
    title : str
        Title displayed on the figure.
    yrange : int, default 1600
        Upper bound of the y-axis; the axis always starts at 0.

    Returns
    -------
    Bokeh figure with one vertical bar per row of ``df``.
    """
    features = df['features'].tolist()
    counts = df['non_null_counts'].tolist()
    # Convert the dtypes to text in a local list rather than assigning back to
    # df['types']: writing into the argument would silently alter the caller's
    # frame (and can raise SettingWithCopyWarning when df is a slice).
    types = df['types'].astype('str').tolist()

    source = ColumnDataSource(data=dict(x=features, y=counts, types=types))
    hover = HoverTool(
        tooltips=[
            ("Colonne", "@x"),
            ("Nombre de valeurs", "@y"),
            ("type", "@types"),
        ]
    )
    # Re-applies the document theme on every call, as the original did.
    curdoc().theme = Theme(filename="cattpuccin.yml")
    p = figure(
        x_range=features,  # categorical axis: one slot per column name
        title=title,
        tools=[hover, 'pan', 'wheel_zoom', 'reset'],
        toolbar_location="right",
        y_range=(0, yrange),
        width=1000,
        height=800,
    )
    p.vbar(x='x', top='y', width=0.9, source=source, color='#94e2d5')
    p.xaxis.axis_label = "Nom de la métadonnée"
    p.yaxis.axis_label = "Nombre de métadonnée"
    # Tilt the column names by 45 degrees so long labels do not overlap.
    p.xaxis.major_label_orientation = pi / 4
    return p

In [5]:
# Build one completeness chart per dataset. The last argument is the upper
# bound of the y-axis handed to metadata().
constWikiScrap_Stars_metaAnalysis=metadata(get_dataframe_info(df_constWikiScrap_Stars),'constWikiScrap_Stars_metaAnalysis',11500)
constellation_metaAnalysis=metadata(get_dataframe_info(df_constellations),'constellation_metaAnalysis',100)
messier_metaAnalysis=metadata(get_dataframe_info(df_messier),'messier_metaAnalysis',120)

In [6]:
# Display the three completeness charts in a single column layout.
show(column(constWikiScrap_Stars_metaAnalysis,constellation_metaAnalysis,messier_metaAnalysis))

### constWikiScrap_Stars_metaAnalysis
Il y a un problème sur 70 données RA et Dec (valeurs manquantes).

In [7]:
# Preview the first rows of the scraped star table.
df_constWikiScrap_Stars.head()

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.,abs.mag.
0,Andromeda,α And,00h 08m 23.17s,+29° 05′ 27.0″,97,2.07,−0.30
1,Andromeda,β And,01h 09m 43.80s,+35° 37′ 15.0″,199,2.07,−1.86
2,Andromeda,γ1 And,02h 03m 53.92s,+42° 19′ 47.5″,355,2.10,−3.08
3,Andromeda,δ And,00h 39m 19.60s,+30° 51′ 40.4″,101,3.27,0.81
4,Andromeda,Andromeda Galaxy,00h 42m 44.31s,+41° 16′ 09.4″,2540000,3.44[3] (nebulous),


### Valeurs manquantes dans la colonne RA

In [8]:
# Keep only the rows whose RA value is missing.
df_missing_RA=df_constWikiScrap_Stars[df_constWikiScrap_Stars['RA'].isnull()]

In [9]:
# Count the rows with a missing RA per constellation.
df_missing_RA['Constellation'].value_counts()

Monoceros    62
Mensa         8
Name: Constellation, dtype: int64

In [10]:
# Inspect every Mensa row; in the recorded output the values appear shifted
# across columns from the second row onward.
df_constWikiScrap_Stars[df_constWikiScrap_Stars['Constellation']=='Mensa']

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.,abs.mag.
6989,Mensa,α Men,06h 10m 14.20s,−74° 45′ 09.1″,33,5.08,5.05
6990,Mensa,\n,25918,05h 31m 52.66s,2.73,−76° 20′ 30.0″,5.18
6991,Mensa,K4III,33285,23467,5.30,05h 02m 43.00s,−71° 18′ 51.6″
6992,Mensa,642,,54239,−79° 25′ 12.7″,33384,06h 56m 34.48s
6993,Mensa,0.26,κ,,05h 50m 16.80s,40953,27566
6994,Mensa,5.46,η Men,η,22871,,32440
6995,Mensa,−74° 56′ 13.2″,suspected variable\n,μ Men,30612,μ,
6996,Mensa,04h 43m 03.95s,B9IV,\n,,ε Men,ε
6997,Mensa,36039,466,K2/K3III,ζ,\n,ζ Men
6998,Mensa,50506,0.14,404,π Men,A5III,\n


In [11]:
# Inspect every Monoceros row; the recorded output shows the same kind of
# column misalignment as for Mensa.
df_constWikiScrap_Stars[df_constWikiScrap_Stars['Constellation']=='Monoceros']

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.,abs.mag.
7092,Monoceros,α Mon,07h 41m 14.88s,−09° 33′ 03.9″,144,3.94,0.71
7093,Monoceros,\n,29651,06h 14m 51.34s,−2.49,−06° 16′ 29.0″,3.99
7094,Monoceros,K3III,55185,34769,4.15,07h 11m 51.86s,−00° 29′ 34.0″
7095,Monoceros,375,,67594,−02° 59′ 01.6″,39863,08h 08m 35.66s
7096,Monoceros,−4.41,8,,06h 23m 46.10s,44769,30419
...,...,...,...,...,...,...,...
7330,Monoceros,5.40,,CW,06h 36m 54.58s,,
7331,Monoceros,17.2,,,,,
7332,Monoceros,−08° 46′ 49.9″,AFGL 961,,,,
7333,Monoceros,06h 34m 37.63s,young stellar object\n,Monoceros R2 IRS 3,,,


In [15]:
# Exclude the two constellations whose scraped rows are misaligned, using a
# single membership mask, then redraw the completeness chart on what remains.
df_constWikiScrap_Stars_clensed = df_constWikiScrap_Stars[
    ~df_constWikiScrap_Stars['Constellation'].isin(['Mensa', 'Monoceros'])
]
show(
    metadata(
        get_dataframe_info(df_constWikiScrap_Stars_clensed),
        'constWikiScrap_Stars_metaAnalysis_clensed',
        11500,
    )
)

### Visible Magnitude Analysis

In [20]:
# Summarise the rows that have no visual magnitude (column-wise non-null counts).
df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['vis.mag.'].isnull()].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 145 entries, 646 to 11211
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Constellation  145 non-null    object
 1   Name           145 non-null    object
 2   RA             145 non-null    object
 3   Dec            145 non-null    object
 4   Dist. (ly)     32 non-null     object
 5   vis.mag.       0 non-null      object
 6   abs.mag.       1 non-null      object
dtypes: object(7)
memory usage: 9.1+ KB


In [28]:
# Remove the absolute-magnitude column. errors='ignore' keeps this cell
# re-runnable: without it, a second execution raises KeyError because the
# column has already been dropped from the reassigned frame.
df_constWikiScrap_Stars_clensed = df_constWikiScrap_Stars_clensed.drop(columns=['abs.mag.'], errors='ignore')
# Keep only the rows that have both a visual magnitude and a distance
# (dropna removes a row as soon as either of the listed columns is null).
df_constWikiScrap_Stars_clensed = df_constWikiScrap_Stars_clensed.dropna(subset=['vis.mag.', 'Dist. (ly)'])
# Redraw the completeness chart on the filtered table.
show(metadata(get_dataframe_info(df_constWikiScrap_Stars_clensed),'constWikiScrap_Stars_metaAnalysis_clensed',11500))

In [33]:
# List the remaining rows that have no star name.
df_constWikiScrap_Stars_clensed[df_constWikiScrap_Stars_clensed['Name'].isnull()]

Unnamed: 0,Constellation,Name,RA,Dec,Dist. (ly),vis.mag.
50,Andromeda,,00h 37m 21.23s,+35° 23′ 58.2″,1022,5.45
55,Andromeda,,01h 40m 39.56s,+43° 17′ 51.9″,268,5.63
64,Andromeda,,00h 20m 45.54s,+32° 54′ 40.4″,646,5.79
71,Andromeda,,00h 18m 38.22s,+31° 31′ 02.0″,543,5.88
74,Andromeda,,01h 55m 54.47s,+37° 16′ 40.1″,991,5.89
...,...,...,...,...,...,...
11261,Virgo,,13h 17m 15.62s,+13° 40′ 32.3″,298,5.33
11289,Virgo,,13h 12m 32.95s,+11° 33′ 22.2″,811,5.76
11292,Virgo,,13h 09m 12.42s,+10° 01′ 20.9″,352,5.79
11372,Virgo,,13h 24m 30.54s,+12° 25′ 54.3″,538,6.44


### messier metaAnalysis
Il manque des valeurs pour les colonnes Constellation (EN/FR), RA et Dec.

In [12]:
# Preview the first rows of the Messier catalogue.
df_messier.head()

Unnamed: 0,Messier,NGC,Object type / Type d'objet,Season / Saison,Magnitude,Constellation (EN),Constellation (FR),Constellation (Latin),RA (Right Ascension),Dec (Declinaison),Distance (l.y / a. l.),Size / Dimensions,Discoverer / Découvreur,Year / Année,Image,URL de l'image,Constellation
0,M91,NGC 4548,Galaxy / Galaxie,Spring / Printemps,10,Hair of Berenice,Chevelure de Bérénice,Coma Berenices,12:35:26.45,+14:29:46.8,37000000.0,"5,4' x 4,4'",Messier,1781.0,http://www.lasam.ca/messier/M091.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,Com
1,M75,NGC 6864,Globular Cluster / Amas Globulaire,Summer / Été,8,Archer,Sagittaire,Sagittarius,20:06:04.84,-21:55:20.0,78500.0,"4,6'",Méchain,1780.0,http://www.lasam.ca/messier/M075.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,Sgr
2,M95,NGC 3351,Galaxy / Galaxie,Spring / Printemps,9,Lion,Lion,Leo,10:43:57.70,+11:42:13.7,20300000.0,"7,4' x 5,1'",Méchain,1781.0,http://www.lasam.ca/messier/M095.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,Leo
3,M41,NGC 2287,Open Cluster / Amas Ouvert,Winter / Hiver,4,Great Dog,Grand Chien,Canis Major,06:45:59.94,-20:45:15.2,1600.0,"30,0'",Hodierna,1654.0,http://www.lasam.ca/messier/M041.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,CMa
4,M46,NGC 2437,Open Cluster / Amas Ouvert,Winter / Hiver,6,"Stern,Poop deck",Poupe,Puppis,07:41:46.82,-14:48:36.0,3200.0,"24,0'",Messier,1771.0,http://www.lasam.ca/messier/M046.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,Pup


In [13]:
# Show the Messier entries that have no right ascension.
df_messier[df_messier['RA (Right Ascension)'].isnull()]

Unnamed: 0,Messier,NGC,Object type / Type d'objet,Season / Saison,Magnitude,Constellation (EN),Constellation (FR),Constellation (Latin),RA (Right Ascension),Dec (Declinaison),Distance (l.y / a. l.),Size / Dimensions,Discoverer / Découvreur,Year / Année,Image,URL de l'image,Constellation
18,M102,NGC 5866,Galaxy / Galaxie,Spring / Printemps,10,,,,,,23000000.0,"5,2' x 2,3'",Méchain,1781.0,http://www.lasam.ca/messier/M102.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,
24,M40,Winnecke 4,Double star / Étoile Double,Spring / Printemps,9,,,,,,,,Hevelius,1660.0,http://www.lasam.ca/messier/M040.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,
53,M45,,Open Cluster / Amas Ouvert,Winter / Hiver,1,,,,,,410.0,"120,0'",,,http://www.lasam.ca/messier/M045.JPG,https://www.datastro.eu/api/v2/catalog/dataset...,
