# Final project

In [22]:
# Importing dependencies

import os
from glob import glob
import itertools

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go


In [23]:
# QNPy dependencies

import QNPy #Importing the package

from QNPy import Preprocess as pr #Importing Preprocess module from the package
from QNPy.Preprocess import transform #importing the funcion transform for transformation the data
from QNPy.Preprocess import * #importing all external packages from Preprocess

In [24]:
import pandas as pd
import os

# Directory containing the input files
folder_path = 'kriveu/'

# Names of the regular files in that directory (anything that is not a file is skipped)
all_files = list(
    filter(
        lambda name: os.path.isfile(os.path.join(folder_path, name)),
        os.listdir(folder_path),
    )
)

all_dataframes = []

# Load every file with pandas; a file that cannot be read is reported and skipped
for file_name in all_files:
    file_path = os.path.join(folder_path, file_name)
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        print(f"Error reading {file_name}: {e}")
    else:
        # Expose the frame as a notebook-level variable called lc_<file name without extension>
        globals()['lc_' + os.path.splitext(file_name)[0]] = df

        # Also collect it so later cells can iterate over all frames
        all_dataframes.append(df)


In [25]:
# Report the shape of every loaded DataFrame, numbered from 1

for position, frame in enumerate(all_dataframes, start=1):
    print(f"DataFrame {position}: {frame.shape}")

DataFrame 1: (136, 3)
DataFrame 2: (117, 3)
DataFrame 3: (113, 3)
DataFrame 4: (111, 3)
DataFrame 5: (142, 3)
DataFrame 6: (120, 3)
DataFrame 7: (107, 3)
DataFrame 8: (125, 3)
DataFrame 9: (115, 3)
DataFrame 10: (142, 3)
DataFrame 11: (151, 3)
DataFrame 12: (122, 3)
DataFrame 13: (119, 3)
DataFrame 14: (144, 3)
DataFrame 15: (131, 3)
DataFrame 16: (109, 3)


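We can see that our dataframes have different shapes.
Their first dimension differs, which means they do not contain the same number of rows, i.e. the number of timestamps (observations) per light curve. This means we will have to bring the light curves to a common length before they can be treated as equal-sized inputs (**TODO:** choose the method, e.g. padding or resampling).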


In [26]:
# One interactive figure per light curve: 'mag' against 'mjd', drawn once as
# plain markers and once more with the 'magerr' column as y error bars

for plot_number, lightcurve in enumerate(all_dataframes, start=1):
    # Plain markers
    points = go.Scatter(
        x=lightcurve['mjd'],
        y=lightcurve['mag'],
        mode='markers',
        name='mag',
        marker=dict(size=4),
    )

    # The same points, this time carrying their error bars
    points_with_errors = go.Scatter(
        x=lightcurve['mjd'],
        y=lightcurve['mag'],
        error_y=dict(type='data', array=lightcurve['magerr'], visible=True),
        mode='markers',
        marker=dict(size=4),
        name='mag with error bars',
    )

    fig = go.Figure()
    for scatter in (points, points_with_errors):
        fig.add_trace(scatter)

    # Axis labels, title and legend
    fig.update_xaxes(title_text='MJD (Modified Julian Date)')
    fig.update_yaxes(title_text='Magnitude')
    fig.update_layout(
        title_text=f"Time Series with Error Bars - Plot {plot_number}",
        showlegend=True,
    )

    fig.show()


In [28]:
# Stack every loaded DataFrame row-wise into one table; the original row
# labels are discarded and replaced by a fresh consecutive index

data = pd.concat(objs=all_dataframes, axis=0, ignore_index=True)

In [27]:
# Visualize all light curves on one graph
#
# Every light curve is drawn as two traces (plain markers + markers with
# error bars). Both traces of a curve share ONE colour and ONE legend group,
# so the legend swatch matches what is drawn and clicking a legend entry
# toggles the markers and their error bars together.

fig = px.scatter()

# The palette is cycled, so colours repeat once there are more light curves
# than palette entries.
color_cycle = itertools.cycle(px.colors.qualitative.Set1)

for i, df in enumerate(all_dataframes):
    curve_name = f'Plot {i + 1}'
    # Draw exactly one colour per light curve (previously the cycle was
    # advanced twice, giving the error-bar trace a different colour).
    curve_color = next(color_cycle)

    trace = go.Scatter(
        x=df['mjd'],
        y=df['mag'],
        mode='markers',
        name=curve_name,
        legendgroup=curve_name,
        marker=dict(size=4, color=curve_color)
    )

    error_bars = go.Scatter(
        x=df['mjd'],
        y=df['mag'],
        error_y=dict(
            type='data',
            array=df['magerr'],
            visible=True
        ),
        mode='markers',
        marker=dict(size=4, color=curve_color),
        # Hidden from the legend; controlled through the shared legend group
        legendgroup=curve_name,
        showlegend=False
    )

    fig.add_trace(trace)
    fig.add_trace(error_bars)

fig.update_xaxes(title_text='MJD (Modified Julian Date)')
fig.update_yaxes(title_text='Magnitude')

fig.update_layout(title_text="Time Series with Error Bars - All Plots", showlegend=True)
fig.show()

In [28]:
# Visualizing box and whisker plots and histograms for all individual light curves
#
# px.histogram is called without a normalisation argument, so the bars show
# counts; the title and y-axis label say so (they previously claimed "KDE"
# and "Density", which this figure does not show).

for i, df in enumerate(all_dataframes):
    fig = px.histogram(
        df,
        x='mag',
        marginal='box',
        nbins=30,
        title=f"Histogram and Box Plot for 'mag' - Plot {i + 1}"
    )

    # Label only the primary axes (xaxis / yaxis) so the marginal box-plot
    # panel does not receive the same titles.
    # NOTE(review): assumes the histogram occupies the primary axes and the
    # marginal panel the secondary ones - confirm on the rendered figure.
    fig.update_layout(xaxis_title_text='Magnitude', yaxis_title_text='Count')

    fig.show()

In [20]:
import pandas as pd
import os
from glob import glob

# Path to the folder containing the files
folder_path = 'kriveu/'

# Every entry in the folder; glob returns paths that already include folder_path
all_files = glob(os.path.join(folder_path, '*'))

all_dataframes = []
# Paths that were read successfully, kept index-aligned with all_dataframes.
# all_files cannot be used for labelling: it still contains entries that
# failed to load, so its positions drift away from all_dataframes after the
# first failure.
loaded_files = []

# Read each file into a DataFrame; unreadable entries are reported and skipped
for file_path in all_files:
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        continue

    all_dataframes.append(df)
    loaded_files.append(file_path)

# Summary statistics for each DataFrame, excluding the 'mjd' column
for i, (file_path, df) in enumerate(zip(loaded_files, all_dataframes)):
    # Exclude the 'mjd' column
    df_describe = df.drop(columns='mjd').describe()

    # Display the describe statistics, labelled with the file they came from
    print(f"DataFrame {i + 1} - {file_path}:")
    print(df_describe)
    print("\n" + "="*30 + "\n")


DataFrame 1 - kriveu/1397099:
              mag      magerr
count  136.000000  136.000000
mean    21.218165    0.182378
std      0.248631    0.099913
min     20.577721    0.075628
25%     21.030034    0.115389
50%     21.206480    0.152943
75%     21.379022    0.206686
max     22.071302    0.663094


DataFrame 2 - kriveu/1397703:
              mag      magerr
count  117.000000  117.000000
mean    20.545220    0.096162
std      0.185631    0.067997
min     19.967167    0.048207
25%     20.439396    0.062171
50%     20.549681    0.075529
75%     20.650959    0.111715
max     21.177103    0.679570


DataFrame 3 - kriveu/1461135:
              mag      magerr
count  113.000000  113.000000
mean    20.083718    0.072061
std      0.337188    0.039216
min     19.377098    0.036599
25%     19.875856    0.047516
50%     20.046000    0.056554
75%     20.203922    0.081622
max     21.741570    0.250659


DataFrame 4 - kriveu/1385693:
              mag      magerr
count  111.000000  111.000000
mean