# MOST Imagery Data Analysis

## Initialization

In [1]:
import os
import tarfile

In [2]:
import numpy as np
import pandas as pd

In [3]:
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import GridSearchCV

In [4]:
from astropy.io import fits
from astropy.visualization import astropy_mpl_style

In [5]:
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
plt.style.use(astropy_mpl_style)
%matplotlib inline

## Loading Data

In [7]:
# locate the dataset directory relative to this notebook
notebook_path = os.path.abspath("predict_magnetic_field.ipynb")
tar_dir = os.path.join(os.path.dirname(os.path.dirname(notebook_path)), "datasets/")

In [8]:
exposure_dates = []
images = []

for root, dirs, files in os.walk(tar_dir):
    for f in files:
        if os.path.splitext(f)[1] == ".tar":
            
            # extract .tar file in memory
            with tarfile.open(
                name=os.path.join(root, f),
                mode="r"
            ) as tar_obj:
                
                # for every file in the tar file
                for member in tar_obj.getnames():
                    if os.path.splitext(member)[1] == ".fits":
                        extracted = tar_obj.extractfile(member)
                        
                        # open extracted .fits file
                        with fits.open(extracted) as hdul:
                            header_data = hdul[0].header
                            image_data = hdul[0].data
                            
                            exposure_dates.append(header_data["DATE-OBS"])
                            images.append(image_data)

In [10]:
images

[array([[ 42283,  41806,  41768,  41955,  41813,  41901,  41866,  41846,
          41979,  42503,  42042,  41949,  41791,  41844,  41941,  41966,
          41866,  42074,  41896,  41838],
        [ 41927,  41739,  42003,  41945,  41802,  41996,  41830,  42083,
          42509,  42311,  41911,  41917,  41806,  42084,  42982,  41897,
          41845,  41971,  41981,  42700],
        [ 42148,  41968,  42006,  41892,  42117,  41944,  41878,  41880,
          41978,  42329,  42043,  41888,  41798,  41799,  42257,  41811,
          41858,  41870,  41976,  41940],
        [ 42460,  41813,  41966,  41910,  42390,  42035,  42028,  42145,
          41964,  42070,  42072,  41873,  42497,  41852,  42399,  41836,
          41858,  42068,  42031,  41870],
        [ 41967,  41917,  41969,  41803,  41965,  42037,  42127,  42109,
          42242,  42197,  42181,  41971,  42526,  42052,  42375,  41855,
          42041,  41889,  41866,  42196],
        [ 41825,  41777,  41880,  41955,  41921,  42064,  42

In [9]:
df = pd.DataFrame(images, index=exposure_dates)

ValueError: Must pass 2-d input

In [None]:
df.head()

In [None]:
plt.figure(figsize=(14, 8))
plt.imshow(df, cmap="gray")
plt.colorbar()
plt.show()