# Eigenfaces

In [None]:
import climetlab as cml
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA
from sklearn.decomposition._factor_analysis import _ortho_rotation
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
# Load the LFW faces, keeping only people with at least 100 images.
lfw_dataset = fetch_lfw_people(min_faces_per_person=100)
# The three axes of `images` are unpacked as (count, height, width); height and
# width are kept so flattened rows can be reshaped back into images for plotting.
_, h, w = lfw_dataset.images.shape
X = lfw_dataset.data
y = lfw_dataset.target
target_names = lfw_dataset.target_names
# Hold out 30% of the samples for testing. The fixed random_state makes the
# split (and everything fitted on it) identical on every "Restart & Run All".
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

In [None]:
# Fit a whitened 10-component PCA on the training faces only.
# random_state pins any randomized SVD solver PCA may select, so the
# components are the same on every run.
n_components = 10
pca = PCA(n_components=n_components, whiten=True, random_state=0).fit(X_train)
# Project both splits onto the fitted components.
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

In [None]:

def plot_gallery(images, height, width, rows=3, cols=4, titles=None):
    """Plot a sequence of flattened images as a grid of tiles on a new figure.

    Parameters
    ----------
    images : sequence
        Indexable, sized collection of images; ``images[i]`` is reshaped to
        ``(height, width)`` before being drawn.
    height, width : int
        Shape every image is reshaped to.
    rows, cols : int
        Grid layout. At most ``rows * cols`` images are drawn; when fewer
        images are supplied the remaining grid cells stay empty.
    titles : sequence of str, optional
        Per-tile titles. Tiles beyond ``len(titles)`` are left untitled.

    Returns
    -------
    None
        The new figure remains the current pyplot figure.
    """
    fig = plt.figure()
    # Clamp to the number of images so a short sequence cannot raise IndexError.
    n_tiles = min(rows * cols, len(images))
    for i in range(n_tiles):
        ax = fig.add_subplot(rows, cols, i + 1)
        ax.imshow(images[i].reshape((height, width)))
        if titles is not None and i < len(titles):
            ax.set_title(titles[i])
        # Pixel coordinates carry no information here, so hide the ticks.
        ax.set_xticks(())
        ax.set_yticks(())
    fig.tight_layout()

def plot_pca_components_gallery(pca, height, width, rows, cols):
    """Draw the components of a fitted PCA as a gallery titled by variance ratio.

    Each tile is titled ``EVR: <ratio>`` using the matching entry of
    ``pca.explained_variance_ratio_``. Returns None.

    NOTE: a function with this same name is defined again later in this cell;
    the later definition replaces this one when the cell runs.
    """
    evr_titles = []
    for ratio in pca.explained_variance_ratio_:
        evr_titles.append(f"EVR: {ratio:.2f}")
    plot_gallery(
        pca.components_,
        height=height,
        width=width,
        rows=rows,
        cols=cols,
        titles=evr_titles,
    )
        
def weight_by_longitude(data, longitude_dimension_name="longitude"):
    """Return a copy of ``data`` multiplied by ``1 / cos(longitude)``.

    Parameters
    ----------
    data
        Object supporting ``.copy()``, item get/set and in-place
        multiplication (this notebook passes an xarray dataset).
    longitude_dimension_name : str
        Key under which the longitude values, in degrees, are stored.

    Returns
    -------
    The weighted copy, which also carries the weights under the key
    ``"longitudinal_weights"``. ``data`` itself is not reassigned.

    NOTE(review): area weighting of gridded fields is usually a function of
    latitude, and ``1 / cos`` diverges as the angle approaches 90 degrees --
    confirm that longitude is really the intended coordinate.
    NOTE(review): the in-place multiply runs after the weights were stored on
    the copy, so it presumably scales the stored weights too -- confirm.
    """
    weighted = data.copy()
    longitude_radians = _degrees_to_radians(weighted[longitude_dimension_name])
    weighted["longitudinal_weights"] = 1 / np.cos(longitude_radians)
    weighted *= weighted["longitudinal_weights"]
    return weighted


def _degrees_to_radians(d):
    """Convert ``d`` from degrees to radians via ``np.radians``."""
    in_radians = np.radians(d)
    return in_radians

def plot_pca_components_gallery(pca, height, width, rows, cols):
    """Draw the components of a fitted PCA as a gallery titled by variance ratio.

    Each tile is titled ``EVR: <ratio>`` using the matching entry of
    ``pca.explained_variance_ratio_``.

    Returns whatever ``plot_gallery`` returns. ``plot_gallery`` as defined in
    this notebook has no return statement, so that is currently None.
    """
    return plot_gallery(
        pca.components_,
        height=height,
        width=width,
        rows=rows,
        cols=cols,
        titles=[f"EVR: {ratio:.2f}" for ratio in pca.explained_variance_ratio_],
    )

In [None]:
# Preview the first 16 training faces in a 4x4 grid.
plot_gallery(X_train[:16], height=h, width=w, rows=4, cols=4)

In [None]:
# Show the per-pixel mean stored by the fitted PCA as a single image.
plot_gallery([pca.mean_], h, w, rows=1, cols=1)

In [None]:
# Show the first 9 principal components with their explained-variance ratios.
# Assigning to `_` keeps the function's return value out of the cell output.
_ = plot_pca_components_gallery(pca, height=h, width=w, rows=3, cols=3)

In [None]:
# Map the first 16 projected training faces back to pixel space and show them.
faces_reconstructed = pca.inverse_transform(X_train_pca[:16])
plot_gallery(faces_reconstructed, h, w, rows=4, cols=4)

In [None]:
# The same 16 original training faces again, for comparison with the
# reconstructions plotted in the previous cell.
plot_gallery(X_train[:16], height=h, width=w, rows=4, cols=4)

In [None]:
# Rotate the PCA components. _ortho_rotation is a private scikit-learn helper
# (imported from sklearn.decomposition._factor_analysis), so its behaviour may
# change between releases. It is given the transposed components matrix.
components_transposed = pca.components_.T
rotated_faces = _ortho_rotation(components_transposed)

In [None]:
# Show the rotated components in a 3x3 grid.
plot_gallery(rotated_faces[:16], height=h, width=w, rows=3, cols=3)

In [None]:
# Scatter every face in the plane of the first two principal components.
# The projection is computed once and reused for both axes, and the flattened
# length comes from h and w (unpacked from lfw_dataset.images.shape) rather
# than a hard-coded 62*47.
faces_pca = pca.transform(lfw_dataset.images.reshape(-1, h * w))
plt.plot(faces_pca[:, 0], faces_pca[:, 1], '.')
plt.xlabel("PC 1")
plt.ylabel("PC 2")
lfw_dataset.images.shape

In [None]:
# Load the "maelstrom-weather-model-level" dataset for 2019-01-02 through
# climetlab (imported as `cml` in the notebook's import cell) and convert it
# to an xarray object.
weather_ml = cml.load_dataset("maelstrom-weather-model-level", date="2019-01-02")
data = weather_ml.to_xarray()


In [None]:
# Display the dataset loaded above to inspect its contents.
data

In [None]:
# Plot variable "u" at the first time step and at level index 9.
# isel selects by integer position, not by coordinate value.
u_first_step = data["u"].isel(time=0, level=9)
u_first_step.plot()

In [None]:
# Keep a single level (integer position 9 along "level") for the PCA below.
data_fixed_level = data.isel(level=9)

In [None]:
# Display the single-level selection to check which dimensions remain.
data_fixed_level

In [None]:
# Pull out variable "t" from the single-level selection.
# NOTE(review): presumably temperature, judging by the variable names -- confirm.
temperatures = data_fixed_level["t"]

In [None]:
# Flatten each time step's field into one row, giving a (time, grid_points)
# matrix. The row count is taken from the data rather than hard-coded, so the
# cell keeps working when the number of time steps or the grid size changes.
temperatures_by_time = temperatures.transpose("time", ...).values
temperatures_flattened = temperatures_by_time.reshape(temperatures_by_time.shape[0], -1)

In [None]:
# Sanity check: (number of time steps, number of grid points).
temperatures_flattened.shape

In [None]:
# 9 components so they fill a 3x3 gallery. random_state pins any randomized
# SVD solver PCA may select, so the components are the same on every run.
pca_temperature = PCA(n_components=9, random_state=0)

In [None]:
# Fit on the flattened fields: one row per time step.
pca_temperature.fit(temperatures_flattened)

In [None]:
# Sanity check the shape of the fitted components matrix.
pca_temperature.components_.shape

In [None]:
%matplotlib notebook
plot_pca_components_gallery(pca_temperature, 351, 551, rows=3, cols=3)

In [None]:
# Scatter the samples in the plane of the first two components.
# This uses temperatures_flattened, the matrix pca_temperature was fitted on;
# `temperature` is only created further down the notebook, so it does not
# exist yet when the notebook is run top to bottom. The projection is computed
# once and reused for both axes.
temperature_scores = pca_temperature.transform(temperatures_flattened)
plt.plot(temperature_scores[:, 0], temperature_scores[:, 1], '.')

In [None]:
# Show the per-grid-point mean stored by the fitted PCA as a single image.
plot_gallery([pca_temperature.mean_], height=351, width=551, rows=1, cols=1)

In [None]:
# Round-trip the fields through the PCA (project, then map back) and show the
# reconstructions in a 4x6 grid.
temperature_projected = pca_temperature.transform(temperatures_flattened)
temperature_reconstructed = pca_temperature.inverse_transform(temperature_projected)
plot_gallery(temperature_reconstructed.reshape(24, 351, 551), 351, 551, rows=4, cols=6)

In [None]:
# The original fields in the same 4x6 layout, for comparison with the
# reconstructions above.
temperature_fields = temperatures_flattened.reshape(24, 351, 551)
plot_gallery(temperature_fields, height=351, width=551, rows=4, cols=6)

In [None]:
# Open six local NetCDF files (ml_20190101_00.nc ... ml_20190601_00.nc) as one
# dataset. xarray is imported as `xr` in the notebook's import cell. The paths
# are relative to the notebook's working directory.
datafiles = [f"ml_20190{i}01_00.nc" for i in range(1, 7)]
ds = xr.open_mfdataset(datafiles)

In [None]:
# Keep a single level (integer position 1 along "level").
ds_fixed_level = ds.isel(level=1)

In [None]:
# Flatten each time step's "t" field into one row, giving a (time, grid_points)
# matrix. The row count is taken from the data rather than hard-coded, so the
# cell keeps working when the set of input files changes.
temperature_by_time = ds_fixed_level.transpose("time", ...)["t"].values
temperature = temperature_by_time.reshape(temperature_by_time.shape[0], -1)

In [None]:
# 16 components so they fill a 4x4 gallery. random_state pins any randomized
# SVD solver PCA may select, so the components are the same on every run.
pca_t = PCA(n_components=16, random_state=0)
pca_t.fit(temperature)

In [None]:
# Preview the first four flattened fields, each reshaped to 351 x 551.
plot_gallery(temperature[:4], height=351, width=551, rows=2, cols=2)

In [None]:
# Show all 16 components of pca_t with their explained-variance ratios.
plot_pca_components_gallery(pca_t, height=351, width=551, rows=4, cols=4)

In [None]:
# Show the per-grid-point mean stored by pca_t as a single image.
plot_gallery([pca_t.mean_], height=351, width=551, rows=1, cols=1)

# TODO 
 

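- Compute the explained variance of the rotated components (the output of `_ortho_rotation`), so that they can be compared with the unrotated PCA components.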

In [None]:
# Apply the 1 / cos(longitude) weighting to a copy of the full dataset.
ds_weighted = weight_by_longitude(ds, longitude_dimension_name="longitude")

In [None]:
# Same level selection as the unweighted run (integer position 1 along "level").
ds_weighted_fixed_level = ds_weighted.isel(level=1)

In [None]:
# Flatten each time step's weighted "t" field into one row, giving a
# (time, grid_points) matrix. The row count is taken from the data rather than
# hard-coded, so the cell keeps working when the set of input files changes.
temperature_weighted_by_time = ds_weighted_fixed_level.transpose("time", ...)["t"].values
temperature_weighted = temperature_weighted_by_time.reshape(
    temperature_weighted_by_time.shape[0], -1
)

In [None]:
# Same 16-component setup as pca_t, fitted on the weighted fields.
# random_state pins any randomized SVD solver PCA may select, so the
# components are the same on every run.
pca_t_weighted = PCA(n_components=16, random_state=0)
pca_t_weighted.fit(temperature_weighted)

In [None]:
# Show all 16 components of the weighted PCA with their explained-variance ratios.
plot_pca_components_gallery(pca_t_weighted, height=351, width=551, rows=4, cols=4)

In [None]:
# Rotate the weighted PCA components. _ortho_rotation is a private scikit-learn
# helper (imported from sklearn.decomposition._factor_analysis), so its
# behaviour may change between releases. It is given the transposed matrix.
weighted_components_transposed = pca_t_weighted.components_.T
rotated = _ortho_rotation(weighted_components_transposed)

In [None]:
# Show the rotated components in a 4x4 grid, each reshaped to 351 x 551.
plot_gallery(rotated, height=351, width=551, rows=4, cols=4)