In [1]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
import math as math
from scipy.optimize import curve_fit

In [2]:
# Open the two 2021 NetCDF files (opened in this order) and bind each
# resulting xarray dataset to its own name.
mighti_2021, ivm_2021 = (
    xr.open_dataset(nc_file)
    for nc_file in (
        'eqcross_mv05iv06_r00_mti_2021 (1).nc',
        'eqcross_mv05iv06_r00_ivm_2021.nc',
    )
)

Cannot find the ecCodes library


In [3]:
# Bare expression as the last line of the cell: shows the dataset's repr for inspection.
mighti_2021

In [4]:
# Bare expression as the last line of the cell: shows the dataset's repr for inspection.
ivm_2021

In [5]:
# Short aliases for the two source datasets.
dsm, dsi = mighti_2021, ivm_2021

# First dataset: keep u and v, plus the quality variable q for filtering, at dt=0.
# slice(2, -2) discards the first two and last two samples along `alt`.
dm0 = dsm[['u', 'v', 'q']].sel(dt=0).isel(alt=slice(2, -2))

# Second dataset: drift_mer is the quantity of interest; slt (local time), time
# (actual time) and the two IVM data-quality flags are carried along for context.
di0 = dsi[['drift_mer', 'slt', 'time', 'dm_flag', 'rpa_flag']].sel(dt=0)

# Combine both selections, then drop every `num` entry that has a missing
# value in u or in drift_mer.
d = xr.merge((dm0, di0)).dropna(
    dim='num',
    how='any',
    subset=['u', 'drift_mer'],
)

# Keep only entries with 8 < slt < 16, a mean q over altitude above 0.95, and
# both IVM quality flags equal to 0.
# (Earlier variants of this filter used slt > 11 with the flag checks only, or
# the slt window and q threshold without the flag checks.)
d = d.where(
    (d.slt > 8)
    & (d.slt < 16)
    & (d.q.mean(dim='alt') > 0.95)
    & (d.dm_flag == 0)
    & (d.rpa_flag == 0),
    drop=True,
)


In [6]:
# Bare expression as the last line of the cell: shows the merged, filtered dataset's repr.
d

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale 


# Convert each variable to a pandas object: to_dataframe() followed by
# unstack(), then select the entry keyed by the variable's own name.
df_u, df_drift_mer = (
    d[var_name].to_dataframe().unstack()[var_name]
    for var_name in ('u', 'drift_mer')
)


In [8]:
# WITHOUT PCA: ordinary least squares fitted directly on the u columns.
from sklearn.metrics import r2_score

X, y = df_u, df_drift_mer

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression(fit_intercept=True).fit(X_train, y_train)
print('Intercept (no PCA) = ', model.intercept_)

# Predictions on both partitions.
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

# Coefficient of determination on the training and held-out rows.
r2_train_sk = r2_score(y_train, y_pred_train)
r2_test_sk = r2_score(y_test, y_pred_test)

print('r^2 for Training Set Using SKLearn', r2_train_sk)
print('r^2 for Test Set Using SKLearn', r2_test_sk)


Intercept (no PCA) =  7.562643684558019
r^2 for Training Set Using SKLearn 0.36326234856359874
r^2 for Test Set Using SKLearn 0.2825754130899647


In [10]:
# WITH PCA: standardize the u columns, project them onto principal components,
# then fit ordinary least squares on the component scores.
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

# Number of principal components handed to the regression; passed unchanged to
# sklearn's PCA(n_components=...).
#   None  -> keep every component (the behaviour of the original `PCA()` call).
#            No dimensionality reduction takes place in that case; the fit
#            printed coefficients of order 1e-15 for the trailing components,
#            which presumably correspond to (near-)zero-variance directions.
#   int   -> keep exactly that many leading components.
#   float in (0, 1) -> keep enough components to reach that explained-variance
#            fraction (see the sklearn PCA docs for the solver requirements).
N_PCA_COMPONENTS = None

X = df_u
y = df_drift_mer

# Same 70/30 split and seed as the no-PCA cell, so both models see the same rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Fit the scaler on the training rows only and reuse its statistics for the test rows.
sc = StandardScaler()
sc.fit(X_train)

# NOTE: despite "reduced" in the names, these hold the standardized inputs;
# the names are kept unchanged because other cells may refer to them.
X_reduced_train = sc.transform(X_train)
X_reduced_test = sc.transform(X_test)

# The principal axes are learned from the training rows only.
pca = PCA(n_components=N_PCA_COMPONENTS)
pca.fit(X_reduced_train)
X_train_pca = pca.transform(X_reduced_train)
X_test_pca = pca.transform(X_reduced_test)

lin_reg_pc = LinearRegression(fit_intercept=True)
lin_reg_pc.fit(X_train_pca, y_train)

print('Intercept (PCA) = ', lin_reg_pc.intercept_)
print('Coefficients (PCA)=', lin_reg_pc.coef_)

y_pred_train_pca = lin_reg_pc.predict(X_train_pca)
y_pred_test_pca = lin_reg_pc.predict(X_test_pca)

# NOTE: r2_train_sk / r2_test_sk are also assigned by the no-PCA cell; after
# this cell runs they hold the PCA model's scores.
r2_train_sk = r2_score(y_train, y_pred_train_pca)
r2_test_sk = r2_score(y_test, y_pred_test_pca)

print('r^2 for Training Set Using SKLearn', r2_train_sk)
print('r^2 for Test Set Using SKLearn', r2_test_sk)


Intercept (PCA) =  15.502096809558019
Coefficients (PCA)= [ 5.99049452e-01  7.35514893e-01  5.76279464e-01  6.21704738e-01
 -1.15740382e+00  2.21730105e+00 -5.42488144e-01  3.12420874e-01
 -3.97717517e-01 -2.15090372e-01 -9.90817487e-01  1.31924517e+00
  1.21331408e-01  2.11870466e+00 -1.08602828e+00  6.59463821e-01
  1.93126043e+00  7.08027937e-01  8.53952420e-02  9.18200444e-01
  1.16010860e+00 -2.36652646e+00 -9.35723086e-01 -1.10726899e+00
  8.01756447e-01  3.50370310e-01  6.74938037e-01  2.55919482e+00
 -1.59749988e-01  2.25644148e+00  3.27771400e+00  1.37276739e+00
  4.66394201e-01 -2.84123749e-01 -8.79790272e-01  5.32764772e-01
 -1.38874899e+00  1.27589467e+00  1.75752652e+00  6.94206902e-01
  3.35145120e+00  3.82933962e+00  4.21884749e-15 -1.83186799e-15
 -2.99760217e-15 -4.85028684e-15  4.44089210e-16  1.85962357e-15
  4.21884749e-15  1.83880688e-15 -2.91433544e-15 -2.52575738e-15
 -2.62290190e-15 -1.72084569e-15 -1.47104551e-15  1.27675648e-15
  9.15933995e-16  2.94902991e-17