### This notebook performs a PCA-style factor analysis with varimax rotation (R-verified)

In [1]:
%load_ext rpy2.ipython
%config InlineBackend.figure_format = 'retina'

import pandas as pd
from sklearn.decomposition import FactorAnalysis
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import numpy as np

import warnings
warnings.filterwarnings('ignore')

#### set directory and import dataset

In [2]:
# Input and output locations are the same directory, so bind both names at once.
# NOTE(review): absolute, machine-specific path; the trailing '/' is required
# because file names are appended with plain string concatenation.
datadir = outdir = '/Volumes/Maxtor/phdresearch/som_new/'

In [3]:
# Read the CSV with explicit column labels: twelve 'row,col' labels followed
# by five *_AVG labels. Because `names` is given and `header` is not, pandas
# treats every line of the file as data (no header row is consumed).
# NOTE(review): the preview below shows season labels as the index, which
# suggests the file has one more column than `names`; confirm against the file.
data = pd.read_csv(
    datadir + 'SAfr_700hpa_era_newAB.csv',
    names=[
        '0,0', '0,1', '0,2',
        '1,0', '1,1', '1,2',
        '2,0', '2,1', '2,2',
        '3,0', '3,1', '3,2',
        'MEI_AVG', 'AAO_AVG', 'ITCZInt_AVG', 'MJO_AVG', 'DMI_AVG',
    ],
)

#### select specific columns in dataset

In [4]:
# Keep the first 15 columns (positions 0-14): the twelve 'row,col' columns
# plus MEI_AVG, AAO_AVG and ITCZInt_AVG. MJO_AVG and DMI_AVG are left out.
df = data.iloc[:, :15]

In [5]:
# Preview the first five rows of the selected columns.
df.head()

Unnamed: 0,"0,0","0,1","0,2","1,0","1,1","1,2","2,0","2,1","2,2","3,0","3,1","3,2",MEI_AVG,AAO_AVG,ITCZInt_AVG
JFM80,0,0,6,0,5,9,8,5,13,12,20,13,0.652,-0.95,6.093
FMA80,0,2,6,4,6,13,7,5,9,11,16,11,0.729,-1.491,6.427
MAM80,8,9,10,7,3,12,6,6,8,8,8,7,0.853,-1.324,6.113
AMJ80,15,15,9,9,6,13,6,6,5,6,0,1,0.925,-0.754,5.903
MJJ80,15,15,20,6,7,16,3,5,5,0,0,0,0.872,-0.712,6.273


#### extract the SOM node frequencies and the `MEI_AVG`, `AAO_AVG` and `ITCZInt_AVG` columns, then standardize them

In [6]:
# Columns fed to the factor analysis: the twelve 'row,col' columns and the
# three *_AVG columns kept in `df`.
nodes = [
    '0,0', '0,1', '0,2',
    '1,0', '1,1', '1,2',
    '2,0', '2,1', '2,2',
    '3,0', '3,1', '3,2',
    'MEI_AVG', 'AAO_AVG', 'ITCZInt_AVG',
]

# Pull those columns out as a NumPy array and standardize each column
# (StandardScaler defaults: remove the mean, scale to unit variance).
x = StandardScaler().fit_transform(df[nodes].values)

In [7]:
# (rows, columns) of the standardized array; x is a NumPy array.
x.shape

(406, 15)

#### perform factor analysis with varimax rotation

In [8]:
# Fit a three-factor model to the standardized matrix, with varimax rotation
# applied to the loadings. `fit` returns the estimator itself, so `era` is
# the fitted model.
era = FactorAnalysis(
    n_components=3,
    rotation='varimax',
).fit(x)

In [9]:
# Compute the data covariance implied by the fitted factor model.
# NOTE(review): the trailing ';' suppresses the cell output and the result is
# not assigned, so this cell currently has no visible effect.
era.get_covariance();

In [10]:
# Show the loadings transposed: one row per input column (same order as
# `nodes`) and one column per factor.
era.components_.T

array([[-0.81502447, -0.03480992,  0.02812297],
       [-0.82913934,  0.11197453,  0.19759255],
       [-0.68419625,  0.36594673,  0.29815912],
       [-0.6794119 ,  0.05111988, -0.28662233],
       [-0.30974335,  0.4438227 , -0.29076044],
       [-0.00837923,  0.72039307,  0.03095742],
       [-0.11678019, -0.09102721, -0.68182675],
       [ 0.4544653 ,  0.09565417, -0.4286571 ],
       [ 0.66459206,  0.2858315 ,  0.19576088],
       [ 0.54842232, -0.55911666, -0.10681822],
       [ 0.73261859, -0.48129461,  0.19716769],
       [ 0.74314022, -0.2576395 ,  0.29560811],
       [-0.23096913,  0.54929702,  0.0923048 ],
       [ 0.06120481, -0.06960255,  0.23025851],
       [ 0.09859103, -0.19750385, -0.0816199 ]])

In [17]:
# Apply the fitted model to the standardized data to obtain the factor scores
# (one row of scores per row of x).
# NOTE(review): this cell's execution count (17) is out of sequence with its
# neighbours; re-run the notebook top to bottom on a fresh kernel to confirm
# it does not rely on leftover state.
era_scores = era.transform(x)

In [12]:
# Unused alternative (refits the model, then transforms): scoresy = era.fit_transform(x); print(scoresy)

In [18]:
# Wrap the score array in a DataFrame, labelling the three columns RC1-RC3.
# The frame gets a default integer index.
df_scores = pd.DataFrame(
    data=era_scores,
    columns=['RC1', 'RC2', 'RC3'],
)
# Display the table as the cell output.
df_scores

Unnamed: 0,RC1,RC2,RC3
0,1.156191,-0.060210,0.422885
1,0.844915,0.176111,0.122292
2,-0.007165,0.132582,0.319812
3,-0.766558,0.268395,0.047018
4,-0.901574,0.825561,0.736015
5,-0.679149,1.189052,0.916992
6,-0.630750,0.894283,0.483377
7,-1.006724,0.040748,0.280071
8,-0.740854,-0.348403,-1.077929
9,-0.009384,-0.601458,-0.970929


In [15]:
# Directory prefix that the matrix CSV file name is appended to when it is read.
# NOTE(review): absolute, machine-specific path; the trailing '/' is required
# because the file name is appended with plain string concatenation.
mat_loc = (
    '/Volumes/Maxtor/CMIP5_models/cmip5_smhi/cmip_trbi/roll_freq_matrix/'
)

In [None]:
mat_rix =  pd.read_csv(mat_loc + 'zg700_day_CanESM2_rcp85_r1i1p1_19800101-20131231_matrix.csv'