# Imports and Loads

In [1]:
from base import *
from scipy import stats as st

In [2]:
ATLANTICO_NORTE = AtlanticoNorte()
ATLANTICO_SUL = AtlanticoSul()
PACIFICO_SUL = PacificoSul()
NINO = Nino()
COORDENADAS = Coordenadas()
PRECIPITACAO = Precipitacao()

# Check Data Structures

In [3]:
def check_dimensions(datas):
    for r in datas:
        print("{}   -  Rows: {}   Columns: {}".format(r.name, r.shape[0], r.shape[1]))
        print()

In [4]:
def check_nan_fields(datas):
    for r in datas:
        print(str(r.name)+"  -  "+str(sum(r.isna().sum().values)))
        print()

In [5]:
check_nan_fields([ATLANTICO_NORTE.data, ATLANTICO_SUL.data, PACIFICO_SUL.data, NINO.data, COORDENADAS.data, PRECIPITACAO.data])

ATLANTICO_NORTE  -  0

ATLANTICO_SUL  -  0

PACIFICO_SUL  -  0

NINO  -  0

COORDENADAS  -  0

PRECIPITACAO  -  0



In [6]:
check_dimensions([ATLANTICO_NORTE.data, ATLANTICO_SUL.data, PACIFICO_SUL.data, NINO.data, COORDENADAS.data, PRECIPITACAO.data])

ATLANTICO_NORTE   -  Rows: 468   Columns: 20

ATLANTICO_SUL   -  Rows: 468   Columns: 40

PACIFICO_SUL   -  Rows: 468   Columns: 35

NINO   -  Rows: 468   Columns: 5

COORDENADAS   -  Rows: 24   Columns: 3

PRECIPITACAO   -  Rows: 468   Columns: 25



In [7]:
PRECIPITACAO.data['1'].value_counts()

NORMAL                    134
MUITO ABAIXO DO NORMAL    100
ABAIXO DO NORMAL           98
ACIMA DO NORMAL            86
MUITO ACIMA DO NORMAL      50
Name: 1, dtype: int64

# Approach I

- Correlate TSM Areas x PRM Areas, considering times shifts from 0 to 12 Months and select vars with the highest correlation per area.

- In: TSM Areas transformed in vars with time shifts. Example: TSM-AN(3) - Temperature Media in North Atlantic displaced 3 months in time since the first date of the registers 1982-01-01.

- OUT: Class of Precipitation in 24 areas. Ex: PRM1 - NORMAL



In [8]:
ATLANTICO_NORTE.transpose_to_time()
ATLANTICO_SUL.transpose_to_time()
PACIFICO_SUL.transpose_to_time()
NINO.transpose_to_time()
PRECIPITACAO.transpose_to_time()

In [9]:
PRECIPITACAO.encoder()

In [10]:
PRECIPITACAO.data

Unnamed: 0,1982-01-01,1982-02-01,1982-03-01,1982-04-01,1982-05-01,1982-06-01,1982-07-01,1982-08-01,1982-09-01,1982-10-01,...,2020-03-01,2020-04-01,2020-05-01,2020-06-01,2020-07-01,2020-08-01,2020-09-01,2020-10-01,2020-11-01,2020-12-01
1,2,0,-1,1,-2,-1,-1,1,1,1,...,-1,-1,0,0,-2,1,-2,-2,2,-1
2,2,0,-2,1,-2,-1,-1,1,1,1,...,-2,0,0,0,-1,1,-2,-2,2,-1
3,2,0,-1,1,-1,-1,-1,0,0,0,...,-2,0,0,0,-1,1,-2,-2,2,0
4,1,1,-2,0,0,-1,-2,-1,-1,0,...,0,0,0,-1,0,1,-1,-2,2,-1
5,1,1,-2,0,0,0,-2,-1,-1,0,...,0,0,-1,-1,0,0,0,-1,1,-1
6,2,0,-1,0,0,1,-2,0,0,0,...,-1,-1,-1,-1,1,0,-1,0,1,-2
7,2,0,-1,1,-2,0,-1,1,1,0,...,0,0,0,-1,-2,-1,-1,-1,2,-2
8,2,0,-2,1,-2,0,0,1,1,-1,...,-1,-1,0,0,-2,0,-2,-2,2,-2
9,2,0,-2,1,-1,0,-1,0,0,-1,...,0,-1,0,-1,-2,0,-2,-2,2,-2
10,1,0,-2,0,-2,0,-2,-1,0,-1,...,0,0,-1,-1,-1,0,0,-1,1,-1


In [11]:
def time_lag(x,y, delta,ocean):
    x = x[x.columns[:len(x.columns)-delta]]#.loc[x.index[index]]#ATLANTICO_NORTE.data[ATLANTICO_NORTE.data.columns[:len(ATLANTICO_NORTE.data.columns)-12]].loc[ATLANTICO_NORTE.data.index[0]]
    y = y[y.columns[delta:]]
    lags = {}
    for i in x.index:
        lags["TSM-"+ocean+x.loc[x.index[int(i)-1]].name+"("+str(delta)+")"] = x.loc[x.index[int(i)-1]].values
    for r in y.index:
        lags["PRM"+y.loc[y.index[int(r)-1]].name] = y.loc[y.index[int(r)-1]].values 
    return pd.DataFrame(lags)

In [12]:
## Deslocamentos

def generate_displacement(data_in,data_out,ocean_flag):
    flags = {"ATLN": "AN", "ATLS": "AS", "PCS": "PS", "NINO": "NI"}
    out_columns = ["PRM1","PRM2","PRM3","PRM4","PRM5","PRM6","PRM7","PRM8","PRM9","PRM10","PRM11",
                 "PRM12","PRM13","PRM14","PRM15","PRM16","PRM17","PRM18","PRM19","PRM20","PRM21",
                 "PRM22","PRM23","PRM24"]
    DELTA = {}
    for r in range(1,13):
        x = data_in[data_in.columns[:len(data_in.columns) - r]].T
        y = data_out[data_out.columns[r:]].T
        columns = []
        for c in x.columns:
            columns.append("TSM-"+flags[ocean_flag]+str(c)+"("+str(r)+")")
        x.columns = columns
        y.columns = out_columns
        DELTA["DELTA "+str(r)] = {"X": x, "y": y}  
    return DELTA

In [13]:
atln = generate_displacement(ATLANTICO_NORTE.data, PRECIPITACAO.data, "ATLN")
atls = generate_displacement(ATLANTICO_SUL.data, PRECIPITACAO.data, "ATLS")
pcs = generate_displacement(PACIFICO_SUL.data, PRECIPITACAO.data, "PCS")
nino = generate_displacement(NINO.data, PRECIPITACAO.data, "NINO")

In [22]:
atln['DELTA 12']['X']

Unnamed: 0,TSM-AN1(12),TSM-AN2(12),TSM-AN3(12),TSM-AN4(12),TSM-AN5(12),TSM-AN6(12),TSM-AN7(12),TSM-AN8(12),TSM-AN9(12),TSM-AN10(12),TSM-AN11(12),TSM-AN12(12),TSM-AN13(12),TSM-AN14(12),TSM-AN15(12),TSM-AN16(12),TSM-AN17(12),TSM-AN18(12),TSM-AN19(12)
1982-01-01,26.0098,25.3862,24.3421,22.973,26.8484,26.1652,25.2629,24.6777,26.8626,26.7427,26.4668,26.604,27.7325,27.284,27.3158,27.503,28.2153,28.3471,28.4736
1982-02-01,25.4937,24.6246,23.706,22.2753,26.4223,25.4757,24.638,23.7625,26.7571,26.2851,26.1257,25.9726,26.011,27.3053,27.2771,27.7486,28.5934,28.5578,28.8561
1982-03-01,25.3498,24.4863,23.534,22.1519,26.3487,25.4243,24.6487,23.6527,26.7935,26.8084,26.4926,26.1522,26.1699,27.3521,27.534,28.1318,28.897,29.1426,29.1116
1982-04-01,25.779,24.651,23.4159,22.3043,26.6982,25.3215,24.6814,23.611,27.1823,26.8794,26.5401,26.2872,27.1713,27.6002,27.6694,28.0068,28.8802,29.1436,28.6862
1982-05-01,26.6424,25.5152,24.1482,23.0335,27.3998,26.17,25.3244,24.5587,27.5308,27.3391,26.938,27.0607,27.8449,27.7344,27.6691,28.0665,28.1371,28.0639,27.7348
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-01,28.3878,27.2397,26.1895,25.2551,28.6688,27.7191,27.2307,27.1386,28.9099,28.6232,27.7557,27.3673,26.8774,27.8133,27.2798,26.4639,25.835,25.6514,26.2485
2019-09-01,28.9423,27.9556,26.9795,26.4659,29.2131,28.6041,27.9552,27.7709,29.3495,29.1864,28.453,27.957,27.3123,27.9983,27.618,26.9439,26.4558,25.8576,26.6755
2019-10-01,29.0423,27.9018,27.0465,26.5341,29.127,28.5396,27.9117,28.1049,29.2269,29.2897,29.0279,28.8493,28.2492,28.2514,28.0478,27.964,27.2641,26.4353,27.1444
2019-11-01,27.9465,26.9379,26.3661,25.4565,28.5828,27.3883,27.0997,27.2374,28.9635,28.2067,28.3246,28.5011,29.0431,27.8961,27.9979,28.2904,28.1149,28.0067,28.1491


In [23]:
atln['DELTA 12']['y']

Unnamed: 0,PRM1,PRM2,PRM3,PRM4,PRM5,PRM6,PRM7,PRM8,PRM9,PRM10,...,PRM15,PRM16,PRM17,PRM18,PRM19,PRM20,PRM21,PRM22,PRM23,PRM24
1983-01-01,-2,-2,-1,-1,-1,-1,-1,-1,0,0,...,0,0,0,1,-1,-1,0,0,1,2
1983-02-01,0,0,-1,-2,-2,-1,0,0,-1,-1,...,-1,-1,0,0,0,0,-1,-2,-1,0
1983-03-01,0,0,0,-1,-1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1983-04-01,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,...,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2
1983-05-01,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,...,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-01,1,1,1,1,0,0,-1,0,0,0,...,0,1,0,0,-2,-1,0,1,1,0
2020-09-01,-2,-2,-2,-1,0,-1,-1,-2,-2,0,...,-2,0,2,1,-2,-2,-2,-2,-1,-2
2020-10-01,-2,-2,-2,-2,-1,0,-1,-2,-2,-1,...,-2,-2,-1,-1,-1,-2,-2,-2,-2,-2
2020-11-01,2,2,2,2,1,1,2,2,2,1,...,1,2,2,2,0,1,1,2,2,1


In [14]:
def generate_correlation_tables(data):
    CORR = {}
    indices = ["PRM1","PRM2","PRM3","PRM4","PRM5","PRM6","PRM7","PRM8","PRM9","PRM10","PRM11",
                 "PRM12","PRM13","PRM14","PRM15","PRM16","PRM17","PRM18","PRM19","PRM20","PRM21",
                 "PRM22","PRM23","PRM24"]
    for k in data.keys():
        var = {}
        for x in data[k]['X'].columns:
            var[x] = []
            for y in data[k]['y'].columns:
                c = st.kendalltau(data[k]['X'][x],data[k]['y'][y])[0]
                var[x].append(c)
        CORR[k] = pd.DataFrame(var).T
        CORR[k].columns = indices
    return CORR


def concat_data_frames(dfs):
    df_list = []
    for k in dfs.keys():
        df_list.append(dfs[k])
    return pd.concat(df_list)

In [15]:
corr_atln = concat_data_frames(generate_correlation_tables(atln))
corr_atls = concat_data_frames(generate_correlation_tables(atls))
corr_pcs = concat_data_frames(generate_correlation_tables(pcs))
corr_nino = concat_data_frames(generate_correlation_tables(nino))

final_corr = pd.concat([corr_atln,corr_atls,corr_pcs,corr_nino])

In [13]:
final_corr.to_excel('correlacoes.xlsx')

In [16]:
final_corr

Unnamed: 0,PRM1,PRM2,PRM3,PRM4,PRM5,PRM6,PRM7,PRM8,PRM9,PRM10,...,PRM15,PRM16,PRM17,PRM18,PRM19,PRM20,PRM21,PRM22,PRM23,PRM24
TSM-AN1(1),-0.068730,-0.076285,-0.068967,-0.061169,-0.052251,-0.059741,-0.075599,-0.095640,-0.097642,-0.072605,...,-0.097927,-0.080971,-0.057035,-0.061845,-0.100377,-0.111970,-0.118749,-0.095077,-0.084337,-0.071059
TSM-AN2(1),-0.045166,-0.052866,-0.053818,-0.049981,-0.047094,-0.057891,-0.061027,-0.073054,-0.078387,-0.061887,...,-0.079778,-0.074252,-0.055709,-0.057311,-0.088684,-0.095017,-0.097572,-0.085923,-0.082495,-0.070447
TSM-AN3(1),-0.032995,-0.039821,-0.037851,-0.039738,-0.035884,-0.046036,-0.046497,-0.058183,-0.064620,-0.049476,...,-0.072595,-0.067337,-0.050355,-0.055013,-0.082350,-0.088810,-0.085110,-0.084492,-0.078988,-0.066250
TSM-AN4(1),-0.032477,-0.037098,-0.035862,-0.037735,-0.025476,-0.028208,-0.043766,-0.056844,-0.063075,-0.043744,...,-0.070699,-0.065171,-0.039669,-0.045312,-0.079387,-0.088147,-0.084011,-0.083239,-0.071625,-0.054354
TSM-AN5(1),-0.050935,-0.066587,-0.061175,-0.060380,-0.052927,-0.066556,-0.062792,-0.083718,-0.092163,-0.072127,...,-0.094061,-0.082173,-0.057552,-0.057498,-0.088891,-0.107306,-0.113633,-0.090101,-0.079744,-0.069805
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TSM-NI4(11),-0.075271,-0.095248,-0.061786,-0.038129,-0.053362,-0.041522,-0.051848,-0.065297,-0.044384,-0.037350,...,-0.059873,-0.055545,-0.062195,-0.050203,-0.073123,-0.096280,-0.105068,-0.042104,-0.037558,-0.040035
TSM-NI1(12),-0.015333,-0.022836,-0.005347,0.008024,-0.003774,-0.009890,-0.012138,-0.010155,-0.002416,-0.006644,...,0.001566,-0.013185,-0.024622,0.010019,0.015370,0.017690,0.001893,0.011261,0.014733,0.016067
TSM-NI2(12),-0.043760,-0.059188,-0.030518,-0.005032,-0.020446,-0.004729,-0.031707,-0.030704,-0.009165,-0.006668,...,-0.014303,-0.016171,-0.025890,-0.004717,-0.023846,-0.026794,-0.039097,0.002968,0.007315,0.002144
TSM-NI3(12),-0.122374,-0.132230,-0.097858,-0.067768,-0.081622,-0.064172,-0.121186,-0.118916,-0.101136,-0.086089,...,-0.125013,-0.100783,-0.091706,-0.101051,-0.155425,-0.178524,-0.175157,-0.115082,-0.088343,-0.114029


In [22]:
abs(final_corr)

Unnamed: 0,PRM1,PRM2,PRM3,PRM4,PRM5,PRM6,PRM7,PRM8,PRM9,PRM10,...,PRM15,PRM16,PRM17,PRM18,PRM19,PRM20,PRM21,PRM22,PRM23,PRM24
TSM-AN1(1),0.068730,0.076285,0.068967,0.061169,0.052251,0.059741,0.075599,0.095640,0.097642,0.072605,...,0.097927,0.080971,0.057035,0.061845,0.100377,0.111970,0.118749,0.095077,0.084337,0.071059
TSM-AN2(1),0.045166,0.052866,0.053818,0.049981,0.047094,0.057891,0.061027,0.073054,0.078387,0.061887,...,0.079778,0.074252,0.055709,0.057311,0.088684,0.095017,0.097572,0.085923,0.082495,0.070447
TSM-AN3(1),0.032995,0.039821,0.037851,0.039738,0.035884,0.046036,0.046497,0.058183,0.064620,0.049476,...,0.072595,0.067337,0.050355,0.055013,0.082350,0.088810,0.085110,0.084492,0.078988,0.066250
TSM-AN4(1),0.032477,0.037098,0.035862,0.037735,0.025476,0.028208,0.043766,0.056844,0.063075,0.043744,...,0.070699,0.065171,0.039669,0.045312,0.079387,0.088147,0.084011,0.083239,0.071625,0.054354
TSM-AN5(1),0.050935,0.066587,0.061175,0.060380,0.052927,0.066556,0.062792,0.083718,0.092163,0.072127,...,0.094061,0.082173,0.057552,0.057498,0.088891,0.107306,0.113633,0.090101,0.079744,0.069805
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TSM-NI4(11),0.075271,0.095248,0.061786,0.038129,0.053362,0.041522,0.051848,0.065297,0.044384,0.037350,...,0.059873,0.055545,0.062195,0.050203,0.073123,0.096280,0.105068,0.042104,0.037558,0.040035
TSM-NI1(12),0.015333,0.022836,0.005347,0.008024,0.003774,0.009890,0.012138,0.010155,0.002416,0.006644,...,0.001566,0.013185,0.024622,0.010019,0.015370,0.017690,0.001893,0.011261,0.014733,0.016067
TSM-NI2(12),0.043760,0.059188,0.030518,0.005032,0.020446,0.004729,0.031707,0.030704,0.009165,0.006668,...,0.014303,0.016171,0.025890,0.004717,0.023846,0.026794,0.039097,0.002968,0.007315,0.002144
TSM-NI3(12),0.122374,0.132230,0.097858,0.067768,0.081622,0.064172,0.121186,0.118916,0.101136,0.086089,...,0.125013,0.100783,0.091706,0.101051,0.155425,0.178524,0.175157,0.115082,0.088343,0.114029


In [83]:
def selected_nvars(data, n):
    columns = data.columns
    selected = {}
    for r in columns:
        data_abs = abs(data)
        indices = list(data_abs.sort_values(by=[r], ascending=False)[r].head(n).index)
        selected[r] = data.T[indices].columns
    return pd.DataFrame(selected) 
selected_nvars(final_corr,5)

Unnamed: 0,PRM1,PRM2,PRM3,PRM4,PRM5,PRM6,PRM7,PRM8,PRM9,PRM10,...,PRM15,PRM16,PRM17,PRM18,PRM19,PRM20,PRM21,PRM22,PRM23,PRM24
0,TSM-NI3(1),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),...,TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(11),TSM-NI3(11),TSM-NI3(6),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4)
1,TSM-NI3(3),TSM-NI3(3),TSM-NI3(3),TSM-NI3(3),TSM-NI3(3),TSM-NI3(6),TSM-NI3(3),TSM-NI3(3),TSM-NI3(3),TSM-NI3(5),...,TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(6),TSM-NI3(10),TSM-NI3(4),TSM-NI3(3),TSM-NI3(6),TSM-NI3(6)
2,TSM-NI3(4),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(1),TSM-NI3(5),TSM-NI3(5),TSM-NI3(3),...,TSM-NI3(3),TSM-NI3(6),TSM-NI3(3),TSM-NI3(6),TSM-NI3(12),TSM-NI3(6),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5)
3,TSM-NI3(2),TSM-NI3(1),TSM-NI3(6),TSM-NI3(2),TSM-NI3(6),TSM-NI3(3),TSM-NI3(5),TSM-NI3(6),TSM-NI3(6),TSM-NI3(6),...,TSM-NI3(6),TSM-NI3(3),TSM-NI3(6),TSM-NI3(3),TSM-NI3(10),TSM-NI3(12),TSM-NI3(11),TSM-NI3(6),TSM-NI3(3),TSM-NI3(3)
4,TSM-NI3(5),TSM-NI3(2),TSM-NI3(2),TSM-NI3(6),TSM-NI3(2),TSM-NI3(7),TSM-NI3(2),TSM-NI3(2),TSM-NI3(2),TSM-NI3(1),...,TSM-NI3(2),TSM-NI4(6),TSM-NI3(1),TSM-NI3(1),TSM-NI3(7),TSM-NI3(4),TSM-NI3(10),TSM-NI3(2),TSM-NI4(6),TSM-NI3(1)


In [84]:
lists = []
wr = pd.ExcelWriter('Base_Atlantico_Norte.xlsx')
wr2 = pd.ExcelWriter('Base_Atlantico_Sul.xlsx')
wr3 = pd.ExcelWriter('Base_Pacifico_Sul.xlsx')
wr4 = pd.ExcelWriter('Base_Nino.xlsx')

for d in range(1,13):
    time_lag(ATLANTICO_NORTE.data, PRECIPITACAO.data, d,'AN').to_excel(wr, sheet_name=str(d))
    time_lag(ATLANTICO_SUL.data, PRECIPITACAO.data, d,'AS').to_excel(wr2, sheet_name=str(d))
    time_lag(PACIFICO_SUL.data, PRECIPITACAO.data, d,'PS').to_excel(wr3, sheet_name=str(d))
    time_lag(NINO.data, PRECIPITACAO.data, d,'NI').to_excel(wr4, sheet_name=str(d))
wr.save()
wr2.save()
wr3.save()
wr4.save()

In [124]:
def find_var(var,out,data):
    col = list(data.columns)
    if var in col:
        return data[[var,out]]
    return None
        



atn = pd.ExcelFile("Base_Atlantico_Norte.xlsx").sheet_names
ats = pd.ExcelFile("Base_Atlantico_Sul.xlsx").sheet_names
nin = pd.ExcelFile("Base_Nino.xlsx").sheet_names
pcs = pd.ExcelFile("Base_Pacifico_Sul.xlsx").sheet_names

ent = selected_nvars(final_corr, 5)
out = "PRM1"
col = []
#var = {}
paths = ["Base_Atlantico_Norte.xlsx","Base_Atlantico_Sul.xlsx", "Base_Nino.xlsx", "Base_Pacifico_Sul.xlsx"]
sheet_names = ['1','2','3','4','5','6','7','8','9','10','11','12']

'''
for c in ent.columns:
    var[c] = []
    for i in ent[r].values:
        for p in paths:
            for s in sheet_names:
                base = pd.read_excel(p,s)
                df = find_var(i,c,base)
                if df is not None:
                    var[c].append(df)


var.keys()
'''

'\nfor c in ent.columns:\n    var[c] = []\n    for i in ent[r].values:\n        for p in paths:\n            for s in sheet_names:\n                base = pd.read_excel(p,s)\n                df = find_var(i,c,base)\n                if df is not None:\n                    var[c].append(df)\n\n\nvar.keys()\n'

In [116]:
DATA = {}
for k in var.keys():
    DATA[k] = {"IN": {}, "OUT": {}}
    i = 0
    for r in var[k]:
        i+=1
        DATA[k]["IN"][i] = []
        DATA[k]["OUT"][i] = []
        DATA[k]["IN"][i].append(list(r[r.columns[0]].values)) 
        DATA[k]["OUT"][i].append(list(r[r.columns[1]].values))                     

In [125]:
pd.DataFrame(DATA['PRM1'])

Unnamed: 0,IN,OUT
1,"[[28.3, 28.21, 28.41, 28.92, 29.49, 29.76, 29....","[[-2, -1, -1, 1, 1, 1, -2, 0, -2, 0, 0, -2, -2..."
2,"[[28.3, 28.21, 28.41, 28.92, 29.49, 29.76, 29....","[[-1, 1, 1, 1, -2, 0, -2, 0, 0, -2, -2, -1, 0,..."
3,"[[28.3, 28.21, 28.41, 28.92, 29.49, 29.76, 29....","[[-1, -1, 1, 1, 1, -2, 0, -2, 0, 0, -2, -2, -1..."
4,"[[28.3, 28.21, 28.41, 28.92, 29.49, 29.76, 29....","[[1, -2, -1, -1, 1, 1, 1, -2, 0, -2, 0, 0, -2,..."
5,"[[28.3, 28.21, 28.41, 28.92, 29.49, 29.76, 29....","[[0, -1, 1, -2, -1, -1, 1, 1, 1, -2, 0, -2, 0,..."


In [128]:
exf = pd.ExcelWriter("Data_fit.xlsx")

for k in DATA:
    pd.DataFrame(DATA[k]).to_excel(exf,sheet_name=k)
exf.save()

In [129]:
ent.to_excel("Variaveis_selecionadas.xlsx")

In [324]:
lista = []
base = []
i = 1
for k in var.keys():
    lista = []
    for df in var[k]:
        df.columns = ['ENT'+str(i),k]
        lista.append(df)
    base.append(lista)
    i+=1  
    
final = []
for b in base:
    tmp = pd.concat(b)
    tmp.reset_index(drop=True, inplace=True)
    final.append(tmp)

base_dados = pd.concat(final,axis=1, join='inner')

In [326]:
base_dados.to_excel('data_model.xlsx')

In [37]:
ent = selected_nvars(final_corr, 5)
ent

Unnamed: 0,PRM1,PRM2,PRM3,PRM4,PRM5,PRM6,PRM7,PRM8,PRM9,PRM10,...,PRM15,PRM16,PRM17,PRM18,PRM19,PRM20,PRM21,PRM22,PRM23,PRM24
0,TSM-NI3(1),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),...,TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4),TSM-NI3(11),TSM-NI3(11),TSM-NI3(6),TSM-NI3(4),TSM-NI3(4),TSM-NI3(4)
1,TSM-NI3(3),TSM-NI3(3),TSM-NI3(3),TSM-NI3(3),TSM-NI3(3),TSM-NI3(6),TSM-NI3(3),TSM-NI3(3),TSM-NI3(3),TSM-NI3(5),...,TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(6),TSM-NI3(10),TSM-NI3(4),TSM-NI3(3),TSM-NI3(6),TSM-NI3(6)
2,TSM-NI3(4),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(1),TSM-NI3(5),TSM-NI3(5),TSM-NI3(3),...,TSM-NI3(3),TSM-NI3(6),TSM-NI3(3),TSM-NI3(6),TSM-NI3(12),TSM-NI3(6),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5),TSM-NI3(5)
3,TSM-NI3(2),TSM-NI3(1),TSM-NI3(6),TSM-NI3(2),TSM-NI3(6),TSM-NI3(3),TSM-NI3(5),TSM-NI3(6),TSM-NI3(6),TSM-NI3(6),...,TSM-NI3(6),TSM-NI3(3),TSM-NI3(6),TSM-NI3(3),TSM-NI3(10),TSM-NI3(12),TSM-NI3(11),TSM-NI3(6),TSM-NI3(3),TSM-NI3(3)
4,TSM-NI3(5),TSM-NI3(2),TSM-NI3(2),TSM-NI3(6),TSM-NI3(2),TSM-NI3(7),TSM-NI3(2),TSM-NI3(2),TSM-NI3(2),TSM-NI3(1),...,TSM-NI3(2),TSM-NI4(6),TSM-NI3(1),TSM-NI3(1),TSM-NI3(7),TSM-NI3(4),TSM-NI3(10),TSM-NI3(2),TSM-NI4(6),TSM-NI3(1)


In [52]:
[atln['DELTA 1']['X']['TSM-AN1(1)'].values,atln['DELTA 1']['X']['TSM-AN2(1)'].values]

[array([26.0098, 25.4937, 25.3498, 25.779, 26.6424, 27.258, 27.3333,
        27.7096, 28.0901, 27.6497, 27.0988, 26.2016, 25.5559, 25.3834,
        25.7558, 26.188, 26.7781, 27.3996, 27.8021, 28.1231, 28.0444,
        27.8126, 27.5219, 26.793, 26.0275, 25.5787, 25.2731, 25.6937,
        26.2337, 26.8452, 26.7796, 26.9829, 27.4399, 27.4981, 27.098,
        26.3296, 25.6059, 25.286, 25.0621, 25.3709, 25.9592, 26.9344,
        27.3486, 27.8043, 27.8666, 27.4764, 27.1502, 26.1931, 25.3087,
        25.0223, 25.1215, 25.61, 26.5528, 26.8165, 26.9251, 27.366,
        27.7102, 27.7511, 27.2708, 26.0508, 25.5623, 25.4615, 25.9283,
        26.528, 26.8743, 27.5767, 27.7265, 28.2978, 28.4667, 28.3541,
        27.9311, 27.0224, 26.3211, 25.7575, 25.6418, 26.18, 26.7701,
        27.5792, 27.7632, 27.8473, 28.1998, 27.8459, 27.0764, 26.2394,
        25.2494, 24.9275, 24.7137, 25.1015, 25.7808, 26.5637, 27.5391,
        27.792, 27.6479, 27.7649, 27.3451, 26.7493, 25.5306, 25.1462,
        25.1588, 25

In [230]:
for c in ent.columns:
    for i in ent[r].values:
        print(i,c)

TSM-AS14(2) PRM1
TSM-AS13(2) PRM1
TSM-PS30(3) PRM1
TSM-AS16(1) PRM1
TSM-AS13(1) PRM1
TSM-NI3(4) PRM1
TSM-NI3(6) PRM1
TSM-NI3(5) PRM1
TSM-NI3(3) PRM1
TSM-NI3(1) PRM1
TSM-AS14(2) PRM2
TSM-AS13(2) PRM2
TSM-PS30(3) PRM2
TSM-AS16(1) PRM2
TSM-AS13(1) PRM2
TSM-NI3(4) PRM2
TSM-NI3(6) PRM2
TSM-NI3(5) PRM2
TSM-NI3(3) PRM2
TSM-NI3(1) PRM2
TSM-AS14(2) PRM3
TSM-AS13(2) PRM3
TSM-PS30(3) PRM3
TSM-AS16(1) PRM3
TSM-AS13(1) PRM3
TSM-NI3(4) PRM3
TSM-NI3(6) PRM3
TSM-NI3(5) PRM3
TSM-NI3(3) PRM3
TSM-NI3(1) PRM3
TSM-AS14(2) PRM4
TSM-AS13(2) PRM4
TSM-PS30(3) PRM4
TSM-AS16(1) PRM4
TSM-AS13(1) PRM4
TSM-NI3(4) PRM4
TSM-NI3(6) PRM4
TSM-NI3(5) PRM4
TSM-NI3(3) PRM4
TSM-NI3(1) PRM4
TSM-AS14(2) PRM5
TSM-AS13(2) PRM5
TSM-PS30(3) PRM5
TSM-AS16(1) PRM5
TSM-AS13(1) PRM5
TSM-NI3(4) PRM5
TSM-NI3(6) PRM5
TSM-NI3(5) PRM5
TSM-NI3(3) PRM5
TSM-NI3(1) PRM5
TSM-AS14(2) PRM6
TSM-AS13(2) PRM6
TSM-PS30(3) PRM6
TSM-AS16(1) PRM6
TSM-AS13(1) PRM6
TSM-NI3(4) PRM6
TSM-NI3(6) PRM6
TSM-NI3(5) PRM6
TSM-NI3(3) PRM6
TSM-NI3(1) PRM6
TSM-AS14(2