In [None]:
"""
@author: Iciar Civantos
May 2022

The aim of this script is to calculate two different correlation matrices.
The first matrix contains the correlations among the predictors.
The second matrix shows the correlations between the target variable and the predictors.

"""

In [None]:
# Core data-handling and path utilities.
import pandas as pd
import numpy as np
import sys
import os
# Put the parent directory on the module search path.
sys.path.append('..')

import warnings
import matplotlib.pyplot as plt
import plotly.express as px
# NOTE(review): numpy and pandas are already imported above; these repeats are harmless no-ops.
import numpy as np
import pandas as pd
# Setting this option to None turns off pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None
import seaborn as sns

In [None]:
import os

# Input data directory (trailing slash required: file names are appended by
# plain string concatenation) and output directory for results.
pathdata = 'datasets/'
pathresults = 'results'

# Create the output directory if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test and makes the
# cell safe to re-run.
os.makedirs(pathresults, exist_ok=True)

## **Lentils evaluated for their resistance to rust in the present**

## Load Data Rust Lentils: Temperature and Precipitation

In [None]:
# Read the tab-separated temperature/precipitation table (comma as decimal mark).
roya_raw = pd.read_csv(pathdata+"DatosPrecip/ROYA_Completa_fut_pres.txt",decimal=',',sep= '\t')

# Discard columns by position: the first five, plus positions 7 through 10.
cols_to_drop = list(roya_raw.columns[0:5]) + list(roya_raw.columns[[7, 8, 9, 10]])
roya_raw = roya_raw.drop(columns=cols_to_drop)

# Use the same coordinate column names as the field-trial dataset so the two
# tables can be merged on them.
roya_raw = roya_raw.rename(columns={'Longitude': 'DECLONGITUDE', 'Latitude': 'DECLATITUDE'})

# Cast every remaining column to float in one vectorized step
# (replaces element-wise applymap, which is deprecated since pandas 2.1).
roya_raw = roya_raw.astype(float)

# Rescale the nine scenario columns by a factor of 10.
# NOTE(review): presumably a unit conversion to match the CURRENT_* columns — confirm.
scenario_cols = ['BBC_126', 'BBC_370', 'BBC_585', 'CNRM_126', 'CNRM_370','CNRM_585', 'IPSL_126', 'IPSL_370', 'IPSL_585']
roya_raw[scenario_cols] = roya_raw[scenario_cols] * 10

# Remove fully duplicated rows.
roya_raw = roya_raw.drop_duplicates()

roya_raw.head(1)

In [None]:
# Show (rows, columns) of the temperature/precipitation table after the column pruning and de-duplication above.
print(roya_raw.shape)

## Load Data Rust Lentils: radiation, soil conditions, DSR and coordinates

In [None]:
# Columns kept from the field-trial file: radiation, soil variables,
# coordinates and the DSR target.
roya_keep_cols = [
    'srad_12', 'srad_annual',
    't_bulk_dens', 't_sand', 't_soilwater_cap', 't_teb',
    'DECLONGITUDE', 'DECLATITUDE',
    'ROYA_CAMPO_AVERAGE_DSR',
]

# Semicolon-separated file with comma as decimal mark; keep only the columns above.
roya = pd.read_csv(
    pathdata+"DatosPresente/fichtraining_ROYA_CAMPO_nondup.csv",
    sep=';',
    decimal=',',
)[roya_keep_cols]

roya.head(1)

In [None]:
# Show (rows, columns) of the field-trial table after column selection.
print(roya.shape)

## Dataset Present Rust Lentils

In [None]:
# Join keys shared by both tables.
coord_cols = ['DECLONGITUDE','DECLATITUDE']

# Attach the present-day climate columns to the field-trial rows (inner join on
# coordinates), then keep the first row for each coordinate pair.
# NOTE(review): the join relies on exact float equality of the coordinates.
roya_presente = (
    roya_raw[coord_cols + ['CURRENT_BIO1', 'CURRENT_BIO12']]
    .merge(roya, how='inner', on=coord_cols)
    .drop_duplicates(subset=coord_cols)
)
roya_presente.head()

In [None]:
# Show (rows, columns) of the merged present-day dataset.
print(roya_presente.shape)

## Correlation among features (predictors)

In [None]:
# Pairwise correlations among every column except the target.
predictor_frame = roya_presente.drop(columns=['ROYA_CAMPO_AVERAGE_DSR'])
correlation_matrix = predictor_frame.corr()
correlation_matrix


In [None]:
# Annotated heatmap of the predictor correlation matrix; both axes are labelled
# with the matrix's column names.
feature_labels = list(correlation_matrix.columns)
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.1f',
    cmap="coolwarm",
    linewidths=0.5,
    xticklabels=feature_labels,
    yticklabels=feature_labels,
)

## Correlation between the target and the features

In [None]:
# Correlation of each column with the target: take the target's column from the
# full correlation matrix and drop its self-correlation entry.
target_col = 'ROYA_CAMPO_AVERAGE_DSR'
full_corr = roya_presente.corr()
correlation_target = full_corr[target_col].drop(target_col)
correlation_target

In [None]:
# Default figure size. NOTE: this changes matplotlib's global rc state, so every
# figure created after this cell runs (including re-runs of earlier cells) uses
# 15x5 inches.
plt.rc('figure', figsize=(15, 5))

# Bar chart with one bar per feature; bar height is the feature's correlation
# with the target. The figure is bound to `fig` rather than `_`, because
# assigning `_` shadows IPython's last-output variable.
fig, ax = plt.subplots()
ax.bar(correlation_target.index, correlation_target.values)

ax.set_xlabel('Features')
ax.set_ylabel('Correlation')
# Rotate the existing tick labels instead of calling set_xticklabels(), which
# matplotlib discourages unless tick positions were fixed first via set_xticks().
ax.tick_params(axis='x', labelrotation=45)
# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_title('Feature Correlation with Target Variable');