Correlation, distribution and regression plots of original merged data

In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
def plot_corr(data, figsize=(15,15)):
    '''
    Plot the lower triangle of a correlation matrix as an annotated heatmap.

    Pairwise correlations are computed with ``DataFrame.corr`` over the
    numeric and boolean columns of ``data`` only. Any other column (for
    example a string label column) is left out, so that such a column does
    not make ``corr`` raise on pandas versions where non-numeric columns
    are no longer dropped silently.

    Args:
    - data: pd dataframe
    - figsize: (width, height) tuple forwarded to ``plt.subplots``

    Returns:
    - None. A new matplotlib figure is created as a side effect.

    Side effects:
    - ``sns.set(font_scale=1.2)`` changes the global seaborn settings and
      stays in effect after the call.
    '''
    # Restrict to columns that can be correlated; select_dtypes is
    # available on both old and new pandas releases.
    numeric_data = data.select_dtypes(include=['number', 'bool'])
    corr = numeric_data.corr()
    sns.set(font_scale=1.2)
    # True marks cells to hide: the upper triangle including the diagonal,
    # since the matrix is symmetric and the diagonal is always 1.
    mask = np.triu(np.ones_like(corr, dtype=bool))
    with sns.axes_style("white"):
        _, ax = plt.subplots(figsize=figsize)
        sns.heatmap(corr, mask=mask, square=True,
                    vmin=-1, vmax=1,
                    cmap='RdBu_r', center=0, annot=True,
                    annot_kws={'fontsize': 8},
                    ax=ax)

In [None]:
# The relative part starts at the parent of the current working directory
# and its leading '..' climbs one more level before entering Data/.
parent_dir = os.path.dirname(os.getcwd())
merged_csv_path = os.path.join(parent_dir, '../Data/Merged_data/MERGE_FT_TEP_UT_on_ID.csv')
# The first CSV column is used as the index of the frame.
merged = pd.read_csv(merged_csv_path, index_col=0)

### Plotting

Group the data by sample type and cold work.

In [None]:
# str.rstrip treats '-12345' as a set of characters: every trailing '-',
# '1', '2', '3', '4' or '5' is removed until another character is reached.
# NOTE(review): an ID whose remaining part also ends in one of these
# characters would be shortened further - confirm against the actual IDs.
trimmed_index = merged.index.str.rstrip('-12345')
merged.index = trimmed_index

In [None]:
# Average every column over the rows that share the same 'ID' key.
# 'ID' is not created in the cells shown; presumably it is the name of the
# index read from the CSV - TODO confirm.
mean_df = merged.groupby('ID').mean()

In [None]:
# Correlation heatmap of the per-ID means, using the default 15x15 figure size.
plot_corr(mean_df)

In [None]:
# Display the column labels of mean_df.
mean_df.columns

### Use only the mean values

In [None]:
# Columns carried over from mean_df into the reduced table.
selected_columns = [
    'KJIC', 'MS_Avg', 'TEP_average',
    'Beta_avg', 'IF_amp_2.25MHz', 'IF_amp_3.5MHz',
    'BS_amp',
]
short_mean = mean_df.loc[:, selected_columns]
# 'Type' is the part of each index label that precedes the first '-'.
index_parts = short_mean.index.str.split('-')
short_mean['Type'] = index_parts.str[0]


In [None]:
# Correlation heatmap of the reduced table on a smaller 5x5 figure.
plot_corr(short_mean, figsize=(5,5))

In [None]:
# One correlation heatmap per distinct 'Type' value, in order of first
# appearance; the label is printed before each plot.
type_labels = short_mean['Type']
for sample_type in type_labels.unique():
    print(sample_type)
    # Work on an independent copy of the rows belonging to this type.
    type_rows = short_mean.loc[type_labels == sample_type].copy()
    plot_corr(type_rows, figsize=(5,5))

In [None]:
# Pairwise relationship grid for the reduced table.
sns.pairplot(short_mean)

In [None]:
# Same pairwise grid, with points coloured by the 'Type' column.
sns.pairplot(short_mean, hue='Type')

In [None]:
# Reshape to long format: 'KJIC' and 'Type' are kept on every row, while the
# listed columns are stacked into the default 'variable' / 'value' pair.
measure_columns = [
    'MS_Avg', 'TEP_average', 'Beta_avg',
    'IF_amp_2.25MHz', 'IF_amp_3.5MHz', 'BS_amp',
]
test = short_mean.melt(id_vars=['KJIC', 'Type'], value_vars=measure_columns)

In [None]:
# Regression plot of 'value' against 'KJIC' on a facet grid: one row per
# melted variable, one column per 'Type', with no axis sharing between facets.
# NOTE(review): recent seaborn releases warn that sharex/sharey should be
# passed through facet_kws instead - confirm against the installed version.
sns.lmplot(x='KJIC', y='value', data=test, col='Type', row='variable', sharey=False, sharex=False)