![correlation_heatmap.png](attachment:correlation_heatmap.png)

# Correlation Heatmap 
#### by Joe Eberle started on 8_12_2023

Process Steps :
1. Load ANY data set for which to run a correlation heatmap 
2. Render descriptive statistics and heatmap for any dataset

![credits%20II%20.png](attachment:credits%20II%20.png)


In [None]:
# Import all of the libraries you need  (install the profiler with: !pip install ydata-profiling)
import ydata_profiling # provides descriptive statistics in html for any dataframe
import pandas as pd 
import display_descriptive as dd
import pandas as pd #Pandas is high performance data manipulation 
import matplotlib.pyplot as plt   # matplot is for python graphics
import numpy as np   #numpy is for array processing
import seaborn  as sns

## Optional Step 0 - Initiate Configuration Settings and name the overall solution

In [None]:
# Read optional settings from config.ini (path is relative to the working directory).
import configparser 
config = configparser.ConfigParser()
# ConfigParser.read() returns the list of files it parsed successfully; a
# missing config.ini is skipped without raising, leaving cfg as an empty list.
cfg = config.read('config.ini')  

# Label for this run; it is passed to the logger and embedded in status messages.
solution_name = 'correlation_heatmap'

## Optional Step 0 - Initiate the Python logger and start the process timer

In [None]:
# Establish the Python logger and record when the process started
import logging # built in python library that does not need to be installed 
import quick_logger as ql

# The former `global start_stime` statement was removed: it misspelled the
# variable name and a `global` declaration at module level has no effect.

# Start marker for the run; it is handed to the logger here and passed again
# to ql.calculate_process_performance at the end of the notebook.
start_time = ql.set_start_time()

# NOTE(review): this rebinds the name `logging` from the standard-library
# module to whatever ql.create_logger_Start returns (presumably a logger
# object). Later cells call logging.info(...) on it, so the name is kept.
logging = ql.create_logger_Start(solution_name, start_time) 

In [None]:
# Overview text describing what a correlation heatmap shows. The adjacent
# literals are concatenated by the parser into one string.
Introduction = (
    "A correlation heatmap is a graphical representation of the correlation coefficients between variables in a data set.  "
    "Correlation measures the statistical relationship between two variables,  "
    "indicating how changes in one variable might be associated with changes in another.  "
    "Correlation coefficients typically range between -1 and 1, with values closer to 1 indicating a strong positive correlation,  "
    "values closer to -1 indicating a strong negative correlation, and values closer to 0 indicating a weak or no correlation.  "
)

# True sends the introduction to ql.talk; False prints it instead.
talking_introduction = True
if not talking_introduction:
    print(Introduction)
else:
    ql.talk(Introduction)


In [None]:
# Optional walkthrough: when enabled, logs the step and passes the
# introduction, credits, process steps and terminology to ql.talk.
provide_documentation = False
if provide_documentation:
    ql.pvlog('info',f'Process {solution_name} Step 2 - Read the Solution documentation.')
    ql.talk(Introduction)
    credits = "This code was developed by Joe Eberle and Others"
    ql.talk(credits)
    # Join the fragments with a space: plain concatenation ran the sentences
    # together ("...are:Step 1...", "...data frameStep 2...").
    process_steps = " ".join([
        "The high level steps for this process are:",
        "Step 1 - Load any datas set into memory or a data frame",
        "Step 2 - Run the correlation algorythms to calculate correlation coefficients",
        "Step 3 - Run the correlation Heatmap visualization",
        "Step 4 - Observe and understand the relationships",
    ])
    ql.talk(process_steps)
    # Same separator fix as above ("...is:Seaborn..." previously).
    terms = " ".join([
        'The terminology for this project is:',
        'Seaborn is a great library for building heatmaps and other graphics. ',
    ])
    ql.talk(terms)

## Step 1 - Load ANY data set for which to run discovery or data profiling

In [None]:
# heart_data_filename = 'C:\\Data_Science_Data\\Test_Data\\healthcare\\heart_data.csv'
# fetal_health_filename = 'C:\\Data_Science_Data\\Test_Data\\healthcare\\fetal_health.csv'
# diabetes_data_filename = 'C:\\Data_Science_Data\\Test_Data\\healthcare\\diabetes_data.csv'
# stroke_data_filename = 'C:\\Data_Science_Data\\Test_Data\\healthcare\\stroke_data.csv'
# hypertension_data_filename = 'C:\\Data_Science_Data\\Test_Data\\healthcare\\hypertension_data.csv'
# aihs_data_filename = 'C:\\working_directory\\excel\\AIHS_patient.xlsx'
# df = pd.read_csv(aihs_data_filename)

In [None]:
# Step 1: read the Titanic CSV into a DataFrame, then log and print its size.
logging.info(f'{solution_name} - Step 1 - Load ANY data set for which to run discovery or data profiling')
data_set_name = 'titanic_data'
titanic_data_filename = 'C:\\Data\\titanic\\titanic.csv'
df = pd.read_csv(titanic_data_filename)

# len(df) is the row count and len(df.columns) the column count.
status = (
    f'{solution_name} - Step 1 - Loaded data set {data_set_name} '
    f'that contains {len(df)} rows and {len(df.columns)} columns.'
)
logging.info(status)
print(status)

# Preview the first five rows (head() defaults to n=5).
df.head()
# df.describe() 

In [None]:
# Show the DataFrame dimensions as a (rows, columns) tuple.
df.shape

In [None]:
# for field in df.columns:
#     print(f",'{field}'")

In [None]:
# Pairwise correlation coefficients for the numeric columns only;
# non-numeric columns are excluded by numeric_only=True.
correlation_matrix = df.corr(numeric_only=True)

# Labels of the columns that made it into the matrix (kept so any later
# cell that refers to this name keeps working).
top_correlation_features = correlation_matrix.index

plt.figure(figsize=(8, 8))
# Plot the matrix computed above rather than recomputing
# df[top_correlation_features].corr(), which yields the same values.
# vmin/vmax pin the colour scale to the full [-1, 1] coefficient range so the
# midpoint colour of the diverging RdYlGn map corresponds to zero correlation
# instead of drifting with the data's own min/max.
g = sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap="RdYlGn",
    vmin=-1,
    vmax=1,
)

## Step 2 - OPTIONAL - Render descriptive statistics and profile every feature or Column of the Dataset

In [None]:
# Build the descriptive statistics for the DataFrame under the display name
# 'Titanic', then hand the result to the display helper.
data_set_name = 'Titanic'
titanic_statistics = dd.get_descriptive_statistics(df, data_set_name)
dd.display_descriptive_statistics(titanic_statistics)

## Step 0 - Process End - display log

In [None]:
# Calculate and classify the process performance for this run, using the
# start time recorded when the logger was created.
status = ql.calculate_process_performance(solution_name, start_time)

# Print whatever ql.append_log_file returns for this solution.
log_output = ql.append_log_file(solution_name)
print(log_output)

![credits%20II%20.png](attachment:credits%20II%20.png)


# https://github.com/JoeEberle/
# josepheberle@outlook.com 

In [None]:
# Here's how a correlation heatmap works:

# Data Preparation: Gather a dataset with multiple numerical variables. This could be a matrix or a pandas DataFrame.

# Calculate Correlation Coefficients: Compute the correlation coefficients between all pairs of variables in the dataset. Common correlation methods include Pearson correlation (for linear relationships), Spearman correlation (for rank-based relationships), and Kendall tau correlation.

# Create Heatmap: Represent the correlation coefficients using a color-coded heatmap. Each cell in the heatmap corresponds to the correlation between two variables. The color intensity indicates the strength and direction of the correlation: positive correlations in one color (e.g., shades of blue), negative correlations in another (e.g., shades of red), and no correlation in neutral colors (e.g., white or gray).

# Analyze the Heatmap: By examining the heatmap, you can quickly identify which variables are positively correlated, negatively correlated, or not correlated at all. Strong correlations are easily noticeable through clusters of intense colors.

# Draw Insights: Interpret the heatmap to gain insights into relationships between variables. For instance, if you're working with financial data, you might find that stock prices of certain companies are positively correlated, while others might show a negative correlation due to inverse behaviors.