# Correlation Study

## Purpose
This notebook analyzes correlations between the features of the dataset to identify relationships that can inform predictive modeling.

## Key Objectives
- Compute correlation matrices.
- Visualize relationships using heatmaps and scatter plots.
- Identify highly correlated features for feature selection.


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import streamlit as st

# Read the cleaned dataset from disk into a DataFrame
data = pd.read_csv(filepath_or_buffer="data/final_cleaned_train.csv")

# Render the first five rows under a "Sample Data" label
st.write("Sample Data", data.head(n=5))


In [None]:
# Compute the pairwise correlation matrix.
# Restrict to numeric/boolean columns first: pandas >= 2.0 no longer drops
# non-numeric columns silently in DataFrame.corr() and raises instead, while
# older versions ignored them, so this keeps the result the same everywhere.
correlation_matrix = data.select_dtypes(include=["number", "bool"]).corr()

# Display the correlation heatmap.
# Draw on an explicit figure and pass it to Streamlit: calling st.pyplot()
# with no argument relies on matplotlib's global figure state, which
# Streamlit deprecates and which is not thread-safe.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
st.pyplot(fig)


In [None]:
# Identify highly correlated feature pairs (absolute correlation above the
# threshold).
threshold = 0.75
strong = correlation_matrix.abs() > threshold

# Every feature correlates perfectly with itself, so the diagonal always
# exceeds the threshold. Left in, it keeps every row "non-empty" and the
# dropna below would never remove anything. Build a mask that is False where
# the row label equals the column label.
row_labels = correlation_matrix.index.to_numpy()[:, None]
col_labels = correlation_matrix.columns.to_numpy()[None, :]
not_self = row_labels != col_labels

# Keep only strong off-diagonal correlations (everything else becomes NaN),
# then discard features that have no strong partner at all. Rows and columns
# are both dropped so the result stays a square, symmetric table.
high_corr_features = (
    correlation_matrix.where(strong & not_self)
    .dropna(axis=0, how="all")
    .dropna(axis=1, how="all")
)

# Report the threshold actually used rather than a hard-coded number.
st.write(f"Features with high correlation (above {threshold}):")
st.write(high_corr_features)
