# Correlation Study

## Purpose
This notebook analyzes correlations between features in the dataset to determine important relationships that can help in predictive modeling.

## Key Objectives
- Compute correlation matrices.
- Visualize relationships using heatmaps and scatter plots.
- Identify highly correlated features for feature selection.


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Select seaborn's "whitegrid" style for the plots drawn later in this notebook.
sns.set_style("whitegrid")


In [None]:
# Read the cleaned training data; the path is relative to the notebook's
# working directory.
csv_path = "../data/final_cleaned_train.csv"
df = pd.read_csv(csv_path)

# Preview the top of the frame (the last expression is rendered as cell output).
df.head()


In [None]:
# Compute the pairwise correlation matrix (pandas' default method is Pearson).
# Restrict the frame to numeric/boolean columns explicitly: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises if
# any are present. select_dtypes() gives the same result on older versions too.
numeric_df = df.select_dtypes(include=["number", "bool"])
correlation_matrix = numeric_df.corr()

# Display the ten largest correlations with SalePrice, strongest first.
# NOTE: SalePrice itself appears in this list with a correlation of 1.0.
correlation_matrix["SalePrice"].sort_values(ascending=False).head(10)


In [None]:
# Render the full correlation matrix as an annotated heatmap on an explicit
# figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 8))
heatmap_options = {
    "annot": True,       # print each coefficient inside its cell
    "fmt": ".2f",        # two decimal places
    "cmap": "coolwarm",  # diverging palette
    "linewidths": 0.5,   # thin separators between cells
}
sns.heatmap(correlation_matrix, ax=ax, **heatmap_options)
ax.set_title("Correlation Matrix Heatmap")
plt.show()


In [None]:
# Identify highly correlated feature pairs.
threshold = 0.75

# Every feature correlates perfectly with itself, so the diagonal always
# exceeds the threshold. Mask it out; otherwise no row is ever entirely NaN
# and the dropna() below would never remove anything.
off_diagonal = ~np.eye(len(correlation_matrix), dtype=bool)
strong_mask = (correlation_matrix.abs() > threshold) & off_diagonal

# Keep only the strong coefficients (all other cells become NaN), then drop
# the rows and columns that have no strong partner at all. The result is
# reassigned rather than mutated in place, since it is derived from
# correlation_matrix.
high_corr_features = (
    correlation_matrix.where(strong_mask)
    .dropna(how="all")
    .dropna(axis=1, how="all")
)

# Build the message from the threshold so the two cannot drift apart.
print(f"Features with high correlation (above {threshold}):")
display(high_corr_features)
