### Correlation Analysis between Processed and Synthetic Datasets
This notebook has been updated to compare the pairwise feature correlations of the processed dataset with those of the synthetic dataset generated by the GAN.

In [None]:
# Read the encoded/processed dataset into a DataFrame
import pandas as pd

# The path is relative, so 'data_encoded.csv' must sit inside the
# 'Processed data' folder of the notebook's working directory.
processed_data = pd.read_csv(filepath_or_buffer='Processed data/data_encoded.csv')

# Preview the first five rows
processed_data.head(n=5)

In [None]:
# Read the GAN-generated synthetic dataset into a DataFrame
# NOTE(review): 'Path/To/...' looks like a placeholder path - confirm and
# replace it with the real location of the synthesized CSV before running.
synthetic_data = pd.read_csv(filepath_or_buffer='Path/To/synthesized_data_GAN.csv')

# Preview the first five rows
synthetic_data.head(n=5)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Select every numeric column for the correlation analysis. 'number' is used
# instead of ['float64', 'int64'] so that other numeric widths (int32,
# float32, unsigned ints, ...) are not silently dropped.
num_cols_processed = processed_data.select_dtypes(include='number')
num_cols_synthetic = synthetic_data.select_dtypes(include='number')

# Compute the pairwise correlation matrices (pandas default method)
corr_matrix_processed = num_cols_processed.corr()
corr_matrix_synthetic = num_cols_synthetic.corr()

# Side-by-side heatmaps. Both panels are pinned to the full correlation range
# [-1, 1]; without this each panel auto-scales to its own min/max and the
# same colour would mean different correlation values in the two plots.
fig, (ax_processed, ax_synthetic) = plt.subplots(1, 2, figsize=(18, 8))

# Processed Data Correlation Heatmap
sns.heatmap(corr_matrix_processed, cmap='coolwarm', annot=False,
            vmin=-1, vmax=1, ax=ax_processed)
ax_processed.set_title('Correlation Heatmap - Processed Data')

# Synthetic Data Correlation Heatmap
sns.heatmap(corr_matrix_synthetic, cmap='coolwarm', annot=False,
            vmin=-1, vmax=1, ax=ax_synthetic)
ax_synthetic.set_title('Correlation Heatmap - Synthetic Data')

plt.show()

# Heatmap of the Differences
# DataFrame subtraction aligns on labels, so a column that is numeric in only
# one dataset would produce all-NaN rows/columns. Restrict the difference to
# the columns present in both matrices (keeping the processed-data order) and
# report anything that had to be left out.
shared_cols = [col for col in corr_matrix_processed.columns
               if col in corr_matrix_synthetic.columns]
only_processed = [col for col in corr_matrix_processed.columns
                  if col not in corr_matrix_synthetic.columns]
only_synthetic = [col for col in corr_matrix_synthetic.columns
                  if col not in corr_matrix_processed.columns]
if only_processed or only_synthetic:
    print('Columns excluded from the correlation difference '
          f'(processed only: {only_processed}, synthetic only: {only_synthetic})')

corr_diff = (
    corr_matrix_synthetic.loc[shared_cols, shared_cols]
    - corr_matrix_processed.loc[shared_cols, shared_cols]
)

if corr_diff.empty:
    print('No numeric columns are shared by both datasets; '
          'skipping the difference heatmap.')
else:
    # Use limits symmetric around zero so the neutral colour of the diverging
    # colormap means "no difference". Falls back to 1.0 when the largest
    # absolute difference is 0 or NaN (e.g. identical or constant columns).
    diff_limit = corr_diff.abs().max().max()
    if not diff_limit > 0:
        diff_limit = 1.0

    fig_diff, ax_diff = plt.subplots(figsize=(9, 8))
    sns.heatmap(corr_diff, cmap='coolwarm', annot=False,
                vmin=-diff_limit, vmax=diff_limit, ax=ax_diff)
    ax_diff.set_title('Correlation Difference Heatmap (Synthetic - Processed)')
    plt.show()