# In [None]:
# # Data Analysis of Train Dataset

# ## Importing Libraries

# Importing the required libraries for data analysis and visualization.

import pandas as pd
import matplotlib.pyplot as plt

# ## Loading Data

# Read 'train.csv' into the DataFrame that every later step works on.

df = pd.read_csv('train.csv')

# ## Displaying Data

# Preview the first 5 rows.

first_rows = df.head(5)
print(first_rows)

# ## Data Types

# Show the dtype pandas inferred for each column.

column_types = df.dtypes
print(column_types)

# ## Summary Statistics

# Summarise the numeric columns (count, mean, std, min, quartiles, max).

summary_stats = df.describe()
print(summary_stats)

# ## Missing Values

# Count the missing entries per column.

missing_counts = df.isna().sum()
print(missing_counts)

# ## Handling Missing Values

# Replacing missing values in the 'Age' column with the mean age.
# The filled column is assigned back to df instead of calling
# fillna(inplace=True) on df['Age']: that chained in-place call operates on an
# intermediate object, triggers a FutureWarning on pandas 2.x, and leaves df
# unchanged once Copy-on-Write is enabled.

mean_age = df['Age'].mean()
df['Age'] = df['Age'].fillna(mean_age)

# ## Label Encoding Categorical Columns

# Defining a function to label encode categorical columns.

def label_encode(df, col):
    """Replace the values of ``df[col]`` with integer codes, in place.

    Each distinct value gets a code 0, 1, 2, ... in order of first
    appearance. Missing values are treated as a category of their own and
    receive a regular non-negative code rather than staying NaN.

    Args:
        df: DataFrame holding the column; it is modified in place.
        col: Label of the column to encode.

    Returns:
        The same DataFrame object, so calls can be written as
        ``df = label_encode(df, col)``.
    """
    # factorize builds the codes in a single hash pass and yields a true
    # integer array. A value->code dict fed to Series.replace is matched one
    # key at a time (very slow for columns with thousands of distinct values)
    # and only becomes numeric through a downcast that pandas has deprecated.
    codes, _ = pd.factorize(df[col], use_na_sentinel=False)
    df[col] = codes
    return df

# Label-encode each categorical column in turn; every call rewrites that one
# column of df with integer codes.

categorical_cols = [
    "HomePlanet",
    "CryoSleep",
    "Cabin",
    "Destination",
    "VIP",
    "Name",
    "Transported",
]
for col in categorical_cols:
    df = label_encode(df, col)

# ## Correlation Analysis

# Calculating the correlation matrix.
# numeric_only=True keeps any column that is still non-numeric out of the
# computation; pandas 2.0+ raises on such columns instead of dropping them.

corr_matrix = df.corr(numeric_only=True)

# Getting the most highly correlated features.
# Each unordered pair of distinct columns is ranked exactly once by absolute
# correlation. Walking only the cells above the diagonal skips the trivial
# 1.0 self-correlations and the mirrored (b, a) duplicates without having to
# de-duplicate on the correlation value, which could discard distinct pairs
# that happen to share a value.

abs_corr = corr_matrix.abs()
corr_columns = list(abs_corr.columns)
pair_strengths = {
    (first, second): abs_corr.iat[i, j]
    for i, first in enumerate(corr_columns)
    for j, second in enumerate(corr_columns[i + 1:], start=i + 1)
}
# Keep the ten strongest pairs, then only those above the 0.5 threshold.
highly_correlated = pd.Series(pair_strengths, dtype=float).sort_values(ascending=False)[:10]
highly_correlated = highly_correlated[highly_correlated > 0.5]

# Printing the most highly correlated features.

print(highly_correlated)

# ## Correlation Matrix Heatmap

# Draw the correlation matrix as a colour-coded grid with one labelled tick
# per column on both axes.

fig, ax = plt.subplots(figsize=(10, 8))
column_labels = corr_matrix.columns
tick_positions = range(len(column_labels))

# The colour scale is pinned to the full correlation range of -1 to 1.
heatmap = ax.imshow(corr_matrix, cmap='coolwarm', vmin=-1, vmax=1)
ax.set_title('Correlation Matrix Heatmap')

ax.set_xticks(tick_positions)
ax.set_xticklabels(column_labels, rotation=45)
ax.set_yticks(tick_positions)
ax.set_yticklabels(column_labels)

plt.colorbar(heatmap)
plt.show()