# In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the Cardiotocographic CSV file into the working DataFrame `df`.
df = pd.read_csv(
    filepath_or_buffer='C:/Users/HP PROBOOK/Downloads/EDA1/EDA1/Cardiotocographic.csv',
)

# Data Cleaning and Preparation

# Handle missing values
# Report the per-column count of missing entries before imputing.
print("Missing values before handling:")
print(df.isnull().sum())

# Option 1: Fill missing values with median for numerical columns.
# numeric_only=True restricts the median to numeric columns; without it
# pandas >= 2.0 raises a TypeError as soon as the frame contains a text
# column. Non-numeric columns are left untouched, so any gaps in them still
# appear in the "after" report below.
df.fillna(df.median(numeric_only=True), inplace=True)

# Option 2: Drop rows with missing values for categorical columns (if necessary)
# df.dropna(inplace=True)

# Report the per-column count of missing entries that remain.
print("\nMissing values after handling:")
print(df.isnull().sum())

# Correct Data Type Inconsistencies (if necessary)
# Example: Convert string columns to numeric
# df['numerical_column_as_string'] = pd.to_numeric(df['numerical_column_as_string'], errors='coerce')

# Detect and Treat Outliers
# Uses the 1.5 * IQR rule: a row is dropped when ANY of its numeric values
# lies below Q1 - 1.5*IQR or above Q3 + 1.5*IQR for that column.
# The fences are computed on numeric columns only, because quantiles and
# ordered comparisons are undefined for text columns and would raise.
# Non-numeric columns are carried along unchanged for the rows that are kept.
numeric_df = df.select_dtypes(include=np.number)
Q1 = numeric_df.quantile(0.25)
Q3 = numeric_df.quantile(0.75)
IQR = Q3 - Q1
outlier_mask = (
    (numeric_df < (Q1 - 1.5 * IQR)) | (numeric_df > (Q3 + 1.5 * IQR))
).any(axis=1)
df = df[~outlier_mask]

# Statistical Summary
# Print a heading followed by the descriptive statistics that
# DataFrame.describe() produces for the current (filtered) frame.
print("\nStatistical summary:", df.describe(), sep="\n")

# Data Visualization

# Histograms or Boxplots for Numerical Variables
# For every numeric column, show two separate 8x6 figures: a histogram with
# a KDE overlay, then a horizontal boxplot of the same values.
numeric_columns = df.select_dtypes(include=np.number).columns
for column in numeric_columns:
    values = df[column]

    # Figure 1: distribution of the column.
    hist_fig, hist_ax = plt.subplots(figsize=(8, 6))
    sns.histplot(values, kde=True, ax=hist_ax)
    hist_ax.set_title(f'Histogram of {column}')
    hist_ax.set_xlabel(column)
    hist_ax.set_ylabel('Frequency')
    plt.show()

    # Figure 2: spread of the column.
    box_fig, box_ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(x=values, ax=box_ax)
    box_ax.set_title(f'Boxplot of {column}')
    box_ax.set_xlabel(column)
    plt.show()

# Bar Charts or Pie Charts for Categorical Variables
# For every object-dtype column, show the category frequencies twice:
# first as a bar chart, then as a pie chart with percentage labels.
categorical_columns = df.select_dtypes(include='object').columns
for column in categorical_columns:
    counts = df[column].value_counts()

    # Figure 1: absolute frequency per category.
    bar_fig, bar_ax = plt.subplots(figsize=(8, 6))
    counts.plot.bar(ax=bar_ax)
    bar_ax.set_title(f'Frequency of {column}')
    bar_ax.set_xlabel(column)
    bar_ax.set_ylabel('Frequency')
    plt.show()

    # Figure 2: share of each category.
    pie_fig, pie_ax = plt.subplots(figsize=(8, 6))
    counts.plot.pie(autopct='%1.1f%%', ax=pie_ax)
    pie_ax.set_title(f'Pie Chart of {column}')
    pie_ax.set_ylabel('')  # clear the y-axis label on the pie chart
    plt.show()

# Scatter Plots or Correlation Heatmaps
# Draw an annotated heatmap of the pairwise correlations between columns.
# numeric_only=True limits the matrix to numeric columns; without it
# pandas >= 2.0 raises when the frame contains a non-numeric column.
plt.figure(figsize=(10, 8))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# Pair Plot
# sns.pairplot builds a grid of subplots on its own figure. plt.title would
# only title the most recently created subplot, so the heading is set at
# figure level instead; y slightly above 1 keeps it clear of the top row.
sns.pairplot(df)
plt.suptitle('Pair Plot of the Dataset', y=1.02)
plt.show()

# Conclusion

## Key Insights and Patterns
# Summarize the major findings from the statistical analysis and visualizations.

## Recommendations
# Based on the patterns and insights, recommend actions or next steps.