# 📊 EDA Plot Cheatsheet – Hands-On with Code

In [None]:
# 📦 Step 1: Import Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Apply seaborn's "whitegrid" style to the plots drawn in the cells below.
sns.set(style="whitegrid")
# IPython magic (not plain Python): render matplotlib figures inline in the notebook output.
%matplotlib inline


In [None]:
# 📂 Step 2: Load a Sample Dataset
# NOTE: the cells below reference 'tips' columns by name (e.g. total_bill, tip, day),
# so loading a different dataset such as 'iris' also requires updating those names.
df = sns.load_dataset('tips')  # You can switch to 'iris' or any other dataset
# Last expression of the cell: displays the first rows as the cell output.
df.head()


## 📍 Univariate Analysis

In [None]:
# Histogram: distribution of total_bill in 20 bins, without a KDE overlay.
ax = sns.histplot(data=df, x='total_bill', bins=20, kde=False)
ax.set_title("Histogram of Total Bill")
plt.show()


In [None]:
# Box Plot: spread and outliers of total_bill, drawn along the y-axis.
ax = sns.boxplot(data=df, y='total_bill')
ax.set_title("Boxplot of Total Bill")
plt.show()


In [None]:
# KDE Plot: smoothed density estimate of total_bill with the area under the curve filled.
# `fill=True` replaces the deprecated `shade=True`, which seaborn warns will become an
# error in a future release.
sns.kdeplot(df['total_bill'], fill=True)
plt.title("KDE Plot of Total Bill")
plt.show()


In [None]:
# Bar Chart for Categorical: number of rows recorded for each value of `day`.
ax = sns.countplot(data=df, x='day')
ax.set_title("Count of Days")
plt.show()


## 📍 Bivariate Analysis

In [None]:
# Scatter Plot: tip plotted against total_bill, one point per row.
ax = sns.scatterplot(data=df, x='total_bill', y='tip')
ax.set_title("Total Bill vs Tip")
plt.show()


In [None]:
# Violin Plot: distribution of total_bill for each day.
ax = sns.violinplot(data=df, x='day', y='total_bill')
ax.set_title("Violin Plot of Total Bill by Day")
plt.show()


In [None]:
# Strip Plot: individual total_bill observations per day, jittered to reduce overlap.
ax = sns.stripplot(data=df, x='day', y='total_bill', jitter=True)
ax.set_title("Strip Plot of Total Bill by Day")
plt.show()


## 📍 Multivariate Analysis

In [None]:
# Pair Plot: pairwise relationships between the numeric columns only.
numeric_df = df.select_dtypes(include=[np.number])
sns.pairplot(numeric_df)
plt.show()


In [None]:
# Correlation Heatmap: pairwise correlation between the numeric columns.
# The frame also holds categorical columns; calling corr() on them raises on
# recent pandas versions, so restrict to numeric dtypes before correlating.
plt.figure(figsize=(8,6))
numeric_corr = df.select_dtypes(include=[np.number]).corr()
sns.heatmap(numeric_corr, annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap")
plt.show()


## 📍 Missing Values

In [None]:
# Introduce Missing Values for Demo
# Work on a copy so the original frame stays intact for the other cells.
df_missing = df.copy()
blank_rows = slice(5, 10)  # .loc slicing is label-based and includes both ends
df_missing.loc[blank_rows, 'tip'] = np.nan

# Heatmap of Missing Data: each cell is coloured by whether its value is null.
null_mask = df_missing.isnull()
ax = sns.heatmap(null_mask, cbar=False, cmap='viridis')
ax.set_title("Missing Data Heatmap")
plt.show()


## 📍 Feature Importance (Random Forest)

In [None]:
# Feature importance for predicting `tip` with a random forest.
# `tip` is a continuous value, so a regressor is required: fitting
# RandomForestClassifier on it fails with "Unknown label type: 'continuous'".
from sklearn.ensemble import RandomForestRegressor

# Encode categorical features for model (work on a copy; df itself is untouched)
df_model = df.copy()
for col in df_model.select_dtypes(include='category').columns:
    df_model[col] = LabelEncoder().fit_transform(df_model[col])

# Every column except the target is used as a feature.
X = df_model.drop('tip', axis=1)
y = df_model['tip']

# Fixed seed so the importances are reproducible between runs.
model = RandomForestRegressor(random_state=42)
model.fit(X, y)
feat_importances = pd.Series(model.feature_importances_, index=X.columns)

# Sort ascending so the most important feature ends up at the top of the bar chart.
feat_importances.sort_values().plot(kind='barh', figsize=(8,6))
plt.title("Feature Importance (Random Forest)")
plt.show()
