Title: Histogram of a Single Feature <br>

Question 1: Create a histogram for the age feature from a dataset. Interpret what the shape of the histogram tells us about the distribution of the age feature.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Build a reproducible sample dataset with an integer 'age' column.
# Ages are drawn from a normal distribution (mean 35, sd 10) and then
# truncated to whole years by astype(int).
np.random.seed(42)
df = pd.DataFrame({'age': np.random.normal(loc=35, scale=10, size=1000).astype(int)})

# The ages are whole numbers, so every bar should cover the same number of
# whole years. Equal-width float bins (e.g. bins=20) generally do not: the
# bar width is (max - min) / 20, so neighbouring bars can contain a different
# count of integer ages, which shows up as artificial spikes. Placing the
# edges on half-integers with a fixed whole-year width avoids that.
bin_width = 3  # whole years per bar
age_min, age_max = df['age'].min(), df['age'].max()
bin_edges = np.arange(age_min, age_max + bin_width + 1, bin_width) - 0.5

# Plotting histogram
plt.figure(figsize=(8, 5))
plt.hist(df['age'], bins=bin_edges, edgecolor='black')
plt.title('Histogram of Age')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

# Interpretation: the sample is generated from a normal distribution, so the
# histogram is expected to be roughly bell-shaped and symmetric, with most
# ages clustered in the mid-30s and progressively fewer observations towards
# the young and old tails.


Title: Boxplot for a Single Feature <br>

Question 2: Generate a boxplot for the salary feature and identify any outliers.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load your dataset (replace with actual path)
# df = pd.read_csv('your_dataset.csv')

# For demonstration: 200 salaries drawn from a normal distribution
# (mean 50000, sd 10000) with two deliberately extreme values appended.
import numpy as np
np.random.seed(0)
df = pd.DataFrame({'salary': np.append(np.random.normal(50000, 10000, 200), [150000, 200000])})

# Create the boxplot
plt.figure(figsize=(8, 5))
sns.boxplot(x=df['salary'])
plt.title('Boxplot of Salary')
plt.xlabel('Salary')
plt.grid(True)
plt.show()

# Identify the outliers numerically instead of only reading them off the plot.
# The 1.5 * IQR rule is used: any value further than 1.5 interquartile ranges
# below the first quartile or above the third quartile is flagged. This is the
# conventional box plot whisker rule (seaborn's documented default is whis=1.5).
q1, q3 = df['salary'].quantile([0.25, 0.75])
iqr = q3 - q1
lower_fence = q1 - 1.5 * iqr
upper_fence = q3 + 1.5 * iqr
is_outlier = (df['salary'] < lower_fence) | (df['salary'] > upper_fence)
outliers = df.loc[is_outlier, 'salary'].sort_values()

print(f"IQR fences: [{lower_fence:.2f}, {upper_fence:.2f}]")
print(f"Number of outliers: {len(outliers)}")
if not outliers.empty:
    # to_string() prints every flagged value together with its row index
    print(outliers.to_string())


Title: Violin Plot of a Single Feature <br>

Question 3: Use a violin plot to visualize the distribution of the height feature and comment on its shape.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Reproducible sample: 500 heights drawn from a normal distribution
# with mean 170 and standard deviation 10.
np.random.seed(42)
df = pd.DataFrame(
    {'height': np.random.normal(loc=170, scale=10, size=500)}
)

# Draw the violin plot on a fresh 8x5 inch figure.
plt.figure(figsize=(8, 5))
sns.violinplot(x=df['height'])

# Labelling and grid are applied after drawing so they override
# whatever axis label the plotting call set from the column name.
plt.xlabel('Height (cm)')
plt.title('Violin Plot of Height')
plt.grid(True)
plt.show()


Title: Scatter Plot to Analyze Relationship<br>

Question 4: Create a scatter plot for the weight and height features to determine if there is a trend.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Synthetic data: height is normally distributed (cm); weight (kg) is a
# linear function of height plus independent Gaussian noise, so the two
# columns are related by construction.
np.random.seed(42)
height = np.random.normal(170, 10, 500)
weight = 0.45 * height + np.random.normal(0, 5, 500)

df = pd.DataFrame({'height': height, 'weight': weight})

# Scatter plot: height on the x-axis, weight on the y-axis.
# Semi-transparent markers keep overlapping points distinguishable.
plt.figure(figsize=(8, 6))
plt.scatter(
    df['height'],
    df['weight'],
    alpha=0.6,
    edgecolor='black',
)
plt.xlabel('Height (cm)')
plt.ylabel('Weight (kg)')
plt.title('Scatter Plot of Weight vs. Height')
plt.grid(True)
plt.show()


Title: Correlation Heatmap<br>

Question 5: Generate a correlation heatmap for a dataset with multiple features (e.g., height, weight, age) and explain the correlations observed.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Sample data: three features, each drawn from its own normal distribution.
# The columns are sampled independently of one another, so the pairwise
# correlations are expected to be close to zero.
np.random.seed(42)
feature_params = {
    'height': (170, 10),
    'weight': (70, 15),
    'age': (40, 12),
}
# Dict order is preserved, so the columns are drawn in the order listed above.
df = pd.DataFrame({
    name: np.random.normal(mean, sd, 500)
    for name, (mean, sd) in feature_params.items()
})

# Pairwise correlation matrix of all columns
corr_matrix = df.corr()

# Heatmap with a fixed [-1, 1] colour scale and two-decimal cell annotations
plt.figure(figsize=(8, 6))
sns.heatmap(
    corr_matrix,
    vmin=-1,
    vmax=1,
    cmap='coolwarm',
    annot=True,
    fmt=".2f",
)
plt.title('Correlation Heatmap')
plt.show()


Title: Descriptive Statistical Analysis<br>

Question 6: Calculate the mean, median, standard deviation, skewness, and kurtosis for the
temperature feature and discuss the results.

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis

# Reproducible sample: 500 temperatures drawn from a normal distribution
# with mean 25 and standard deviation 5.
np.random.seed(42)
temperature = np.random.normal(loc=25, scale=5, size=500)
df = pd.DataFrame({'temperature': temperature})

# Descriptive statistics of the 'temperature' column.
# Mean, median and standard deviation come from pandas; skewness and
# kurtosis come from scipy.stats, each with its library-default settings.
temps = df['temperature']
mean_temp = temps.mean()
median_temp = temps.median()
std_temp = temps.std()
skew_temp = skew(temps)
kurt_temp = kurtosis(temps)

# Report every statistic rounded to two decimals, one per line.
summary = (
    ("Mean", mean_temp),
    ("Median", median_temp),
    ("Standard Deviation", std_temp),
    ("Skewness", skew_temp),
    ("Kurtosis", kurt_temp),
)
for label, value in summary:
    print(f"{label}: {value:.2f}")


Title: Covariance and Correlation between Two Features<br>

Question 7: Compute the covariance and correlation between price and demand in a dataset.
Explain what these metrics indicate.

In [None]:
import pandas as pd
import numpy as np

# Synthetic data: demand falls linearly with price (slope -5) plus
# Gaussian noise, so the two variables are inversely related by construction.
np.random.seed(42)
price = np.random.normal(100, 10, 100)
demand = 1000 - 5 * price + np.random.normal(0, 20, 100)

df = pd.DataFrame({'price': price, 'demand': demand})

# Both metrics are read from the 2x2 matrices by label rather than by
# position, which makes explicit which pair of columns is being compared.
pair = df[['price', 'demand']]
cov = pair.cov().loc['price', 'demand']      # covariance of price and demand
corr = pair.corr().loc['price', 'demand']    # correlation of price and demand

print(f"Covariance: {cov:.2f}")
print(f"Correlation: {corr:.2f}")


Title: Pair Plot for Multivariate Analysis<br>

Question 8: Utilize a pair plot on a dataset to explore the relationships and distributions between
height, weight, and age. What insights can you glean from this visualization?

In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sample data: 300 rows, three numeric features. Each column is drawn
# independently from its own normal distribution, so the off-diagonal
# panels are not expected to show a systematic trend.
np.random.seed(42)
column_specs = [
    ('height', 170, 10),
    ('weight', 70, 12),
    ('age', 40, 15),
]
# Columns are generated in list order, which fixes the random draw sequence.
df = pd.DataFrame({
    name: np.random.normal(mean, sd, 300)
    for name, mean, sd in column_specs
})

# Grid of pairwise scatter plots with per-feature distributions on the diagonal
sns.pairplot(df)
# y slightly above 1 lifts the figure title clear of the top row of panels
plt.suptitle("Pair Plot: Height, Weight, Age", y=1.02)
plt.show()


Title: Principal Component Analysis (PCA)<br>

Question 9: Apply PCA on a dataset with multiple features (e.g., x1, x2, x3, x4) and reduce it to two principal components. Visualize the data in the new feature space.

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Synthetic dataset: four features x1..x4, 200 rows each, drawn independently
# from normal distributions with increasing mean and spread.
np.random.seed(42)
feature_params = {
    'x1': (0, 1),
    'x2': (1, 2),
    'x3': (2, 3),
    'x4': (3, 4),
}
df = pd.DataFrame({
    name: np.random.normal(mean, sd, 200)
    for name, (mean, sd) in feature_params.items()
})

# Standardize the features before PCA; the raw columns are on different
# scales (standard deviations 1 to 4).
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df)

# Reduce the four standardized features to two principal components
pca = PCA(n_components=2)
pca_result = pca.fit_transform(scaled_data)

# Wrap the projected coordinates in a labelled DataFrame
pca_df = pd.DataFrame(pca_result, columns=['PC1', 'PC2'])

# Scatter of the data in the new two-dimensional feature space
plt.figure(figsize=(8, 6))
plt.scatter(
    pca_df['PC1'],
    pca_df['PC2'],
    alpha=0.6,
    edgecolor='k',
)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA Projection to 2D')
plt.grid(True)
plt.show()


Title: Advanced Pair Plot with Hue Parameter<br>

Question 10: Create a pair plot for height, weight, and age with an added categorical variable gender as the hue to observe different group trends.

In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sample data: three numeric features plus a categorical 'gender' label.
# The draws happen in the order height, weight, age, gender. Gender is
# assigned at random, independently of the numeric columns, so the two
# groups are not expected to differ systematically.
np.random.seed(42)
n = 300
numeric_specs = [
    ('height', 170, 10),
    ('weight', 70, 12),
    ('age', 40, 15),
]
df = pd.DataFrame({
    name: np.random.normal(mean, sd, n)
    for name, mean, sd in numeric_specs
})
df['gender'] = np.random.choice(['Male', 'Female'], n)

# Pair plot coloured by gender, with KDE curves on the diagonal
sns.pairplot(
    df,
    hue='gender',
    palette='Set2',
    diag_kind='kde',
)
# y slightly above 1 lifts the figure title clear of the top row of panels
plt.suptitle('Pair Plot of Height, Weight, Age by Gender', y=1.02)
plt.show()
